{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9998192662208565, "eval_steps": 500, "global_step": 24897, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 0.06317149847745895, "learning_rate": 7.499999999999999e-07, "loss": 1.0918, "step": 5 }, { "epoch": 0.0, "grad_norm": 0.06490806490182877, "learning_rate": 1.4999999999999998e-06, "loss": 1.1945, "step": 10 }, { "epoch": 0.0, "grad_norm": 0.06881864368915558, "learning_rate": 2.2499999999999996e-06, "loss": 1.1409, "step": 15 }, { "epoch": 0.0, "grad_norm": 0.07844685763120651, "learning_rate": 2.9999999999999997e-06, "loss": 1.2207, "step": 20 }, { "epoch": 0.0, "grad_norm": 0.08064830303192139, "learning_rate": 3.7499999999999997e-06, "loss": 1.1945, "step": 25 }, { "epoch": 0.0, "grad_norm": 0.07337764650583267, "learning_rate": 4.499999999999999e-06, "loss": 1.1097, "step": 30 }, { "epoch": 0.0, "grad_norm": 0.07508231699466705, "learning_rate": 5.25e-06, "loss": 1.1413, "step": 35 }, { "epoch": 0.0, "grad_norm": 0.0660620704293251, "learning_rate": 5.999999999999999e-06, "loss": 1.0751, "step": 40 }, { "epoch": 0.01, "grad_norm": 0.0696869045495987, "learning_rate": 6.749999999999999e-06, "loss": 1.1048, "step": 45 }, { "epoch": 0.01, "grad_norm": 0.0708666518330574, "learning_rate": 7.499999999999999e-06, "loss": 1.1369, "step": 50 }, { "epoch": 0.01, "grad_norm": 0.07739271968603134, "learning_rate": 8.249999999999999e-06, "loss": 1.1332, "step": 55 }, { "epoch": 0.01, "grad_norm": 0.0700831338763237, "learning_rate": 8.999999999999999e-06, "loss": 1.2094, "step": 60 }, { "epoch": 0.01, "grad_norm": 0.07708919048309326, "learning_rate": 9.75e-06, "loss": 1.1344, "step": 65 }, { "epoch": 0.01, "grad_norm": 0.07628925144672394, "learning_rate": 1.05e-05, "loss": 1.0817, "step": 70 }, { "epoch": 0.01, "grad_norm": 0.08013926446437836, "learning_rate": 1.1249999999999999e-05, "loss": 1.0898, "step": 75 
}, { "epoch": 0.01, "grad_norm": 0.09165573865175247, "learning_rate": 1.1999999999999999e-05, "loss": 1.1173, "step": 80 }, { "epoch": 0.01, "grad_norm": 0.08685644716024399, "learning_rate": 1.275e-05, "loss": 1.1268, "step": 85 }, { "epoch": 0.01, "grad_norm": 0.08818597346544266, "learning_rate": 1.3499999999999998e-05, "loss": 1.1928, "step": 90 }, { "epoch": 0.01, "grad_norm": 0.0848746970295906, "learning_rate": 1.4249999999999999e-05, "loss": 1.1179, "step": 95 }, { "epoch": 0.01, "grad_norm": 0.09676993638277054, "learning_rate": 1.4999999999999999e-05, "loss": 1.1478, "step": 100 }, { "epoch": 0.01, "grad_norm": 0.09682285040616989, "learning_rate": 1.5749999999999997e-05, "loss": 1.0813, "step": 105 }, { "epoch": 0.01, "grad_norm": 0.09817025810480118, "learning_rate": 1.6499999999999998e-05, "loss": 1.096, "step": 110 }, { "epoch": 0.01, "grad_norm": 0.10309538245201111, "learning_rate": 1.725e-05, "loss": 1.1418, "step": 115 }, { "epoch": 0.01, "grad_norm": 0.10393266379833221, "learning_rate": 1.7999999999999997e-05, "loss": 1.1351, "step": 120 }, { "epoch": 0.02, "grad_norm": 0.10402549803256989, "learning_rate": 1.875e-05, "loss": 1.0959, "step": 125 }, { "epoch": 0.02, "grad_norm": 0.10404133796691895, "learning_rate": 1.95e-05, "loss": 1.1869, "step": 130 }, { "epoch": 0.02, "grad_norm": 0.12272530794143677, "learning_rate": 2.025e-05, "loss": 1.1436, "step": 135 }, { "epoch": 0.02, "grad_norm": 0.1208878755569458, "learning_rate": 2.1e-05, "loss": 1.1444, "step": 140 }, { "epoch": 0.02, "grad_norm": 0.12307845801115036, "learning_rate": 2.1749999999999997e-05, "loss": 1.0762, "step": 145 }, { "epoch": 0.02, "grad_norm": 0.11315160989761353, "learning_rate": 2.2499999999999998e-05, "loss": 1.0269, "step": 150 }, { "epoch": 0.02, "grad_norm": 0.10878825187683105, "learning_rate": 2.325e-05, "loss": 1.1535, "step": 155 }, { "epoch": 0.02, "grad_norm": 0.11939235776662827, "learning_rate": 2.3999999999999997e-05, "loss": 1.1705, "step": 160 }, { 
"epoch": 0.02, "grad_norm": 0.11615506559610367, "learning_rate": 2.475e-05, "loss": 1.1406, "step": 165 }, { "epoch": 0.02, "grad_norm": 0.11318700015544891, "learning_rate": 2.55e-05, "loss": 1.0874, "step": 170 }, { "epoch": 0.02, "grad_norm": 0.1265944391489029, "learning_rate": 2.6249999999999998e-05, "loss": 1.1317, "step": 175 }, { "epoch": 0.02, "grad_norm": 0.153924360871315, "learning_rate": 2.6999999999999996e-05, "loss": 1.102, "step": 180 }, { "epoch": 0.02, "grad_norm": 0.1322435736656189, "learning_rate": 2.7749999999999997e-05, "loss": 1.0387, "step": 185 }, { "epoch": 0.02, "grad_norm": 0.1285255402326584, "learning_rate": 2.8499999999999998e-05, "loss": 1.1082, "step": 190 }, { "epoch": 0.02, "grad_norm": 0.13724276423454285, "learning_rate": 2.925e-05, "loss": 1.082, "step": 195 }, { "epoch": 0.02, "grad_norm": 0.13925635814666748, "learning_rate": 2.9999999999999997e-05, "loss": 1.2066, "step": 200 }, { "epoch": 0.02, "grad_norm": 0.16304455697536469, "learning_rate": 3.0749999999999995e-05, "loss": 1.1404, "step": 205 }, { "epoch": 0.03, "grad_norm": 0.1349136382341385, "learning_rate": 3.149999999999999e-05, "loss": 1.0513, "step": 210 }, { "epoch": 0.03, "grad_norm": 0.150814950466156, "learning_rate": 3.225e-05, "loss": 1.0785, "step": 215 }, { "epoch": 0.03, "grad_norm": 0.13001343607902527, "learning_rate": 3.2999999999999996e-05, "loss": 1.121, "step": 220 }, { "epoch": 0.03, "grad_norm": 0.1322905719280243, "learning_rate": 3.375e-05, "loss": 1.1, "step": 225 }, { "epoch": 0.03, "grad_norm": 0.1382080763578415, "learning_rate": 3.45e-05, "loss": 1.1415, "step": 230 }, { "epoch": 0.03, "grad_norm": 0.14844882488250732, "learning_rate": 3.5249999999999996e-05, "loss": 1.1337, "step": 235 }, { "epoch": 0.03, "grad_norm": 0.13436168432235718, "learning_rate": 3.5999999999999994e-05, "loss": 1.1736, "step": 240 }, { "epoch": 0.03, "grad_norm": 0.1420661062002182, "learning_rate": 3.675e-05, "loss": 1.0822, "step": 245 }, { "epoch": 0.03, 
"grad_norm": 0.14159414172172546, "learning_rate": 3.75e-05, "loss": 1.0864, "step": 250 }, { "epoch": 0.03, "grad_norm": 0.15307262539863586, "learning_rate": 3.8249999999999995e-05, "loss": 1.114, "step": 255 }, { "epoch": 0.03, "grad_norm": 0.1432732194662094, "learning_rate": 3.9e-05, "loss": 1.1041, "step": 260 }, { "epoch": 0.03, "grad_norm": 0.14971235394477844, "learning_rate": 3.975e-05, "loss": 1.1015, "step": 265 }, { "epoch": 0.03, "grad_norm": 0.13618604838848114, "learning_rate": 4.05e-05, "loss": 1.1041, "step": 270 }, { "epoch": 0.03, "grad_norm": 0.15167336165905, "learning_rate": 4.125e-05, "loss": 1.1905, "step": 275 }, { "epoch": 0.03, "grad_norm": 0.17375151813030243, "learning_rate": 4.2e-05, "loss": 1.0554, "step": 280 }, { "epoch": 0.03, "grad_norm": 0.15129323303699493, "learning_rate": 4.2749999999999996e-05, "loss": 1.1595, "step": 285 }, { "epoch": 0.03, "grad_norm": 0.13002634048461914, "learning_rate": 4.3499999999999993e-05, "loss": 1.0993, "step": 290 }, { "epoch": 0.04, "grad_norm": 0.15022237598896027, "learning_rate": 4.424999999999999e-05, "loss": 1.1154, "step": 295 }, { "epoch": 0.04, "grad_norm": 0.15329715609550476, "learning_rate": 4.4999999999999996e-05, "loss": 1.1181, "step": 300 }, { "epoch": 0.04, "grad_norm": 0.14781033992767334, "learning_rate": 4.5749999999999994e-05, "loss": 1.1708, "step": 305 }, { "epoch": 0.04, "grad_norm": 0.13594377040863037, "learning_rate": 4.65e-05, "loss": 1.0517, "step": 310 }, { "epoch": 0.04, "grad_norm": 0.14029279351234436, "learning_rate": 4.7249999999999997e-05, "loss": 1.0883, "step": 315 }, { "epoch": 0.04, "grad_norm": 0.14294695854187012, "learning_rate": 4.7999999999999994e-05, "loss": 1.166, "step": 320 }, { "epoch": 0.04, "grad_norm": 0.1437993049621582, "learning_rate": 4.875e-05, "loss": 1.0838, "step": 325 }, { "epoch": 0.04, "grad_norm": 0.13464024662971497, "learning_rate": 4.95e-05, "loss": 1.076, "step": 330 }, { "epoch": 0.04, "grad_norm": 0.15120777487754822, 
"learning_rate": 5.025e-05, "loss": 1.1465, "step": 335 }, { "epoch": 0.04, "grad_norm": 0.13077160716056824, "learning_rate": 5.1e-05, "loss": 0.9944, "step": 340 }, { "epoch": 0.04, "grad_norm": 0.14656583964824677, "learning_rate": 5.174999999999999e-05, "loss": 1.142, "step": 345 }, { "epoch": 0.04, "grad_norm": 0.14403922855854034, "learning_rate": 5.2499999999999995e-05, "loss": 1.1545, "step": 350 }, { "epoch": 0.04, "grad_norm": 0.14558084309101105, "learning_rate": 5.324999999999999e-05, "loss": 1.1135, "step": 355 }, { "epoch": 0.04, "grad_norm": 0.13940554857254028, "learning_rate": 5.399999999999999e-05, "loss": 1.1143, "step": 360 }, { "epoch": 0.04, "grad_norm": 0.12889456748962402, "learning_rate": 5.4749999999999996e-05, "loss": 1.0995, "step": 365 }, { "epoch": 0.04, "grad_norm": 0.13410042226314545, "learning_rate": 5.5499999999999994e-05, "loss": 1.0377, "step": 370 }, { "epoch": 0.05, "grad_norm": 0.1537715345621109, "learning_rate": 5.625e-05, "loss": 1.1654, "step": 375 }, { "epoch": 0.05, "grad_norm": 0.14372758567333221, "learning_rate": 5.6999999999999996e-05, "loss": 1.0393, "step": 380 }, { "epoch": 0.05, "grad_norm": 0.14559227228164673, "learning_rate": 5.7749999999999994e-05, "loss": 1.1712, "step": 385 }, { "epoch": 0.05, "grad_norm": 0.14143848419189453, "learning_rate": 5.85e-05, "loss": 1.0294, "step": 390 }, { "epoch": 0.05, "grad_norm": 0.13969220221042633, "learning_rate": 5.925e-05, "loss": 1.1678, "step": 395 }, { "epoch": 0.05, "grad_norm": 0.14875860512256622, "learning_rate": 5.9999999999999995e-05, "loss": 1.0829, "step": 400 }, { "epoch": 0.05, "grad_norm": 0.13244011998176575, "learning_rate": 6.075e-05, "loss": 1.173, "step": 405 }, { "epoch": 0.05, "grad_norm": 0.14143739640712738, "learning_rate": 6.149999999999999e-05, "loss": 1.0446, "step": 410 }, { "epoch": 0.05, "grad_norm": 0.13888464868068695, "learning_rate": 6.225e-05, "loss": 1.1222, "step": 415 }, { "epoch": 0.05, "grad_norm": 0.1454809457063675, 
"learning_rate": 6.299999999999999e-05, "loss": 1.0266, "step": 420 }, { "epoch": 0.05, "grad_norm": 0.1294557899236679, "learning_rate": 6.374999999999999e-05, "loss": 1.0773, "step": 425 }, { "epoch": 0.05, "grad_norm": 0.13563519716262817, "learning_rate": 6.45e-05, "loss": 1.0808, "step": 430 }, { "epoch": 0.05, "grad_norm": 0.1332767903804779, "learning_rate": 6.525e-05, "loss": 1.1356, "step": 435 }, { "epoch": 0.05, "grad_norm": 0.14194917678833008, "learning_rate": 6.599999999999999e-05, "loss": 1.0592, "step": 440 }, { "epoch": 0.05, "grad_norm": 0.1359052062034607, "learning_rate": 6.675e-05, "loss": 1.1103, "step": 445 }, { "epoch": 0.05, "grad_norm": 0.12448085099458694, "learning_rate": 6.75e-05, "loss": 1.0912, "step": 450 }, { "epoch": 0.05, "grad_norm": 0.14592844247817993, "learning_rate": 6.824999999999999e-05, "loss": 1.0549, "step": 455 }, { "epoch": 0.06, "grad_norm": 0.14550206065177917, "learning_rate": 6.9e-05, "loss": 1.0997, "step": 460 }, { "epoch": 0.06, "grad_norm": 0.13571631908416748, "learning_rate": 6.975e-05, "loss": 1.1559, "step": 465 }, { "epoch": 0.06, "grad_norm": 0.12359726428985596, "learning_rate": 7.049999999999999e-05, "loss": 1.1352, "step": 470 }, { "epoch": 0.06, "grad_norm": 0.1370527297258377, "learning_rate": 7.125e-05, "loss": 1.1114, "step": 475 }, { "epoch": 0.06, "grad_norm": 0.1392049938440323, "learning_rate": 7.199999999999999e-05, "loss": 1.1121, "step": 480 }, { "epoch": 0.06, "grad_norm": 0.13348931074142456, "learning_rate": 7.274999999999999e-05, "loss": 1.1471, "step": 485 }, { "epoch": 0.06, "grad_norm": 0.1287151575088501, "learning_rate": 7.35e-05, "loss": 1.1812, "step": 490 }, { "epoch": 0.06, "grad_norm": 0.13077159225940704, "learning_rate": 7.424999999999999e-05, "loss": 1.0183, "step": 495 }, { "epoch": 0.06, "grad_norm": 0.13283278048038483, "learning_rate": 7.5e-05, "loss": 1.1351, "step": 500 }, { "epoch": 0.06, "grad_norm": 0.14058330655097961, "learning_rate": 7.575e-05, "loss": 1.0175, 
"step": 505 }, { "epoch": 0.06, "grad_norm": 0.1260114163160324, "learning_rate": 7.649999999999999e-05, "loss": 1.0918, "step": 510 }, { "epoch": 0.06, "grad_norm": 0.12077819555997849, "learning_rate": 7.725e-05, "loss": 1.2125, "step": 515 }, { "epoch": 0.06, "grad_norm": 0.13014502823352814, "learning_rate": 7.8e-05, "loss": 1.0959, "step": 520 }, { "epoch": 0.06, "grad_norm": 0.13080598413944244, "learning_rate": 7.874999999999999e-05, "loss": 1.1083, "step": 525 }, { "epoch": 0.06, "grad_norm": 0.12461178004741669, "learning_rate": 7.95e-05, "loss": 1.0968, "step": 530 }, { "epoch": 0.06, "grad_norm": 0.13079579174518585, "learning_rate": 8.025e-05, "loss": 1.1039, "step": 535 }, { "epoch": 0.07, "grad_norm": 0.12605786323547363, "learning_rate": 8.1e-05, "loss": 1.0558, "step": 540 }, { "epoch": 0.07, "grad_norm": 0.13258296251296997, "learning_rate": 8.175e-05, "loss": 1.0635, "step": 545 }, { "epoch": 0.07, "grad_norm": 0.1320507675409317, "learning_rate": 8.25e-05, "loss": 1.042, "step": 550 }, { "epoch": 0.07, "grad_norm": 0.11775451898574829, "learning_rate": 8.325e-05, "loss": 1.1299, "step": 555 }, { "epoch": 0.07, "grad_norm": 0.12879742681980133, "learning_rate": 8.4e-05, "loss": 1.0587, "step": 560 }, { "epoch": 0.07, "grad_norm": 0.1299828737974167, "learning_rate": 8.474999999999999e-05, "loss": 1.0773, "step": 565 }, { "epoch": 0.07, "grad_norm": 0.1389782577753067, "learning_rate": 8.549999999999999e-05, "loss": 1.066, "step": 570 }, { "epoch": 0.07, "grad_norm": 0.12050167471170425, "learning_rate": 8.624999999999998e-05, "loss": 1.0786, "step": 575 }, { "epoch": 0.07, "grad_norm": 0.14948152005672455, "learning_rate": 8.699999999999999e-05, "loss": 1.1208, "step": 580 }, { "epoch": 0.07, "grad_norm": 0.13367661833763123, "learning_rate": 8.774999999999999e-05, "loss": 1.0754, "step": 585 }, { "epoch": 0.07, "grad_norm": 0.12811419367790222, "learning_rate": 8.849999999999998e-05, "loss": 1.1275, "step": 590 }, { "epoch": 0.07, "grad_norm": 
0.12895521521568298, "learning_rate": 8.924999999999999e-05, "loss": 1.112, "step": 595 }, { "epoch": 0.07, "grad_norm": 0.1275772899389267, "learning_rate": 8.999999999999999e-05, "loss": 1.1171, "step": 600 }, { "epoch": 0.07, "grad_norm": 0.12136266380548477, "learning_rate": 9.074999999999998e-05, "loss": 1.119, "step": 605 }, { "epoch": 0.07, "grad_norm": 0.12577873468399048, "learning_rate": 9.149999999999999e-05, "loss": 1.059, "step": 610 }, { "epoch": 0.07, "grad_norm": 0.11611288040876389, "learning_rate": 9.224999999999999e-05, "loss": 1.093, "step": 615 }, { "epoch": 0.07, "grad_norm": 0.1267736703157425, "learning_rate": 9.3e-05, "loss": 1.1074, "step": 620 }, { "epoch": 0.08, "grad_norm": 0.12328934669494629, "learning_rate": 9.374999999999999e-05, "loss": 1.0373, "step": 625 }, { "epoch": 0.08, "grad_norm": 0.13137248158454895, "learning_rate": 9.449999999999999e-05, "loss": 1.0127, "step": 630 }, { "epoch": 0.08, "grad_norm": 0.13046279549598694, "learning_rate": 9.525e-05, "loss": 1.0549, "step": 635 }, { "epoch": 0.08, "grad_norm": 0.11722920835018158, "learning_rate": 9.599999999999999e-05, "loss": 1.0051, "step": 640 }, { "epoch": 0.08, "grad_norm": 0.14764057099819183, "learning_rate": 9.675e-05, "loss": 1.1092, "step": 645 }, { "epoch": 0.08, "grad_norm": 0.12383400648832321, "learning_rate": 9.75e-05, "loss": 1.1019, "step": 650 }, { "epoch": 0.08, "grad_norm": 0.13521508872509003, "learning_rate": 9.824999999999999e-05, "loss": 1.0963, "step": 655 }, { "epoch": 0.08, "grad_norm": 0.12719859182834625, "learning_rate": 9.9e-05, "loss": 1.1604, "step": 660 }, { "epoch": 0.08, "grad_norm": 0.14219126105308533, "learning_rate": 9.975e-05, "loss": 1.0636, "step": 665 }, { "epoch": 0.08, "grad_norm": 0.1256975382566452, "learning_rate": 0.0001005, "loss": 1.1046, "step": 670 }, { "epoch": 0.08, "grad_norm": 0.12861751019954681, "learning_rate": 0.00010125, "loss": 1.0016, "step": 675 }, { "epoch": 0.08, "grad_norm": 0.1278897374868393, 
"learning_rate": 0.000102, "loss": 1.108, "step": 680 }, { "epoch": 0.08, "grad_norm": 0.11729888617992401, "learning_rate": 0.00010275, "loss": 1.1411, "step": 685 }, { "epoch": 0.08, "grad_norm": 0.12147238105535507, "learning_rate": 0.00010349999999999998, "loss": 1.094, "step": 690 }, { "epoch": 0.08, "grad_norm": 0.12336426228284836, "learning_rate": 0.00010424999999999999, "loss": 1.0397, "step": 695 }, { "epoch": 0.08, "grad_norm": 0.12023478001356125, "learning_rate": 0.00010499999999999999, "loss": 1.0665, "step": 700 }, { "epoch": 0.08, "grad_norm": 0.11755697429180145, "learning_rate": 0.00010574999999999998, "loss": 1.061, "step": 705 }, { "epoch": 0.09, "grad_norm": 0.11740587651729584, "learning_rate": 0.00010649999999999999, "loss": 1.0637, "step": 710 }, { "epoch": 0.09, "grad_norm": 0.13212309777736664, "learning_rate": 0.00010724999999999999, "loss": 1.0128, "step": 715 }, { "epoch": 0.09, "grad_norm": 0.13335004448890686, "learning_rate": 0.00010799999999999998, "loss": 1.064, "step": 720 }, { "epoch": 0.09, "grad_norm": 0.12249240279197693, "learning_rate": 0.00010874999999999999, "loss": 1.0939, "step": 725 }, { "epoch": 0.09, "grad_norm": 0.13641893863677979, "learning_rate": 0.00010949999999999999, "loss": 1.1127, "step": 730 }, { "epoch": 0.09, "grad_norm": 0.127156600356102, "learning_rate": 0.00011024999999999998, "loss": 1.0527, "step": 735 }, { "epoch": 0.09, "grad_norm": 0.1096034049987793, "learning_rate": 0.00011099999999999999, "loss": 1.1304, "step": 740 }, { "epoch": 0.09, "grad_norm": 0.12487472593784332, "learning_rate": 0.00011174999999999999, "loss": 1.0862, "step": 745 }, { "epoch": 0.09, "grad_norm": 0.12395402044057846, "learning_rate": 0.0001125, "loss": 1.0641, "step": 750 }, { "epoch": 0.09, "grad_norm": 0.12146257609128952, "learning_rate": 0.00011324999999999999, "loss": 1.1303, "step": 755 }, { "epoch": 0.09, "grad_norm": 0.12403228133916855, "learning_rate": 0.00011399999999999999, "loss": 1.0214, "step": 760 }, { 
"epoch": 0.09, "grad_norm": 0.11013570427894592, "learning_rate": 0.00011475, "loss": 1.0696, "step": 765 }, { "epoch": 0.09, "grad_norm": 0.11409767717123032, "learning_rate": 0.00011549999999999999, "loss": 1.2188, "step": 770 }, { "epoch": 0.09, "grad_norm": 0.12632502615451813, "learning_rate": 0.00011624999999999999, "loss": 1.113, "step": 775 }, { "epoch": 0.09, "grad_norm": 0.11955476552248001, "learning_rate": 0.000117, "loss": 1.1429, "step": 780 }, { "epoch": 0.09, "grad_norm": 0.13300061225891113, "learning_rate": 0.00011774999999999999, "loss": 1.1171, "step": 785 }, { "epoch": 0.1, "grad_norm": 0.12019680440425873, "learning_rate": 0.0001185, "loss": 1.0675, "step": 790 }, { "epoch": 0.1, "grad_norm": 0.12077269703149796, "learning_rate": 0.00011925, "loss": 1.0694, "step": 795 }, { "epoch": 0.1, "grad_norm": 0.12203273177146912, "learning_rate": 0.00011999999999999999, "loss": 1.0378, "step": 800 }, { "epoch": 0.1, "grad_norm": 0.1268533170223236, "learning_rate": 0.00012075, "loss": 1.1294, "step": 805 }, { "epoch": 0.1, "grad_norm": 0.1281730681657791, "learning_rate": 0.0001215, "loss": 1.024, "step": 810 }, { "epoch": 0.1, "grad_norm": 0.11289986968040466, "learning_rate": 0.00012225, "loss": 1.0776, "step": 815 }, { "epoch": 0.1, "grad_norm": 0.11376605927944183, "learning_rate": 0.00012299999999999998, "loss": 1.107, "step": 820 }, { "epoch": 0.1, "grad_norm": 0.1271873265504837, "learning_rate": 0.00012374999999999997, "loss": 1.1817, "step": 825 }, { "epoch": 0.1, "grad_norm": 0.10974998027086258, "learning_rate": 0.0001245, "loss": 1.0228, "step": 830 }, { "epoch": 0.1, "grad_norm": 0.13189394772052765, "learning_rate": 0.00012524999999999998, "loss": 1.1298, "step": 835 }, { "epoch": 0.1, "grad_norm": 0.10656459629535675, "learning_rate": 0.00012599999999999997, "loss": 1.0833, "step": 840 }, { "epoch": 0.1, "grad_norm": 0.14083336293697357, "learning_rate": 0.00012675, "loss": 1.0027, "step": 845 }, { "epoch": 0.1, "grad_norm": 
0.1123281791806221, "learning_rate": 0.00012749999999999998, "loss": 1.0953, "step": 850 }, { "epoch": 0.1, "grad_norm": 0.12783868610858917, "learning_rate": 0.00012824999999999997, "loss": 1.0793, "step": 855 }, { "epoch": 0.1, "grad_norm": 0.12001547962427139, "learning_rate": 0.000129, "loss": 1.0685, "step": 860 }, { "epoch": 0.1, "grad_norm": 0.11165610700845718, "learning_rate": 0.00012974999999999998, "loss": 1.111, "step": 865 }, { "epoch": 0.1, "grad_norm": 0.12007506936788559, "learning_rate": 0.0001305, "loss": 1.0681, "step": 870 }, { "epoch": 0.11, "grad_norm": 0.12255184352397919, "learning_rate": 0.00013125, "loss": 1.1185, "step": 875 }, { "epoch": 0.11, "grad_norm": 0.12230581790208817, "learning_rate": 0.00013199999999999998, "loss": 1.0196, "step": 880 }, { "epoch": 0.11, "grad_norm": 0.11624744534492493, "learning_rate": 0.00013275, "loss": 1.0478, "step": 885 }, { "epoch": 0.11, "grad_norm": 0.11575216054916382, "learning_rate": 0.0001335, "loss": 1.0387, "step": 890 }, { "epoch": 0.11, "grad_norm": 0.1169903427362442, "learning_rate": 0.00013424999999999998, "loss": 1.1127, "step": 895 }, { "epoch": 0.11, "grad_norm": 0.11636984348297119, "learning_rate": 0.000135, "loss": 1.0555, "step": 900 }, { "epoch": 0.11, "grad_norm": 0.12122248113155365, "learning_rate": 0.00013575, "loss": 1.1497, "step": 905 }, { "epoch": 0.11, "grad_norm": 0.1243358924984932, "learning_rate": 0.00013649999999999998, "loss": 1.0675, "step": 910 }, { "epoch": 0.11, "grad_norm": 0.11892006546258926, "learning_rate": 0.00013725, "loss": 1.1499, "step": 915 }, { "epoch": 0.11, "grad_norm": 0.12357810884714127, "learning_rate": 0.000138, "loss": 1.0508, "step": 920 }, { "epoch": 0.11, "grad_norm": 0.11074083298444748, "learning_rate": 0.00013874999999999998, "loss": 0.9855, "step": 925 }, { "epoch": 0.11, "grad_norm": 0.1215604916214943, "learning_rate": 0.0001395, "loss": 1.0723, "step": 930 }, { "epoch": 0.11, "grad_norm": 0.11568805575370789, "learning_rate": 
0.00014025, "loss": 1.141, "step": 935 }, { "epoch": 0.11, "grad_norm": 0.1251344233751297, "learning_rate": 0.00014099999999999998, "loss": 1.0817, "step": 940 }, { "epoch": 0.11, "grad_norm": 0.12408124655485153, "learning_rate": 0.00014174999999999998, "loss": 0.9859, "step": 945 }, { "epoch": 0.11, "grad_norm": 0.13382701575756073, "learning_rate": 0.0001425, "loss": 1.0902, "step": 950 }, { "epoch": 0.12, "grad_norm": 0.12058982998132706, "learning_rate": 0.00014324999999999999, "loss": 1.0817, "step": 955 }, { "epoch": 0.12, "grad_norm": 0.12119226902723312, "learning_rate": 0.00014399999999999998, "loss": 1.0502, "step": 960 }, { "epoch": 0.12, "grad_norm": 0.14532142877578735, "learning_rate": 0.00014475, "loss": 1.0244, "step": 965 }, { "epoch": 0.12, "grad_norm": 0.12690915167331696, "learning_rate": 0.00014549999999999999, "loss": 1.1313, "step": 970 }, { "epoch": 0.12, "grad_norm": 0.11626715213060379, "learning_rate": 0.00014624999999999998, "loss": 1.088, "step": 975 }, { "epoch": 0.12, "grad_norm": 0.11747872829437256, "learning_rate": 0.000147, "loss": 1.0855, "step": 980 }, { "epoch": 0.12, "grad_norm": 0.12771141529083252, "learning_rate": 0.00014774999999999999, "loss": 1.1304, "step": 985 }, { "epoch": 0.12, "grad_norm": 0.1297111064195633, "learning_rate": 0.00014849999999999998, "loss": 1.0307, "step": 990 }, { "epoch": 0.12, "grad_norm": 0.12018551677465439, "learning_rate": 0.00014925, "loss": 1.0946, "step": 995 }, { "epoch": 0.12, "grad_norm": 0.11594726890325546, "learning_rate": 0.00015, "loss": 1.0314, "step": 1000 }, { "epoch": 0.12, "grad_norm": 0.133875772356987, "learning_rate": 0.00015074999999999998, "loss": 1.0018, "step": 1005 }, { "epoch": 0.12, "grad_norm": 0.1387271285057068, "learning_rate": 0.0001515, "loss": 1.0417, "step": 1010 }, { "epoch": 0.12, "grad_norm": 0.11644894629716873, "learning_rate": 0.00015224999999999996, "loss": 1.045, "step": 1015 }, { "epoch": 0.12, "grad_norm": 0.13126513361930847, "learning_rate": 
0.00015299999999999998, "loss": 1.0666, "step": 1020 }, { "epoch": 0.12, "grad_norm": 0.12716297805309296, "learning_rate": 0.00015374999999999997, "loss": 1.0578, "step": 1025 }, { "epoch": 0.12, "grad_norm": 0.11895552277565002, "learning_rate": 0.0001545, "loss": 1.1052, "step": 1030 }, { "epoch": 0.12, "grad_norm": 0.12736555933952332, "learning_rate": 0.00015524999999999998, "loss": 0.9881, "step": 1035 }, { "epoch": 0.13, "grad_norm": 0.11497914791107178, "learning_rate": 0.000156, "loss": 1.0462, "step": 1040 }, { "epoch": 0.13, "grad_norm": 0.13537730276584625, "learning_rate": 0.00015675, "loss": 0.9891, "step": 1045 }, { "epoch": 0.13, "grad_norm": 0.12179003655910492, "learning_rate": 0.00015749999999999998, "loss": 1.0565, "step": 1050 }, { "epoch": 0.13, "grad_norm": 0.12696489691734314, "learning_rate": 0.00015824999999999997, "loss": 1.0551, "step": 1055 }, { "epoch": 0.13, "grad_norm": 0.11586569994688034, "learning_rate": 0.000159, "loss": 1.1354, "step": 1060 }, { "epoch": 0.13, "grad_norm": 0.1284131109714508, "learning_rate": 0.00015974999999999998, "loss": 1.1317, "step": 1065 }, { "epoch": 0.13, "grad_norm": 0.13363447785377502, "learning_rate": 0.0001605, "loss": 1.0358, "step": 1070 }, { "epoch": 0.13, "grad_norm": 0.13264328241348267, "learning_rate": 0.00016125, "loss": 1.0852, "step": 1075 }, { "epoch": 0.13, "grad_norm": 0.12372339516878128, "learning_rate": 0.000162, "loss": 0.997, "step": 1080 }, { "epoch": 0.13, "grad_norm": 0.12511901557445526, "learning_rate": 0.00016274999999999997, "loss": 1.0837, "step": 1085 }, { "epoch": 0.13, "grad_norm": 0.12333539128303528, "learning_rate": 0.0001635, "loss": 1.0565, "step": 1090 }, { "epoch": 0.13, "grad_norm": 0.1194729283452034, "learning_rate": 0.00016424999999999998, "loss": 1.0099, "step": 1095 }, { "epoch": 0.13, "grad_norm": 0.12635278701782227, "learning_rate": 0.000165, "loss": 1.039, "step": 1100 }, { "epoch": 0.13, "grad_norm": 0.124270498752594, "learning_rate": 0.00016575, 
"loss": 1.165, "step": 1105 }, { "epoch": 0.13, "grad_norm": 0.12829731404781342, "learning_rate": 0.0001665, "loss": 1.0653, "step": 1110 }, { "epoch": 0.13, "grad_norm": 0.13601495325565338, "learning_rate": 0.00016724999999999997, "loss": 1.0509, "step": 1115 }, { "epoch": 0.13, "grad_norm": 0.1377023309469223, "learning_rate": 0.000168, "loss": 1.0376, "step": 1120 }, { "epoch": 0.14, "grad_norm": 0.12421699613332748, "learning_rate": 0.00016874999999999998, "loss": 1.0082, "step": 1125 }, { "epoch": 0.14, "grad_norm": 0.12038341909646988, "learning_rate": 0.00016949999999999997, "loss": 1.0865, "step": 1130 }, { "epoch": 0.14, "grad_norm": 0.11900181323289871, "learning_rate": 0.00017025, "loss": 1.1051, "step": 1135 }, { "epoch": 0.14, "grad_norm": 0.12548121809959412, "learning_rate": 0.00017099999999999998, "loss": 1.1436, "step": 1140 }, { "epoch": 0.14, "grad_norm": 0.12389590591192245, "learning_rate": 0.00017175, "loss": 0.9946, "step": 1145 }, { "epoch": 0.14, "grad_norm": 0.13046157360076904, "learning_rate": 0.00017249999999999996, "loss": 1.0448, "step": 1150 }, { "epoch": 0.14, "grad_norm": 0.13118329644203186, "learning_rate": 0.00017324999999999998, "loss": 1.0698, "step": 1155 }, { "epoch": 0.14, "grad_norm": 0.11619829386472702, "learning_rate": 0.00017399999999999997, "loss": 1.0619, "step": 1160 }, { "epoch": 0.14, "grad_norm": 0.12934699654579163, "learning_rate": 0.00017475, "loss": 1.0755, "step": 1165 }, { "epoch": 0.14, "grad_norm": 0.13581542670726776, "learning_rate": 0.00017549999999999998, "loss": 1.0456, "step": 1170 }, { "epoch": 0.14, "grad_norm": 0.1390586644411087, "learning_rate": 0.00017625, "loss": 1.0822, "step": 1175 }, { "epoch": 0.14, "grad_norm": 0.13362278044223785, "learning_rate": 0.00017699999999999997, "loss": 1.0588, "step": 1180 }, { "epoch": 0.14, "grad_norm": 0.1188979223370552, "learning_rate": 0.00017774999999999998, "loss": 1.0941, "step": 1185 }, { "epoch": 0.14, "grad_norm": 0.11467181891202927, 
"learning_rate": 0.00017849999999999997, "loss": 1.0485, "step": 1190 }, { "epoch": 0.14, "grad_norm": 0.13196997344493866, "learning_rate": 0.00017925, "loss": 1.1128, "step": 1195 }, { "epoch": 0.14, "grad_norm": 0.13005390763282776, "learning_rate": 0.00017999999999999998, "loss": 1.0536, "step": 1200 }, { "epoch": 0.15, "grad_norm": 0.13372902572155, "learning_rate": 0.00018075, "loss": 1.1123, "step": 1205 }, { "epoch": 0.15, "grad_norm": 0.13549335300922394, "learning_rate": 0.00018149999999999997, "loss": 1.0998, "step": 1210 }, { "epoch": 0.15, "grad_norm": 0.13829483091831207, "learning_rate": 0.00018224999999999998, "loss": 1.0174, "step": 1215 }, { "epoch": 0.15, "grad_norm": 0.1173296719789505, "learning_rate": 0.00018299999999999998, "loss": 1.0898, "step": 1220 }, { "epoch": 0.15, "grad_norm": 0.12226750701665878, "learning_rate": 0.00018375, "loss": 1.0516, "step": 1225 }, { "epoch": 0.15, "grad_norm": 0.12172769755125046, "learning_rate": 0.00018449999999999999, "loss": 1.0495, "step": 1230 }, { "epoch": 0.15, "grad_norm": 0.12832506000995636, "learning_rate": 0.00018525, "loss": 1.0816, "step": 1235 }, { "epoch": 0.15, "grad_norm": 0.12533822655677795, "learning_rate": 0.000186, "loss": 1.0122, "step": 1240 }, { "epoch": 0.15, "grad_norm": 0.13508310914039612, "learning_rate": 0.00018675, "loss": 1.0122, "step": 1245 }, { "epoch": 0.15, "grad_norm": 0.11473240703344345, "learning_rate": 0.00018749999999999998, "loss": 1.0813, "step": 1250 }, { "epoch": 0.15, "grad_norm": 0.12293410301208496, "learning_rate": 0.00018824999999999997, "loss": 1.03, "step": 1255 }, { "epoch": 0.15, "grad_norm": 0.14231853187084198, "learning_rate": 0.00018899999999999999, "loss": 1.0724, "step": 1260 }, { "epoch": 0.15, "grad_norm": 0.1204795315861702, "learning_rate": 0.00018974999999999998, "loss": 1.0449, "step": 1265 }, { "epoch": 0.15, "grad_norm": 0.12521016597747803, "learning_rate": 0.0001905, "loss": 1.1008, "step": 1270 }, { "epoch": 0.15, "grad_norm": 
0.12301227450370789, "learning_rate": 0.00019124999999999996, "loss": 1.1471, "step": 1275 }, { "epoch": 0.15, "grad_norm": 0.1287146359682083, "learning_rate": 0.00019199999999999998, "loss": 1.0501, "step": 1280 }, { "epoch": 0.15, "grad_norm": 0.13978512585163116, "learning_rate": 0.00019274999999999997, "loss": 1.03, "step": 1285 }, { "epoch": 0.16, "grad_norm": 0.12224988639354706, "learning_rate": 0.0001935, "loss": 1.0996, "step": 1290 }, { "epoch": 0.16, "grad_norm": 0.1454314887523651, "learning_rate": 0.00019424999999999998, "loss": 1.0529, "step": 1295 }, { "epoch": 0.16, "grad_norm": 0.12719061970710754, "learning_rate": 0.000195, "loss": 1.0984, "step": 1300 }, { "epoch": 0.16, "grad_norm": 0.13518300652503967, "learning_rate": 0.00019574999999999996, "loss": 1.0336, "step": 1305 }, { "epoch": 0.16, "grad_norm": 0.1353674679994583, "learning_rate": 0.00019649999999999998, "loss": 1.1046, "step": 1310 }, { "epoch": 0.16, "grad_norm": 0.13263052701950073, "learning_rate": 0.00019724999999999997, "loss": 0.9654, "step": 1315 }, { "epoch": 0.16, "grad_norm": 0.13989952206611633, "learning_rate": 0.000198, "loss": 1.0994, "step": 1320 }, { "epoch": 0.16, "grad_norm": 0.13030202686786652, "learning_rate": 0.00019874999999999998, "loss": 1.1445, "step": 1325 }, { "epoch": 0.16, "grad_norm": 0.12201202660799026, "learning_rate": 0.0001995, "loss": 1.0605, "step": 1330 }, { "epoch": 0.16, "grad_norm": 0.1385253220796585, "learning_rate": 0.00020025, "loss": 1.0405, "step": 1335 }, { "epoch": 0.16, "grad_norm": 0.14180108904838562, "learning_rate": 0.000201, "loss": 1.0612, "step": 1340 }, { "epoch": 0.16, "grad_norm": 0.1332985907793045, "learning_rate": 0.00020174999999999997, "loss": 1.0653, "step": 1345 }, { "epoch": 0.16, "grad_norm": 0.14803795516490936, "learning_rate": 0.0002025, "loss": 1.1237, "step": 1350 }, { "epoch": 0.16, "grad_norm": 0.13238713145256042, "learning_rate": 0.00020324999999999998, "loss": 1.1257, "step": 1355 }, { "epoch": 0.16, 
"grad_norm": 0.12412701547145844, "learning_rate": 0.000204, "loss": 1.0947, "step": 1360 }, { "epoch": 0.16, "grad_norm": 0.13152962923049927, "learning_rate": 0.00020475, "loss": 1.1676, "step": 1365 }, { "epoch": 0.17, "grad_norm": 0.13383346796035767, "learning_rate": 0.0002055, "loss": 1.0314, "step": 1370 }, { "epoch": 0.17, "grad_norm": 0.12599609792232513, "learning_rate": 0.00020624999999999997, "loss": 0.9767, "step": 1375 }, { "epoch": 0.17, "grad_norm": 0.12485210597515106, "learning_rate": 0.00020699999999999996, "loss": 1.1578, "step": 1380 }, { "epoch": 0.17, "grad_norm": 0.15362361073493958, "learning_rate": 0.00020774999999999998, "loss": 1.0263, "step": 1385 }, { "epoch": 0.17, "grad_norm": 0.13580173254013062, "learning_rate": 0.00020849999999999997, "loss": 1.0553, "step": 1390 }, { "epoch": 0.17, "grad_norm": 0.13229776918888092, "learning_rate": 0.00020925, "loss": 1.0242, "step": 1395 }, { "epoch": 0.17, "grad_norm": 0.14048846065998077, "learning_rate": 0.00020999999999999998, "loss": 1.0541, "step": 1400 }, { "epoch": 0.17, "grad_norm": 0.1278860718011856, "learning_rate": 0.00021074999999999997, "loss": 1.0295, "step": 1405 }, { "epoch": 0.17, "grad_norm": 0.12558186054229736, "learning_rate": 0.00021149999999999996, "loss": 0.9833, "step": 1410 }, { "epoch": 0.17, "grad_norm": 0.13044369220733643, "learning_rate": 0.00021224999999999998, "loss": 1.0244, "step": 1415 }, { "epoch": 0.17, "grad_norm": 0.12153918296098709, "learning_rate": 0.00021299999999999997, "loss": 1.0513, "step": 1420 }, { "epoch": 0.17, "grad_norm": 0.1474679559469223, "learning_rate": 0.00021375, "loss": 1.0771, "step": 1425 }, { "epoch": 0.17, "grad_norm": 0.13985954225063324, "learning_rate": 0.00021449999999999998, "loss": 1.0419, "step": 1430 }, { "epoch": 0.17, "grad_norm": 0.13685761392116547, "learning_rate": 0.00021525, "loss": 1.045, "step": 1435 }, { "epoch": 0.17, "grad_norm": 0.13151884078979492, "learning_rate": 0.00021599999999999996, "loss": 1.0856, 
"step": 1440 }, { "epoch": 0.17, "grad_norm": 0.12494616955518723, "learning_rate": 0.00021674999999999998, "loss": 1.093, "step": 1445 }, { "epoch": 0.17, "grad_norm": 0.1363985389471054, "learning_rate": 0.00021749999999999997, "loss": 1.0921, "step": 1450 }, { "epoch": 0.18, "grad_norm": 0.13812515139579773, "learning_rate": 0.00021825, "loss": 1.0832, "step": 1455 }, { "epoch": 0.18, "grad_norm": 0.1363314837217331, "learning_rate": 0.00021899999999999998, "loss": 1.0995, "step": 1460 }, { "epoch": 0.18, "grad_norm": 0.12492544949054718, "learning_rate": 0.00021975, "loss": 1.0806, "step": 1465 }, { "epoch": 0.18, "grad_norm": 0.12725725769996643, "learning_rate": 0.00022049999999999997, "loss": 1.0276, "step": 1470 }, { "epoch": 0.18, "grad_norm": 0.11777817457914352, "learning_rate": 0.00022124999999999998, "loss": 0.9419, "step": 1475 }, { "epoch": 0.18, "grad_norm": 0.13982278108596802, "learning_rate": 0.00022199999999999998, "loss": 1.0673, "step": 1480 }, { "epoch": 0.18, "grad_norm": 0.15291635692119598, "learning_rate": 0.00022275, "loss": 1.0753, "step": 1485 }, { "epoch": 0.18, "grad_norm": 0.1316549926996231, "learning_rate": 0.00022349999999999998, "loss": 1.0793, "step": 1490 }, { "epoch": 0.18, "grad_norm": 0.13658076524734497, "learning_rate": 0.00022425, "loss": 1.0742, "step": 1495 }, { "epoch": 0.18, "grad_norm": 0.12889617681503296, "learning_rate": 0.000225, "loss": 1.0272, "step": 1500 }, { "epoch": 0.18, "grad_norm": 0.1431025117635727, "learning_rate": 0.00022574999999999996, "loss": 1.056, "step": 1505 }, { "epoch": 0.18, "grad_norm": 0.12882299721240997, "learning_rate": 0.00022649999999999998, "loss": 1.0965, "step": 1510 }, { "epoch": 0.18, "grad_norm": 0.1326344758272171, "learning_rate": 0.00022724999999999997, "loss": 1.1601, "step": 1515 }, { "epoch": 0.18, "grad_norm": 0.1312105506658554, "learning_rate": 0.00022799999999999999, "loss": 1.0286, "step": 1520 }, { "epoch": 0.18, "grad_norm": 0.13419494032859802, "learning_rate": 
0.00022874999999999998, "loss": 1.0998, "step": 1525 }, { "epoch": 0.18, "grad_norm": 0.12598495185375214, "learning_rate": 0.0002295, "loss": 1.0099, "step": 1530 }, { "epoch": 0.18, "grad_norm": 0.12234248965978622, "learning_rate": 0.00023024999999999996, "loss": 1.0512, "step": 1535 }, { "epoch": 0.19, "grad_norm": 0.1486140936613083, "learning_rate": 0.00023099999999999998, "loss": 1.0563, "step": 1540 }, { "epoch": 0.19, "grad_norm": 0.13529768586158752, "learning_rate": 0.00023174999999999997, "loss": 0.9952, "step": 1545 }, { "epoch": 0.19, "grad_norm": 0.1367512345314026, "learning_rate": 0.00023249999999999999, "loss": 0.9924, "step": 1550 }, { "epoch": 0.19, "grad_norm": 0.14160625636577606, "learning_rate": 0.00023324999999999998, "loss": 0.9568, "step": 1555 }, { "epoch": 0.19, "grad_norm": 0.13413269817829132, "learning_rate": 0.000234, "loss": 1.0429, "step": 1560 }, { "epoch": 0.19, "grad_norm": 0.13511444628238678, "learning_rate": 0.00023474999999999996, "loss": 1.0548, "step": 1565 }, { "epoch": 0.19, "grad_norm": 0.13897758722305298, "learning_rate": 0.00023549999999999998, "loss": 1.0312, "step": 1570 }, { "epoch": 0.19, "grad_norm": 0.13271476328372955, "learning_rate": 0.00023624999999999997, "loss": 1.1496, "step": 1575 }, { "epoch": 0.19, "grad_norm": 0.12695930898189545, "learning_rate": 0.000237, "loss": 1.0083, "step": 1580 }, { "epoch": 0.19, "grad_norm": 0.1538165807723999, "learning_rate": 0.00023774999999999998, "loss": 1.1104, "step": 1585 }, { "epoch": 0.19, "grad_norm": 0.13440971076488495, "learning_rate": 0.0002385, "loss": 1.0746, "step": 1590 }, { "epoch": 0.19, "grad_norm": 0.15520578622817993, "learning_rate": 0.00023925, "loss": 1.0702, "step": 1595 }, { "epoch": 0.19, "grad_norm": 0.1417379379272461, "learning_rate": 0.00023999999999999998, "loss": 1.0313, "step": 1600 }, { "epoch": 0.19, "grad_norm": 0.1348305493593216, "learning_rate": 0.00024074999999999997, "loss": 1.0489, "step": 1605 }, { "epoch": 0.19, "grad_norm": 
0.13864970207214355, "learning_rate": 0.0002415, "loss": 1.0563, "step": 1610 }, { "epoch": 0.19, "grad_norm": 0.12686601281166077, "learning_rate": 0.00024224999999999998, "loss": 1.1186, "step": 1615 }, { "epoch": 0.2, "grad_norm": 0.1524277776479721, "learning_rate": 0.000243, "loss": 1.0285, "step": 1620 }, { "epoch": 0.2, "grad_norm": 0.13417251408100128, "learning_rate": 0.00024375, "loss": 1.0816, "step": 1625 }, { "epoch": 0.2, "grad_norm": 0.13841432332992554, "learning_rate": 0.0002445, "loss": 0.9558, "step": 1630 }, { "epoch": 0.2, "grad_norm": 0.14064748585224152, "learning_rate": 0.00024524999999999997, "loss": 1.0487, "step": 1635 }, { "epoch": 0.2, "grad_norm": 0.15057429671287537, "learning_rate": 0.00024599999999999996, "loss": 1.0382, "step": 1640 }, { "epoch": 0.2, "grad_norm": 0.140291228890419, "learning_rate": 0.00024675, "loss": 1.0297, "step": 1645 }, { "epoch": 0.2, "grad_norm": 0.1320800930261612, "learning_rate": 0.00024749999999999994, "loss": 1.1047, "step": 1650 }, { "epoch": 0.2, "grad_norm": 0.13335539400577545, "learning_rate": 0.00024825, "loss": 1.0456, "step": 1655 }, { "epoch": 0.2, "grad_norm": 0.14252524077892303, "learning_rate": 0.000249, "loss": 1.1067, "step": 1660 }, { "epoch": 0.2, "grad_norm": 0.14384162425994873, "learning_rate": 0.00024974999999999997, "loss": 1.0627, "step": 1665 }, { "epoch": 0.2, "grad_norm": 0.1295383721590042, "learning_rate": 0.00025049999999999996, "loss": 0.9831, "step": 1670 }, { "epoch": 0.2, "grad_norm": 0.14580777287483215, "learning_rate": 0.00025125, "loss": 1.1085, "step": 1675 }, { "epoch": 0.2, "grad_norm": 0.1554543673992157, "learning_rate": 0.00025199999999999995, "loss": 1.0539, "step": 1680 }, { "epoch": 0.2, "grad_norm": 0.1461145579814911, "learning_rate": 0.00025275, "loss": 1.0761, "step": 1685 }, { "epoch": 0.2, "grad_norm": 0.12733665108680725, "learning_rate": 0.0002535, "loss": 1.0143, "step": 1690 }, { "epoch": 0.2, "grad_norm": 0.16144566237926483, "learning_rate": 
0.00025425, "loss": 0.9641, "step": 1695 }, { "epoch": 0.2, "grad_norm": 0.14675092697143555, "learning_rate": 0.00025499999999999996, "loss": 1.029, "step": 1700 }, { "epoch": 0.21, "grad_norm": 0.15714582800865173, "learning_rate": 0.00025575, "loss": 1.0251, "step": 1705 }, { "epoch": 0.21, "grad_norm": 0.1321059912443161, "learning_rate": 0.00025649999999999995, "loss": 1.0629, "step": 1710 }, { "epoch": 0.21, "grad_norm": 0.13915494084358215, "learning_rate": 0.00025725, "loss": 1.0581, "step": 1715 }, { "epoch": 0.21, "grad_norm": 0.1294393241405487, "learning_rate": 0.000258, "loss": 0.939, "step": 1720 }, { "epoch": 0.21, "grad_norm": 0.14472445845603943, "learning_rate": 0.00025875, "loss": 1.0293, "step": 1725 }, { "epoch": 0.21, "grad_norm": 0.1563960462808609, "learning_rate": 0.00025949999999999997, "loss": 1.0984, "step": 1730 }, { "epoch": 0.21, "grad_norm": 0.13787269592285156, "learning_rate": 0.00026025, "loss": 1.0645, "step": 1735 }, { "epoch": 0.21, "grad_norm": 0.142653688788414, "learning_rate": 0.000261, "loss": 1.0271, "step": 1740 }, { "epoch": 0.21, "grad_norm": 0.16362793743610382, "learning_rate": 0.00026175, "loss": 0.9973, "step": 1745 }, { "epoch": 0.21, "grad_norm": 0.13521093130111694, "learning_rate": 0.0002625, "loss": 1.0968, "step": 1750 }, { "epoch": 0.21, "grad_norm": 0.1348579227924347, "learning_rate": 0.00026325, "loss": 1.084, "step": 1755 }, { "epoch": 0.21, "grad_norm": 0.15295952558517456, "learning_rate": 0.00026399999999999997, "loss": 1.0843, "step": 1760 }, { "epoch": 0.21, "grad_norm": 0.12325224280357361, "learning_rate": 0.00026474999999999996, "loss": 1.0448, "step": 1765 }, { "epoch": 0.21, "grad_norm": 0.13043299317359924, "learning_rate": 0.0002655, "loss": 1.0078, "step": 1770 }, { "epoch": 0.21, "grad_norm": 0.13307566940784454, "learning_rate": 0.00026624999999999994, "loss": 1.0496, "step": 1775 }, { "epoch": 0.21, "grad_norm": 0.1296854168176651, "learning_rate": 0.000267, "loss": 1.0945, "step": 1780 
}, { "epoch": 0.22, "grad_norm": 0.14310450851917267, "learning_rate": 0.00026775, "loss": 0.9254, "step": 1785 }, { "epoch": 0.22, "grad_norm": 0.14738260209560394, "learning_rate": 0.00026849999999999997, "loss": 1.0589, "step": 1790 }, { "epoch": 0.22, "grad_norm": 0.1367303431034088, "learning_rate": 0.00026924999999999996, "loss": 1.1256, "step": 1795 }, { "epoch": 0.22, "grad_norm": 0.1518729031085968, "learning_rate": 0.00027, "loss": 1.1417, "step": 1800 }, { "epoch": 0.22, "grad_norm": 0.14660024642944336, "learning_rate": 0.00027074999999999994, "loss": 1.0219, "step": 1805 }, { "epoch": 0.22, "grad_norm": 0.14216068387031555, "learning_rate": 0.0002715, "loss": 1.0597, "step": 1810 }, { "epoch": 0.22, "grad_norm": 0.1290411353111267, "learning_rate": 0.00027225, "loss": 1.0887, "step": 1815 }, { "epoch": 0.22, "grad_norm": 0.14220093190670013, "learning_rate": 0.00027299999999999997, "loss": 1.0228, "step": 1820 }, { "epoch": 0.22, "grad_norm": 0.15115365386009216, "learning_rate": 0.00027374999999999996, "loss": 1.0877, "step": 1825 }, { "epoch": 0.22, "grad_norm": 0.12890294194221497, "learning_rate": 0.0002745, "loss": 1.1167, "step": 1830 }, { "epoch": 0.22, "grad_norm": 0.14485861361026764, "learning_rate": 0.00027525, "loss": 1.0937, "step": 1835 }, { "epoch": 0.22, "grad_norm": 0.1425914615392685, "learning_rate": 0.000276, "loss": 1.0565, "step": 1840 }, { "epoch": 0.22, "grad_norm": 0.16691897809505463, "learning_rate": 0.00027675, "loss": 1.0881, "step": 1845 }, { "epoch": 0.22, "grad_norm": 0.1356891691684723, "learning_rate": 0.00027749999999999997, "loss": 0.9696, "step": 1850 }, { "epoch": 0.22, "grad_norm": 0.14420901238918304, "learning_rate": 0.00027824999999999996, "loss": 0.9847, "step": 1855 }, { "epoch": 0.22, "grad_norm": 0.1378943920135498, "learning_rate": 0.000279, "loss": 0.9863, "step": 1860 }, { "epoch": 0.22, "grad_norm": 0.13317394256591797, "learning_rate": 0.00027975, "loss": 1.0724, "step": 1865 }, { "epoch": 0.23, 
"grad_norm": 0.14231087267398834, "learning_rate": 0.0002805, "loss": 1.0886, "step": 1870 }, { "epoch": 0.23, "grad_norm": 0.13940946757793427, "learning_rate": 0.00028125, "loss": 1.047, "step": 1875 }, { "epoch": 0.23, "grad_norm": 0.13190115988254547, "learning_rate": 0.00028199999999999997, "loss": 1.0192, "step": 1880 }, { "epoch": 0.23, "grad_norm": 0.16745632886886597, "learning_rate": 0.00028274999999999996, "loss": 0.9855, "step": 1885 }, { "epoch": 0.23, "grad_norm": 0.1373772770166397, "learning_rate": 0.00028349999999999995, "loss": 1.0242, "step": 1890 }, { "epoch": 0.23, "grad_norm": 0.13555435836315155, "learning_rate": 0.00028425, "loss": 1.075, "step": 1895 }, { "epoch": 0.23, "grad_norm": 0.145888552069664, "learning_rate": 0.000285, "loss": 1.0011, "step": 1900 }, { "epoch": 0.23, "grad_norm": 0.14030475914478302, "learning_rate": 0.00028575, "loss": 1.0065, "step": 1905 }, { "epoch": 0.23, "grad_norm": 0.13973671197891235, "learning_rate": 0.00028649999999999997, "loss": 0.9631, "step": 1910 }, { "epoch": 0.23, "grad_norm": 0.15829509496688843, "learning_rate": 0.00028724999999999996, "loss": 1.0565, "step": 1915 }, { "epoch": 0.23, "grad_norm": 0.14472809433937073, "learning_rate": 0.00028799999999999995, "loss": 0.9725, "step": 1920 }, { "epoch": 0.23, "grad_norm": 0.16352474689483643, "learning_rate": 0.00028875, "loss": 0.9782, "step": 1925 }, { "epoch": 0.23, "grad_norm": 0.14885759353637695, "learning_rate": 0.0002895, "loss": 1.0521, "step": 1930 }, { "epoch": 0.23, "grad_norm": 0.14806737005710602, "learning_rate": 0.00029025, "loss": 1.0596, "step": 1935 }, { "epoch": 0.23, "grad_norm": 0.15192674100399017, "learning_rate": 0.00029099999999999997, "loss": 1.0307, "step": 1940 }, { "epoch": 0.23, "grad_norm": 0.15374353528022766, "learning_rate": 0.00029174999999999996, "loss": 1.0676, "step": 1945 }, { "epoch": 0.23, "grad_norm": 0.1534864455461502, "learning_rate": 0.00029249999999999995, "loss": 0.9044, "step": 1950 }, { "epoch": 
0.24, "grad_norm": 0.14087004959583282, "learning_rate": 0.00029325, "loss": 1.0849, "step": 1955 }, { "epoch": 0.24, "grad_norm": 0.13771598041057587, "learning_rate": 0.000294, "loss": 1.0698, "step": 1960 }, { "epoch": 0.24, "grad_norm": 0.17788104712963104, "learning_rate": 0.00029475, "loss": 1.0747, "step": 1965 }, { "epoch": 0.24, "grad_norm": 0.13944664597511292, "learning_rate": 0.00029549999999999997, "loss": 1.0289, "step": 1970 }, { "epoch": 0.24, "grad_norm": 0.14483226835727692, "learning_rate": 0.00029624999999999996, "loss": 1.0552, "step": 1975 }, { "epoch": 0.24, "grad_norm": 0.16384923458099365, "learning_rate": 0.00029699999999999996, "loss": 1.103, "step": 1980 }, { "epoch": 0.24, "grad_norm": 0.17012694478034973, "learning_rate": 0.00029775, "loss": 1.0814, "step": 1985 }, { "epoch": 0.24, "grad_norm": 0.1577819287776947, "learning_rate": 0.0002985, "loss": 0.9313, "step": 1990 }, { "epoch": 0.24, "grad_norm": 0.13799041509628296, "learning_rate": 0.00029925, "loss": 0.9898, "step": 1995 }, { "epoch": 0.24, "grad_norm": 0.15773248672485352, "learning_rate": 0.0003, "loss": 1.0522, "step": 2000 }, { "epoch": 0.24, "grad_norm": 0.1652466058731079, "learning_rate": 0.0002999999647025093, "loss": 1.1033, "step": 2005 }, { "epoch": 0.24, "grad_norm": 0.14737606048583984, "learning_rate": 0.0002999998588100539, "loss": 1.024, "step": 2010 }, { "epoch": 0.24, "grad_norm": 0.15708868205547333, "learning_rate": 0.0002999996823226836, "loss": 1.055, "step": 2015 }, { "epoch": 0.24, "grad_norm": 0.13661877810955048, "learning_rate": 0.00029999943524048147, "loss": 1.0728, "step": 2020 }, { "epoch": 0.24, "grad_norm": 0.13963155448436737, "learning_rate": 0.00029999911756356377, "loss": 1.0962, "step": 2025 }, { "epoch": 0.24, "grad_norm": 0.14523880183696747, "learning_rate": 0.00029999872929208, "loss": 0.9992, "step": 2030 }, { "epoch": 0.25, "grad_norm": 0.1487034410238266, "learning_rate": 0.000299998270426213, "loss": 0.9902, "step": 2035 }, { 
"epoch": 0.25, "grad_norm": 0.14978134632110596, "learning_rate": 0.0002999977409661786, "loss": 1.0294, "step": 2040 }, { "epoch": 0.25, "grad_norm": 0.15391825139522552, "learning_rate": 0.00029999714091222604, "loss": 0.9604, "step": 2045 }, { "epoch": 0.25, "grad_norm": 0.15368473529815674, "learning_rate": 0.0002999964702646377, "loss": 0.9744, "step": 2050 }, { "epoch": 0.25, "grad_norm": 0.15019111335277557, "learning_rate": 0.00029999572902372925, "loss": 0.9179, "step": 2055 }, { "epoch": 0.25, "grad_norm": 0.151919886469841, "learning_rate": 0.00029999491718984945, "loss": 1.0588, "step": 2060 }, { "epoch": 0.25, "grad_norm": 0.14386044442653656, "learning_rate": 0.00029999403476338053, "loss": 0.9972, "step": 2065 }, { "epoch": 0.25, "grad_norm": 0.1599544882774353, "learning_rate": 0.0002999930817447377, "loss": 1.01, "step": 2070 }, { "epoch": 0.25, "grad_norm": 0.14225023984909058, "learning_rate": 0.00029999205813436945, "loss": 1.1015, "step": 2075 }, { "epoch": 0.25, "grad_norm": 0.1291441023349762, "learning_rate": 0.00029999096393275754, "loss": 1.0002, "step": 2080 }, { "epoch": 0.25, "grad_norm": 0.140071839094162, "learning_rate": 0.000299989799140417, "loss": 1.1162, "step": 2085 }, { "epoch": 0.25, "grad_norm": 0.1789652705192566, "learning_rate": 0.00029998856375789594, "loss": 0.9408, "step": 2090 }, { "epoch": 0.25, "grad_norm": 0.13465484976768494, "learning_rate": 0.00029998725778577584, "loss": 1.0336, "step": 2095 }, { "epoch": 0.25, "grad_norm": 0.13943049311637878, "learning_rate": 0.0002999858812246713, "loss": 1.0623, "step": 2100 }, { "epoch": 0.25, "grad_norm": 0.15316350758075714, "learning_rate": 0.0002999844340752302, "loss": 1.0793, "step": 2105 }, { "epoch": 0.25, "grad_norm": 0.15929552912712097, "learning_rate": 0.00029998291633813353, "loss": 1.0003, "step": 2110 }, { "epoch": 0.25, "grad_norm": 0.14730267226696014, "learning_rate": 0.00029998132801409565, "loss": 1.0309, "step": 2115 }, { "epoch": 0.26, "grad_norm": 
0.1534448117017746, "learning_rate": 0.00029997966910386413, "loss": 1.0758, "step": 2120 }, { "epoch": 0.26, "grad_norm": 0.14620815217494965, "learning_rate": 0.00029997793960821967, "loss": 1.1534, "step": 2125 }, { "epoch": 0.26, "grad_norm": 0.16130419075489044, "learning_rate": 0.00029997613952797617, "loss": 0.9605, "step": 2130 }, { "epoch": 0.26, "grad_norm": 0.13983474671840668, "learning_rate": 0.00029997426886398094, "loss": 1.0272, "step": 2135 }, { "epoch": 0.26, "grad_norm": 0.13644209504127502, "learning_rate": 0.00029997232761711423, "loss": 1.0924, "step": 2140 }, { "epoch": 0.26, "grad_norm": 0.15886028110980988, "learning_rate": 0.0002999703157882897, "loss": 1.1073, "step": 2145 }, { "epoch": 0.26, "grad_norm": 0.13936792314052582, "learning_rate": 0.0002999682333784542, "loss": 1.0108, "step": 2150 }, { "epoch": 0.26, "grad_norm": 0.1483025848865509, "learning_rate": 0.0002999660803885878, "loss": 1.0614, "step": 2155 }, { "epoch": 0.26, "grad_norm": 0.16978415846824646, "learning_rate": 0.00029996385681970377, "loss": 1.0051, "step": 2160 }, { "epoch": 0.26, "grad_norm": 0.1511315107345581, "learning_rate": 0.0002999615626728486, "loss": 0.9862, "step": 2165 }, { "epoch": 0.26, "grad_norm": 0.16898401081562042, "learning_rate": 0.00029995919794910186, "loss": 1.0046, "step": 2170 }, { "epoch": 0.26, "grad_norm": 0.1466691642999649, "learning_rate": 0.00029995676264957667, "loss": 1.0005, "step": 2175 }, { "epoch": 0.26, "grad_norm": 0.16119731962680817, "learning_rate": 0.000299954256775419, "loss": 0.9917, "step": 2180 }, { "epoch": 0.26, "grad_norm": 0.17459401488304138, "learning_rate": 0.00029995168032780826, "loss": 1.0336, "step": 2185 }, { "epoch": 0.26, "grad_norm": 0.14910408854484558, "learning_rate": 0.0002999490333079571, "loss": 0.9864, "step": 2190 }, { "epoch": 0.26, "grad_norm": 0.16221760213375092, "learning_rate": 0.00029994631571711114, "loss": 1.0274, "step": 2195 }, { "epoch": 0.27, "grad_norm": 0.1858038604259491, 
"learning_rate": 0.0002999435275565495, "loss": 1.0693, "step": 2200 }, { "epoch": 0.27, "grad_norm": 0.1565389484167099, "learning_rate": 0.00029994066882758425, "loss": 0.958, "step": 2205 }, { "epoch": 0.27, "grad_norm": 0.1515437662601471, "learning_rate": 0.00029993773953156095, "loss": 1.1011, "step": 2210 }, { "epoch": 0.27, "grad_norm": 0.16269664466381073, "learning_rate": 0.0002999347396698581, "loss": 1.0161, "step": 2215 }, { "epoch": 0.27, "grad_norm": 0.18648435175418854, "learning_rate": 0.00029993166924388755, "loss": 0.9996, "step": 2220 }, { "epoch": 0.27, "grad_norm": 0.15165026485919952, "learning_rate": 0.00029992852825509443, "loss": 1.0186, "step": 2225 }, { "epoch": 0.27, "grad_norm": 0.15864279866218567, "learning_rate": 0.00029992531670495695, "loss": 1.1264, "step": 2230 }, { "epoch": 0.27, "grad_norm": 0.13871487975120544, "learning_rate": 0.00029992203459498654, "loss": 1.0645, "step": 2235 }, { "epoch": 0.27, "grad_norm": 0.15242180228233337, "learning_rate": 0.0002999186819267279, "loss": 1.004, "step": 2240 }, { "epoch": 0.27, "grad_norm": 0.16039399802684784, "learning_rate": 0.0002999152587017589, "loss": 1.0212, "step": 2245 }, { "epoch": 0.27, "grad_norm": 0.1544654369354248, "learning_rate": 0.0002999117649216906, "loss": 1.0053, "step": 2250 }, { "epoch": 0.27, "grad_norm": 0.15874332189559937, "learning_rate": 0.0002999082005881673, "loss": 0.991, "step": 2255 }, { "epoch": 0.27, "grad_norm": 0.1567724049091339, "learning_rate": 0.0002999045657028666, "loss": 0.9868, "step": 2260 }, { "epoch": 0.27, "grad_norm": 0.168124258518219, "learning_rate": 0.000299900860267499, "loss": 0.9966, "step": 2265 }, { "epoch": 0.27, "grad_norm": 0.14472615718841553, "learning_rate": 0.0002998970842838086, "loss": 1.0268, "step": 2270 }, { "epoch": 0.27, "grad_norm": 0.1609342396259308, "learning_rate": 0.0002998932377535723, "loss": 1.1297, "step": 2275 }, { "epoch": 0.27, "grad_norm": 0.14865976572036743, "learning_rate": 
0.0002998893206786006, "loss": 1.0403, "step": 2280 }, { "epoch": 0.28, "grad_norm": 0.162654310464859, "learning_rate": 0.0002998853330607369, "loss": 1.0732, "step": 2285 }, { "epoch": 0.28, "grad_norm": 0.15120600163936615, "learning_rate": 0.0002998812749018579, "loss": 1.0225, "step": 2290 }, { "epoch": 0.28, "grad_norm": 0.14963847398757935, "learning_rate": 0.0002998771462038735, "loss": 0.9712, "step": 2295 }, { "epoch": 0.28, "grad_norm": 0.14909087121486664, "learning_rate": 0.00029987294696872687, "loss": 1.0717, "step": 2300 }, { "epoch": 0.28, "grad_norm": 0.15834349393844604, "learning_rate": 0.00029986867719839427, "loss": 1.1024, "step": 2305 }, { "epoch": 0.28, "grad_norm": 0.14275187253952026, "learning_rate": 0.00029986433689488515, "loss": 1.0141, "step": 2310 }, { "epoch": 0.28, "grad_norm": 0.148543119430542, "learning_rate": 0.0002998599260602423, "loss": 1.0205, "step": 2315 }, { "epoch": 0.28, "grad_norm": 0.14403221011161804, "learning_rate": 0.00029985544469654155, "loss": 1.0232, "step": 2320 }, { "epoch": 0.28, "grad_norm": 0.1535564363002777, "learning_rate": 0.000299850892805892, "loss": 0.9793, "step": 2325 }, { "epoch": 0.28, "grad_norm": 0.143857941031456, "learning_rate": 0.00029984627039043583, "loss": 0.9961, "step": 2330 }, { "epoch": 0.28, "grad_norm": 0.17255234718322754, "learning_rate": 0.0002998415774523486, "loss": 0.9796, "step": 2335 }, { "epoch": 0.28, "grad_norm": 0.15468095242977142, "learning_rate": 0.00029983681399383896, "loss": 1.0223, "step": 2340 }, { "epoch": 0.28, "grad_norm": 0.14770975708961487, "learning_rate": 0.00029983198001714873, "loss": 1.0694, "step": 2345 }, { "epoch": 0.28, "grad_norm": 0.14611324667930603, "learning_rate": 0.00029982707552455293, "loss": 1.0747, "step": 2350 }, { "epoch": 0.28, "grad_norm": 0.19976738095283508, "learning_rate": 0.0002998221005183598, "loss": 1.0519, "step": 2355 }, { "epoch": 0.28, "grad_norm": 0.1997351050376892, "learning_rate": 0.0002998170550009107, "loss": 
1.1093, "step": 2360 }, { "epoch": 0.28, "grad_norm": 0.15991829335689545, "learning_rate": 0.0002998119389745802, "loss": 1.029, "step": 2365 }, { "epoch": 0.29, "grad_norm": 0.1683609038591385, "learning_rate": 0.0002998067524417762, "loss": 0.8991, "step": 2370 }, { "epoch": 0.29, "grad_norm": 0.1613713800907135, "learning_rate": 0.00029980149540493955, "loss": 1.0849, "step": 2375 }, { "epoch": 0.29, "grad_norm": 0.15293876826763153, "learning_rate": 0.0002997961678665444, "loss": 1.0498, "step": 2380 }, { "epoch": 0.29, "grad_norm": 0.15544572472572327, "learning_rate": 0.0002997907698290981, "loss": 0.9695, "step": 2385 }, { "epoch": 0.29, "grad_norm": 0.16265787184238434, "learning_rate": 0.0002997853012951411, "loss": 0.9936, "step": 2390 }, { "epoch": 0.29, "grad_norm": 0.144724041223526, "learning_rate": 0.00029977976226724706, "loss": 1.0893, "step": 2395 }, { "epoch": 0.29, "grad_norm": 0.1512562781572342, "learning_rate": 0.00029977415274802294, "loss": 1.0602, "step": 2400 }, { "epoch": 0.29, "grad_norm": 0.135329931974411, "learning_rate": 0.0002997684727401086, "loss": 1.0371, "step": 2405 }, { "epoch": 0.29, "grad_norm": 0.15818721055984497, "learning_rate": 0.00029976272224617744, "loss": 1.0003, "step": 2410 }, { "epoch": 0.29, "grad_norm": 0.16431140899658203, "learning_rate": 0.00029975690126893566, "loss": 1.017, "step": 2415 }, { "epoch": 0.29, "grad_norm": 0.1531086415052414, "learning_rate": 0.00029975100981112284, "loss": 1.0058, "step": 2420 }, { "epoch": 0.29, "grad_norm": 0.15581297874450684, "learning_rate": 0.0002997450478755118, "loss": 0.9911, "step": 2425 }, { "epoch": 0.29, "grad_norm": 0.14527705311775208, "learning_rate": 0.0002997390154649083, "loss": 1.0665, "step": 2430 }, { "epoch": 0.29, "grad_norm": 0.1660381704568863, "learning_rate": 0.0002997329125821515, "loss": 0.9779, "step": 2435 }, { "epoch": 0.29, "grad_norm": 0.18006062507629395, "learning_rate": 0.0002997267392301135, "loss": 1.0496, "step": 2440 }, { "epoch": 
0.29, "grad_norm": 0.16907352209091187, "learning_rate": 0.00029972049541169974, "loss": 0.9899, "step": 2445 }, { "epoch": 0.3, "grad_norm": 0.15815675258636475, "learning_rate": 0.00029971418112984883, "loss": 1.061, "step": 2450 }, { "epoch": 0.3, "grad_norm": 0.15669722855091095, "learning_rate": 0.0002997077963875324, "loss": 1.0424, "step": 2455 }, { "epoch": 0.3, "grad_norm": 0.14894799888134003, "learning_rate": 0.00029970134118775533, "loss": 0.9602, "step": 2460 }, { "epoch": 0.3, "grad_norm": 0.16584117710590363, "learning_rate": 0.00029969481553355565, "loss": 0.9836, "step": 2465 }, { "epoch": 0.3, "grad_norm": 0.15635208785533905, "learning_rate": 0.0002996882194280046, "loss": 0.9357, "step": 2470 }, { "epoch": 0.3, "grad_norm": 0.16133363544940948, "learning_rate": 0.0002996815528742065, "loss": 0.9737, "step": 2475 }, { "epoch": 0.3, "grad_norm": 0.1643681675195694, "learning_rate": 0.00029967481587529884, "loss": 1.0039, "step": 2480 }, { "epoch": 0.3, "grad_norm": 0.16857954859733582, "learning_rate": 0.0002996680084344523, "loss": 1.0575, "step": 2485 }, { "epoch": 0.3, "grad_norm": 0.16123433411121368, "learning_rate": 0.0002996611305548707, "loss": 0.9418, "step": 2490 }, { "epoch": 0.3, "grad_norm": 0.1680585891008377, "learning_rate": 0.0002996541822397909, "loss": 0.9875, "step": 2495 }, { "epoch": 0.3, "grad_norm": 0.1484622061252594, "learning_rate": 0.00029964716349248306, "loss": 0.824, "step": 2500 }, { "epoch": 0.3, "grad_norm": 0.14574463665485382, "learning_rate": 0.0002996400743162505, "loss": 1.0491, "step": 2505 }, { "epoch": 0.3, "grad_norm": 0.15741370618343353, "learning_rate": 0.0002996329147144296, "loss": 0.9946, "step": 2510 }, { "epoch": 0.3, "grad_norm": 0.15442806482315063, "learning_rate": 0.0002996256846903898, "loss": 0.9688, "step": 2515 }, { "epoch": 0.3, "grad_norm": 0.15500134229660034, "learning_rate": 0.00029961838424753394, "loss": 1.0, "step": 2520 }, { "epoch": 0.3, "grad_norm": 0.13550515472888947, 
"learning_rate": 0.0002996110133892978, "loss": 0.9205, "step": 2525 }, { "epoch": 0.3, "grad_norm": 0.14668314158916473, "learning_rate": 0.00029960357211915024, "loss": 0.9877, "step": 2530 }, { "epoch": 0.31, "grad_norm": 0.1662401258945465, "learning_rate": 0.0002995960604405935, "loss": 1.0079, "step": 2535 }, { "epoch": 0.31, "grad_norm": 0.14927972853183746, "learning_rate": 0.00029958847835716285, "loss": 0.8561, "step": 2540 }, { "epoch": 0.31, "grad_norm": 0.14817696809768677, "learning_rate": 0.0002995808258724265, "loss": 0.9245, "step": 2545 }, { "epoch": 0.31, "grad_norm": 0.1539267748594284, "learning_rate": 0.00029957310298998614, "loss": 1.0536, "step": 2550 }, { "epoch": 0.31, "grad_norm": 0.16719764471054077, "learning_rate": 0.00029956530971347634, "loss": 0.9806, "step": 2555 }, { "epoch": 0.31, "grad_norm": 0.1492961347103119, "learning_rate": 0.0002995574460465648, "loss": 0.9849, "step": 2560 }, { "epoch": 0.31, "grad_norm": 0.16492590308189392, "learning_rate": 0.00029954951199295257, "loss": 1.0159, "step": 2565 }, { "epoch": 0.31, "grad_norm": 0.16332361102104187, "learning_rate": 0.0002995415075563736, "loss": 1.1316, "step": 2570 }, { "epoch": 0.31, "grad_norm": 0.15671640634536743, "learning_rate": 0.0002995334327405951, "loss": 0.9887, "step": 2575 }, { "epoch": 0.31, "grad_norm": 0.1556137055158615, "learning_rate": 0.00029952528754941725, "loss": 1.0926, "step": 2580 }, { "epoch": 0.31, "grad_norm": 0.1703471690416336, "learning_rate": 0.00029951707198667347, "loss": 0.9838, "step": 2585 }, { "epoch": 0.31, "grad_norm": 0.15181928873062134, "learning_rate": 0.0002995087860562304, "loss": 0.9467, "step": 2590 }, { "epoch": 0.31, "grad_norm": 0.16134661436080933, "learning_rate": 0.0002995004297619875, "loss": 1.0854, "step": 2595 }, { "epoch": 0.31, "grad_norm": 0.17149412631988525, "learning_rate": 0.0002994920031078776, "loss": 1.0174, "step": 2600 }, { "epoch": 0.31, "grad_norm": 0.15041255950927734, "learning_rate": 
0.0002994835060978666, "loss": 1.0738, "step": 2605 }, { "epoch": 0.31, "grad_norm": 0.1523531824350357, "learning_rate": 0.0002994749387359534, "loss": 1.0006, "step": 2610 }, { "epoch": 0.32, "grad_norm": 0.15003371238708496, "learning_rate": 0.0002994663010261701, "loss": 1.0203, "step": 2615 }, { "epoch": 0.32, "grad_norm": 0.1510554850101471, "learning_rate": 0.000299457592972582, "loss": 0.9488, "step": 2620 }, { "epoch": 0.32, "grad_norm": 0.1823001652956009, "learning_rate": 0.0002994488145792872, "loss": 0.9754, "step": 2625 }, { "epoch": 0.32, "grad_norm": 0.15630380809307098, "learning_rate": 0.00029943996585041736, "loss": 1.1082, "step": 2630 }, { "epoch": 0.32, "grad_norm": 0.1587289720773697, "learning_rate": 0.0002994310467901367, "loss": 1.0511, "step": 2635 }, { "epoch": 0.32, "grad_norm": 0.1543097347021103, "learning_rate": 0.00029942205740264306, "loss": 1.0272, "step": 2640 }, { "epoch": 0.32, "grad_norm": 0.16899822652339935, "learning_rate": 0.00029941299769216704, "loss": 1.0257, "step": 2645 }, { "epoch": 0.32, "grad_norm": 0.14778625965118408, "learning_rate": 0.00029940386766297246, "loss": 0.965, "step": 2650 }, { "epoch": 0.32, "grad_norm": 0.15615618228912354, "learning_rate": 0.00029939466731935616, "loss": 1.0942, "step": 2655 }, { "epoch": 0.32, "grad_norm": 0.15470904111862183, "learning_rate": 0.0002993853966656482, "loss": 0.9682, "step": 2660 }, { "epoch": 0.32, "grad_norm": 0.16802319884300232, "learning_rate": 0.0002993760557062117, "loss": 1.1369, "step": 2665 }, { "epoch": 0.32, "grad_norm": 0.15009041130542755, "learning_rate": 0.0002993666444454426, "loss": 0.9354, "step": 2670 }, { "epoch": 0.32, "grad_norm": 0.1728777289390564, "learning_rate": 0.0002993571628877704, "loss": 1.0449, "step": 2675 }, { "epoch": 0.32, "grad_norm": 0.1533242017030716, "learning_rate": 0.0002993476110376574, "loss": 0.9436, "step": 2680 }, { "epoch": 0.32, "grad_norm": 0.15997876226902008, "learning_rate": 0.0002993379888995989, "loss": 
1.0483, "step": 2685 }, { "epoch": 0.32, "grad_norm": 0.16369123756885529, "learning_rate": 0.00029932829647812354, "loss": 0.9826, "step": 2690 }, { "epoch": 0.32, "grad_norm": 0.1492471545934677, "learning_rate": 0.0002993185337777927, "loss": 1.0283, "step": 2695 }, { "epoch": 0.33, "grad_norm": 0.16298407316207886, "learning_rate": 0.00029930870080320125, "loss": 0.9464, "step": 2700 }, { "epoch": 0.33, "grad_norm": 0.20998337864875793, "learning_rate": 0.00029929879755897674, "loss": 1.0735, "step": 2705 }, { "epoch": 0.33, "grad_norm": 0.15956445038318634, "learning_rate": 0.0002992888240497801, "loss": 1.037, "step": 2710 }, { "epoch": 0.33, "grad_norm": 0.15296678245067596, "learning_rate": 0.0002992787802803051, "loss": 1.0028, "step": 2715 }, { "epoch": 0.33, "grad_norm": 0.1596369743347168, "learning_rate": 0.0002992686662552787, "loss": 1.0732, "step": 2720 }, { "epoch": 0.33, "grad_norm": 0.16030800342559814, "learning_rate": 0.0002992584819794609, "loss": 1.0009, "step": 2725 }, { "epoch": 0.33, "grad_norm": 0.16185402870178223, "learning_rate": 0.00029924822745764485, "loss": 0.9573, "step": 2730 }, { "epoch": 0.33, "grad_norm": 0.15265031158924103, "learning_rate": 0.0002992379026946565, "loss": 1.0878, "step": 2735 }, { "epoch": 0.33, "grad_norm": 0.17246966063976288, "learning_rate": 0.00029922750769535505, "loss": 0.9797, "step": 2740 }, { "epoch": 0.33, "grad_norm": 0.16208067536354065, "learning_rate": 0.00029921704246463284, "loss": 1.0398, "step": 2745 }, { "epoch": 0.33, "grad_norm": 0.15843920409679413, "learning_rate": 0.0002992065070074151, "loss": 0.9905, "step": 2750 }, { "epoch": 0.33, "grad_norm": 0.15435267984867096, "learning_rate": 0.0002991959013286602, "loss": 1.0302, "step": 2755 }, { "epoch": 0.33, "grad_norm": 0.14609667658805847, "learning_rate": 0.00029918522543335947, "loss": 0.9949, "step": 2760 }, { "epoch": 0.33, "grad_norm": 0.1628682017326355, "learning_rate": 0.00029917447932653737, "loss": 0.9568, "step": 2765 }, { 
"epoch": 0.33, "grad_norm": 0.1572001874446869, "learning_rate": 0.0002991636630132513, "loss": 1.0415, "step": 2770 }, { "epoch": 0.33, "grad_norm": 0.1506408005952835, "learning_rate": 0.0002991527764985919, "loss": 0.9368, "step": 2775 }, { "epoch": 0.33, "grad_norm": 0.1670454889535904, "learning_rate": 0.00029914181978768267, "loss": 1.0478, "step": 2780 }, { "epoch": 0.34, "grad_norm": 0.16997024416923523, "learning_rate": 0.0002991307928856802, "loss": 1.0219, "step": 2785 }, { "epoch": 0.34, "grad_norm": 0.15132923424243927, "learning_rate": 0.00029911969579777414, "loss": 1.0253, "step": 2790 }, { "epoch": 0.34, "grad_norm": 0.1776067316532135, "learning_rate": 0.00029910852852918713, "loss": 1.0261, "step": 2795 }, { "epoch": 0.34, "grad_norm": 0.16810853779315948, "learning_rate": 0.0002990972910851748, "loss": 1.1051, "step": 2800 }, { "epoch": 0.34, "grad_norm": 0.16523437201976776, "learning_rate": 0.0002990859834710259, "loss": 1.0066, "step": 2805 }, { "epoch": 0.34, "grad_norm": 0.15905992686748505, "learning_rate": 0.0002990746056920623, "loss": 1.0197, "step": 2810 }, { "epoch": 0.34, "grad_norm": 0.16057956218719482, "learning_rate": 0.00029906315775363857, "loss": 0.9492, "step": 2815 }, { "epoch": 0.34, "grad_norm": 0.1586558222770691, "learning_rate": 0.0002990516396611425, "loss": 1.0266, "step": 2820 }, { "epoch": 0.34, "grad_norm": 0.17754611372947693, "learning_rate": 0.0002990400514199951, "loss": 0.9497, "step": 2825 }, { "epoch": 0.34, "grad_norm": 0.1540936827659607, "learning_rate": 0.00029902839303564994, "loss": 0.9285, "step": 2830 }, { "epoch": 0.34, "grad_norm": 0.1709967404603958, "learning_rate": 0.00029901666451359393, "loss": 0.9819, "step": 2835 }, { "epoch": 0.34, "grad_norm": 0.16606543958187103, "learning_rate": 0.0002990048658593469, "loss": 0.999, "step": 2840 }, { "epoch": 0.34, "grad_norm": 0.16725273430347443, "learning_rate": 0.0002989929970784618, "loss": 1.0302, "step": 2845 }, { "epoch": 0.34, "grad_norm": 
0.16970454156398773, "learning_rate": 0.0002989810581765243, "loss": 1.0709, "step": 2850 }, { "epoch": 0.34, "grad_norm": 0.18702805042266846, "learning_rate": 0.0002989690491591533, "loss": 0.9305, "step": 2855 }, { "epoch": 0.34, "grad_norm": 0.16489920020103455, "learning_rate": 0.0002989569700320007, "loss": 0.9604, "step": 2860 }, { "epoch": 0.35, "grad_norm": 0.21395571529865265, "learning_rate": 0.0002989448208007513, "loss": 0.9137, "step": 2865 }, { "epoch": 0.35, "grad_norm": 0.157196506857872, "learning_rate": 0.00029893260147112287, "loss": 1.0027, "step": 2870 }, { "epoch": 0.35, "grad_norm": 0.16530407965183258, "learning_rate": 0.0002989203120488663, "loss": 1.0449, "step": 2875 }, { "epoch": 0.35, "grad_norm": 0.15222971141338348, "learning_rate": 0.0002989079525397654, "loss": 0.985, "step": 2880 }, { "epoch": 0.35, "grad_norm": 0.1723487377166748, "learning_rate": 0.00029889552294963697, "loss": 0.9842, "step": 2885 }, { "epoch": 0.35, "grad_norm": 0.15952201187610626, "learning_rate": 0.0002988830232843308, "loss": 1.0918, "step": 2890 }, { "epoch": 0.35, "grad_norm": 0.1670973002910614, "learning_rate": 0.00029887045354972953, "loss": 1.0285, "step": 2895 }, { "epoch": 0.35, "grad_norm": 0.16919025778770447, "learning_rate": 0.00029885781375174906, "loss": 0.9887, "step": 2900 }, { "epoch": 0.35, "grad_norm": 0.15993523597717285, "learning_rate": 0.000298845103896338, "loss": 1.0421, "step": 2905 }, { "epoch": 0.35, "grad_norm": 0.15911990404129028, "learning_rate": 0.00029883232398947806, "loss": 1.0282, "step": 2910 }, { "epoch": 0.35, "grad_norm": 0.15578506886959076, "learning_rate": 0.0002988194740371839, "loss": 1.0445, "step": 2915 }, { "epoch": 0.35, "grad_norm": 0.1739857941865921, "learning_rate": 0.0002988065540455031, "loss": 1.1877, "step": 2920 }, { "epoch": 0.35, "grad_norm": 0.15756095945835114, "learning_rate": 0.0002987935640205162, "loss": 0.997, "step": 2925 }, { "epoch": 0.35, "grad_norm": 0.1599845141172409, 
"learning_rate": 0.00029878050396833685, "loss": 0.998, "step": 2930 }, { "epoch": 0.35, "grad_norm": 0.15747900307178497, "learning_rate": 0.0002987673738951115, "loss": 0.9975, "step": 2935 }, { "epoch": 0.35, "grad_norm": 0.17130185663700104, "learning_rate": 0.00029875417380701954, "loss": 1.0535, "step": 2940 }, { "epoch": 0.35, "grad_norm": 0.17812122404575348, "learning_rate": 0.0002987409037102734, "loss": 0.9872, "step": 2945 }, { "epoch": 0.36, "grad_norm": 0.16236154735088348, "learning_rate": 0.0002987275636111185, "loss": 1.0886, "step": 2950 }, { "epoch": 0.36, "grad_norm": 0.16785535216331482, "learning_rate": 0.0002987141535158331, "loss": 0.9917, "step": 2955 }, { "epoch": 0.36, "grad_norm": 0.1586502194404602, "learning_rate": 0.0002987006734307283, "loss": 1.0217, "step": 2960 }, { "epoch": 0.36, "grad_norm": 0.15011291205883026, "learning_rate": 0.0002986871233621484, "loss": 1.0619, "step": 2965 }, { "epoch": 0.36, "grad_norm": 0.14755620062351227, "learning_rate": 0.0002986735033164706, "loss": 1.0555, "step": 2970 }, { "epoch": 0.36, "grad_norm": 0.1600685715675354, "learning_rate": 0.0002986598133001048, "loss": 0.9612, "step": 2975 }, { "epoch": 0.36, "grad_norm": 0.15799719095230103, "learning_rate": 0.00029864605331949396, "loss": 0.9002, "step": 2980 }, { "epoch": 0.36, "grad_norm": 0.16441842913627625, "learning_rate": 0.0002986322233811141, "loss": 0.9336, "step": 2985 }, { "epoch": 0.36, "grad_norm": 0.16389460861682892, "learning_rate": 0.000298618323491474, "loss": 0.8653, "step": 2990 }, { "epoch": 0.36, "grad_norm": 0.16231980919837952, "learning_rate": 0.00029860435365711537, "loss": 1.0476, "step": 2995 }, { "epoch": 0.36, "grad_norm": 0.15994654595851898, "learning_rate": 0.00029859031388461296, "loss": 0.9567, "step": 3000 }, { "epoch": 0.36, "grad_norm": 0.16480427980422974, "learning_rate": 0.00029857620418057424, "loss": 0.9938, "step": 3005 }, { "epoch": 0.36, "grad_norm": 0.16214433312416077, "learning_rate": 
0.0002985620245516398, "loss": 0.9345, "step": 3010 }, { "epoch": 0.36, "grad_norm": 0.1897832155227661, "learning_rate": 0.00029854777500448303, "loss": 0.9927, "step": 3015 }, { "epoch": 0.36, "grad_norm": 0.1605633646249771, "learning_rate": 0.00029853345554581024, "loss": 0.9714, "step": 3020 }, { "epoch": 0.36, "grad_norm": 0.17080001533031464, "learning_rate": 0.0002985190661823606, "loss": 1.0147, "step": 3025 }, { "epoch": 0.37, "grad_norm": 0.16237355768680573, "learning_rate": 0.0002985046069209062, "loss": 1.0514, "step": 3030 }, { "epoch": 0.37, "grad_norm": 0.17489008605480194, "learning_rate": 0.0002984900777682522, "loss": 0.9702, "step": 3035 }, { "epoch": 0.37, "grad_norm": 0.1539561152458191, "learning_rate": 0.00029847547873123627, "loss": 0.9965, "step": 3040 }, { "epoch": 0.37, "grad_norm": 0.15789541602134705, "learning_rate": 0.0002984608098167295, "loss": 1.0079, "step": 3045 }, { "epoch": 0.37, "grad_norm": 0.17130644619464874, "learning_rate": 0.0002984460710316353, "loss": 1.006, "step": 3050 }, { "epoch": 0.37, "grad_norm": 0.19519822299480438, "learning_rate": 0.0002984312623828903, "loss": 0.9245, "step": 3055 }, { "epoch": 0.37, "grad_norm": 0.16348280012607574, "learning_rate": 0.000298416383877464, "loss": 1.042, "step": 3060 }, { "epoch": 0.37, "grad_norm": 0.14846982061862946, "learning_rate": 0.0002984014355223587, "loss": 1.0291, "step": 3065 }, { "epoch": 0.37, "grad_norm": 0.18363744020462036, "learning_rate": 0.0002983864173246096, "loss": 1.0021, "step": 3070 }, { "epoch": 0.37, "grad_norm": 0.15816280245780945, "learning_rate": 0.00029837132929128474, "loss": 0.9942, "step": 3075 }, { "epoch": 0.37, "grad_norm": 0.1437516212463379, "learning_rate": 0.00029835617142948503, "loss": 1.0911, "step": 3080 }, { "epoch": 0.37, "grad_norm": 0.16180740296840668, "learning_rate": 0.0002983409437463443, "loss": 1.0143, "step": 3085 }, { "epoch": 0.37, "grad_norm": 0.1789834052324295, "learning_rate": 0.0002983256462490292, "loss": 
1.0422, "step": 3090 }, { "epoch": 0.37, "grad_norm": 0.17736496031284332, "learning_rate": 0.00029831027894473925, "loss": 1.028, "step": 3095 }, { "epoch": 0.37, "grad_norm": 0.16583746671676636, "learning_rate": 0.00029829484184070674, "loss": 1.0083, "step": 3100 }, { "epoch": 0.37, "grad_norm": 0.1849256455898285, "learning_rate": 0.000298279334944197, "loss": 1.0822, "step": 3105 }, { "epoch": 0.37, "grad_norm": 0.16508488357067108, "learning_rate": 0.000298263758262508, "loss": 0.9659, "step": 3110 }, { "epoch": 0.38, "grad_norm": 0.1658620983362198, "learning_rate": 0.0002982481118029707, "loss": 1.0032, "step": 3115 }, { "epoch": 0.38, "grad_norm": 0.16383464634418488, "learning_rate": 0.0002982323955729488, "loss": 1.012, "step": 3120 }, { "epoch": 0.38, "grad_norm": 0.16278916597366333, "learning_rate": 0.0002982166095798389, "loss": 1.0042, "step": 3125 }, { "epoch": 0.38, "grad_norm": 0.16901999711990356, "learning_rate": 0.0002982007538310704, "loss": 0.9857, "step": 3130 }, { "epoch": 0.38, "grad_norm": 0.15740418434143066, "learning_rate": 0.0002981848283341056, "loss": 0.9621, "step": 3135 }, { "epoch": 0.38, "grad_norm": 0.1794396936893463, "learning_rate": 0.00029816883309643946, "loss": 0.9642, "step": 3140 }, { "epoch": 0.38, "grad_norm": 0.16382227838039398, "learning_rate": 0.0002981527681255999, "loss": 1.0157, "step": 3145 }, { "epoch": 0.38, "grad_norm": 0.17343173921108246, "learning_rate": 0.00029813663342914774, "loss": 1.0122, "step": 3150 }, { "epoch": 0.38, "grad_norm": 0.16225513815879822, "learning_rate": 0.0002981204290146764, "loss": 0.9324, "step": 3155 }, { "epoch": 0.38, "grad_norm": 0.1837487518787384, "learning_rate": 0.00029810415488981223, "loss": 0.9521, "step": 3160 }, { "epoch": 0.38, "grad_norm": 0.16996806859970093, "learning_rate": 0.0002980878110622144, "loss": 0.9372, "step": 3165 }, { "epoch": 0.38, "grad_norm": 0.1729620099067688, "learning_rate": 0.0002980713975395748, "loss": 1.0272, "step": 3170 }, { "epoch": 
0.38, "grad_norm": 0.15893127024173737, "learning_rate": 0.0002980549143296182, "loss": 1.0605, "step": 3175 }, { "epoch": 0.38, "grad_norm": 0.19714705646038055, "learning_rate": 0.0002980383614401023, "loss": 1.0335, "step": 3180 }, { "epoch": 0.38, "grad_norm": 0.17942222952842712, "learning_rate": 0.0002980217388788172, "loss": 1.0134, "step": 3185 }, { "epoch": 0.38, "grad_norm": 0.1684209406375885, "learning_rate": 0.0002980050466535861, "loss": 1.0344, "step": 3190 }, { "epoch": 0.38, "grad_norm": 0.16573862731456757, "learning_rate": 0.000297988284772265, "loss": 1.0624, "step": 3195 }, { "epoch": 0.39, "grad_norm": 0.16821695864200592, "learning_rate": 0.00029797145324274256, "loss": 0.9756, "step": 3200 }, { "epoch": 0.39, "grad_norm": 0.15828034281730652, "learning_rate": 0.0002979545520729402, "loss": 0.9553, "step": 3205 }, { "epoch": 0.39, "grad_norm": 0.17686916887760162, "learning_rate": 0.00029793758127081226, "loss": 0.9576, "step": 3210 }, { "epoch": 0.39, "grad_norm": 0.16397157311439514, "learning_rate": 0.00029792054084434573, "loss": 0.9603, "step": 3215 }, { "epoch": 0.39, "grad_norm": 0.16657304763793945, "learning_rate": 0.0002979034308015603, "loss": 1.0867, "step": 3220 }, { "epoch": 0.39, "grad_norm": 0.16404330730438232, "learning_rate": 0.00029788625115050873, "loss": 1.0548, "step": 3225 }, { "epoch": 0.39, "grad_norm": 0.16127341985702515, "learning_rate": 0.0002978690018992761, "loss": 1.0754, "step": 3230 }, { "epoch": 0.39, "grad_norm": 0.1616549789905548, "learning_rate": 0.0002978516830559807, "loss": 0.9912, "step": 3235 }, { "epoch": 0.39, "grad_norm": 0.15977180004119873, "learning_rate": 0.0002978342946287732, "loss": 0.9411, "step": 3240 }, { "epoch": 0.39, "grad_norm": 0.18043774366378784, "learning_rate": 0.00029781683662583725, "loss": 1.0057, "step": 3245 }, { "epoch": 0.39, "grad_norm": 0.16147902607917786, "learning_rate": 0.00029779930905538915, "loss": 0.9489, "step": 3250 }, { "epoch": 0.39, "grad_norm": 
0.17931294441223145, "learning_rate": 0.0002977817119256779, "loss": 1.0077, "step": 3255 }, { "epoch": 0.39, "grad_norm": 0.16193710267543793, "learning_rate": 0.00029776404524498533, "loss": 1.0576, "step": 3260 }, { "epoch": 0.39, "grad_norm": 0.17987242341041565, "learning_rate": 0.00029774630902162604, "loss": 0.9772, "step": 3265 }, { "epoch": 0.39, "grad_norm": 0.15659894049167633, "learning_rate": 0.0002977285032639472, "loss": 0.9922, "step": 3270 }, { "epoch": 0.39, "grad_norm": 0.17230859398841858, "learning_rate": 0.0002977106279803288, "loss": 1.0216, "step": 3275 }, { "epoch": 0.4, "grad_norm": 0.16389912366867065, "learning_rate": 0.00029769268317918354, "loss": 0.9859, "step": 3280 }, { "epoch": 0.4, "grad_norm": 0.1696440726518631, "learning_rate": 0.00029767466886895685, "loss": 1.0755, "step": 3285 }, { "epoch": 0.4, "grad_norm": 0.18958187103271484, "learning_rate": 0.0002976565850581269, "loss": 0.9506, "step": 3290 }, { "epoch": 0.4, "grad_norm": 0.1598709523677826, "learning_rate": 0.0002976384317552044, "loss": 0.9734, "step": 3295 }, { "epoch": 0.4, "grad_norm": 0.17023463547229767, "learning_rate": 0.0002976202089687331, "loss": 0.9537, "step": 3300 }, { "epoch": 0.4, "grad_norm": 0.19109511375427246, "learning_rate": 0.0002976019167072891, "loss": 0.9778, "step": 3305 }, { "epoch": 0.4, "grad_norm": 0.1838565170764923, "learning_rate": 0.00029758355497948145, "loss": 1.0258, "step": 3310 }, { "epoch": 0.4, "grad_norm": 0.16939514875411987, "learning_rate": 0.0002975651237939517, "loss": 1.0244, "step": 3315 }, { "epoch": 0.4, "grad_norm": 0.1645985096693039, "learning_rate": 0.0002975466231593742, "loss": 0.9729, "step": 3320 }, { "epoch": 0.4, "grad_norm": 0.17347556352615356, "learning_rate": 0.0002975280530844559, "loss": 1.0492, "step": 3325 }, { "epoch": 0.4, "grad_norm": 0.21827654540538788, "learning_rate": 0.00029750941357793666, "loss": 0.9775, "step": 3330 }, { "epoch": 0.4, "grad_norm": 0.1544768363237381, "learning_rate": 
0.00029749070464858875, "loss": 1.0207, "step": 3335 }, { "epoch": 0.4, "grad_norm": 0.15815982222557068, "learning_rate": 0.00029747192630521715, "loss": 1.0502, "step": 3340 }, { "epoch": 0.4, "grad_norm": 0.17267605662345886, "learning_rate": 0.0002974530785566597, "loss": 0.9846, "step": 3345 }, { "epoch": 0.4, "grad_norm": 0.1674361526966095, "learning_rate": 0.00029743416141178667, "loss": 0.9606, "step": 3350 }, { "epoch": 0.4, "grad_norm": 0.16751405596733093, "learning_rate": 0.00029741517487950116, "loss": 1.0469, "step": 3355 }, { "epoch": 0.4, "grad_norm": 0.20405547320842743, "learning_rate": 0.00029739611896873884, "loss": 0.9714, "step": 3360 }, { "epoch": 0.41, "grad_norm": 0.15737318992614746, "learning_rate": 0.00029737699368846806, "loss": 0.982, "step": 3365 }, { "epoch": 0.41, "grad_norm": 0.16428694128990173, "learning_rate": 0.0002973577990476899, "loss": 1.0685, "step": 3370 }, { "epoch": 0.41, "grad_norm": 0.16110824048519135, "learning_rate": 0.0002973385350554378, "loss": 0.947, "step": 3375 }, { "epoch": 0.41, "grad_norm": 0.1924237459897995, "learning_rate": 0.00029731920172077815, "loss": 0.9267, "step": 3380 }, { "epoch": 0.41, "grad_norm": 0.17750895023345947, "learning_rate": 0.00029729979905280987, "loss": 1.0132, "step": 3385 }, { "epoch": 0.41, "grad_norm": 0.16638635098934174, "learning_rate": 0.0002972803270606645, "loss": 0.9343, "step": 3390 }, { "epoch": 0.41, "grad_norm": 0.16308121383190155, "learning_rate": 0.00029726078575350613, "loss": 1.0279, "step": 3395 }, { "epoch": 0.41, "grad_norm": 0.16061437129974365, "learning_rate": 0.00029724117514053164, "loss": 0.9822, "step": 3400 }, { "epoch": 0.41, "grad_norm": 0.1641559600830078, "learning_rate": 0.00029722149523097046, "loss": 1.014, "step": 3405 }, { "epoch": 0.41, "grad_norm": 0.16438022255897522, "learning_rate": 0.0002972017460340845, "loss": 1.0781, "step": 3410 }, { "epoch": 0.41, "grad_norm": 0.16575871407985687, "learning_rate": 0.0002971819275591684, "loss": 
1.0313, "step": 3415 }, { "epoch": 0.41, "grad_norm": 0.1603136956691742, "learning_rate": 0.00029716203981554947, "loss": 0.9883, "step": 3420 }, { "epoch": 0.41, "grad_norm": 0.1538233458995819, "learning_rate": 0.0002971420828125875, "loss": 1.0134, "step": 3425 }, { "epoch": 0.41, "grad_norm": 0.15688014030456543, "learning_rate": 0.0002971220565596749, "loss": 0.9029, "step": 3430 }, { "epoch": 0.41, "grad_norm": 0.17302924394607544, "learning_rate": 0.0002971019610662367, "loss": 0.9459, "step": 3435 }, { "epoch": 0.41, "grad_norm": 0.17244279384613037, "learning_rate": 0.00029708179634173055, "loss": 0.9259, "step": 3440 }, { "epoch": 0.42, "grad_norm": 0.17056743800640106, "learning_rate": 0.00029706156239564665, "loss": 0.8916, "step": 3445 }, { "epoch": 0.42, "grad_norm": 0.18607419729232788, "learning_rate": 0.00029704125923750766, "loss": 0.9067, "step": 3450 }, { "epoch": 0.42, "grad_norm": 0.18427631258964539, "learning_rate": 0.000297020886876869, "loss": 0.9818, "step": 3455 }, { "epoch": 0.42, "grad_norm": 0.17508511245250702, "learning_rate": 0.00029700044532331854, "loss": 0.9721, "step": 3460 }, { "epoch": 0.42, "grad_norm": 0.1710289865732193, "learning_rate": 0.0002969799345864768, "loss": 1.0841, "step": 3465 }, { "epoch": 0.42, "grad_norm": 0.16923823952674866, "learning_rate": 0.00029695935467599676, "loss": 1.036, "step": 3470 }, { "epoch": 0.42, "grad_norm": 0.1833118349313736, "learning_rate": 0.00029693870560156406, "loss": 1.0369, "step": 3475 }, { "epoch": 0.42, "grad_norm": 0.158515065908432, "learning_rate": 0.0002969179873728968, "loss": 0.9474, "step": 3480 }, { "epoch": 0.42, "grad_norm": 0.18448443710803986, "learning_rate": 0.0002968971999997458, "loss": 1.036, "step": 3485 }, { "epoch": 0.42, "grad_norm": 0.17607718706130981, "learning_rate": 0.000296876343491894, "loss": 1.1111, "step": 3490 }, { "epoch": 0.42, "grad_norm": 0.16396880149841309, "learning_rate": 0.0002968554178591575, "loss": 1.0609, "step": 3495 }, { "epoch": 
0.42, "grad_norm": 0.1581658273935318, "learning_rate": 0.00029683442311138436, "loss": 1.0826, "step": 3500 }, { "epoch": 0.42, "grad_norm": 0.16231244802474976, "learning_rate": 0.00029681335925845544, "loss": 0.9405, "step": 3505 }, { "epoch": 0.42, "grad_norm": 0.17052994668483734, "learning_rate": 0.0002967922263102842, "loss": 1.057, "step": 3510 }, { "epoch": 0.42, "grad_norm": 0.1752976030111313, "learning_rate": 0.00029677102427681643, "loss": 0.9852, "step": 3515 }, { "epoch": 0.42, "grad_norm": 0.1679937094449997, "learning_rate": 0.00029674975316803056, "loss": 1.053, "step": 3520 }, { "epoch": 0.42, "grad_norm": 0.17589174211025238, "learning_rate": 0.00029672841299393734, "loss": 1.0338, "step": 3525 }, { "epoch": 0.43, "grad_norm": 0.17791646718978882, "learning_rate": 0.00029670700376458034, "loss": 0.9818, "step": 3530 }, { "epoch": 0.43, "grad_norm": 0.1609509289264679, "learning_rate": 0.0002966855254900353, "loss": 0.9841, "step": 3535 }, { "epoch": 0.43, "grad_norm": 0.1620442420244217, "learning_rate": 0.0002966639781804108, "loss": 1.0825, "step": 3540 }, { "epoch": 0.43, "grad_norm": 0.18047846853733063, "learning_rate": 0.00029664236184584757, "loss": 1.0661, "step": 3545 }, { "epoch": 0.43, "grad_norm": 0.16510747373104095, "learning_rate": 0.00029662067649651895, "loss": 0.8942, "step": 3550 }, { "epoch": 0.43, "grad_norm": 0.18629179894924164, "learning_rate": 0.00029659892214263094, "loss": 1.0014, "step": 3555 }, { "epoch": 0.43, "grad_norm": 0.18440277874469757, "learning_rate": 0.0002965770987944217, "loss": 1.0379, "step": 3560 }, { "epoch": 0.43, "grad_norm": 0.1701250523328781, "learning_rate": 0.00029655520646216214, "loss": 0.9512, "step": 3565 }, { "epoch": 0.43, "grad_norm": 0.17910198867321014, "learning_rate": 0.00029653324515615544, "loss": 1.0178, "step": 3570 }, { "epoch": 0.43, "grad_norm": 0.15879999101161957, "learning_rate": 0.00029651121488673737, "loss": 0.9555, "step": 3575 }, { "epoch": 0.43, "grad_norm": 
0.17738574743270874, "learning_rate": 0.00029648911566427605, "loss": 0.9916, "step": 3580 }, { "epoch": 0.43, "grad_norm": 0.1952105015516281, "learning_rate": 0.00029646694749917217, "loss": 1.0, "step": 3585 }, { "epoch": 0.43, "grad_norm": 0.1668039858341217, "learning_rate": 0.0002964447104018588, "loss": 0.9486, "step": 3590 }, { "epoch": 0.43, "grad_norm": 0.15947400033473969, "learning_rate": 0.0002964224043828014, "loss": 0.9834, "step": 3595 }, { "epoch": 0.43, "grad_norm": 0.18073715269565582, "learning_rate": 0.000296400029452498, "loss": 0.9501, "step": 3600 }, { "epoch": 0.43, "grad_norm": 0.18462218344211578, "learning_rate": 0.00029637758562147895, "loss": 1.0168, "step": 3605 }, { "epoch": 0.43, "grad_norm": 0.1783631145954132, "learning_rate": 0.000296355072900307, "loss": 0.9695, "step": 3610 }, { "epoch": 0.44, "grad_norm": 0.1757393628358841, "learning_rate": 0.00029633249129957747, "loss": 0.9556, "step": 3615 }, { "epoch": 0.44, "grad_norm": 0.17686530947685242, "learning_rate": 0.000296309840829918, "loss": 0.9561, "step": 3620 }, { "epoch": 0.44, "grad_norm": 0.16874971985816956, "learning_rate": 0.00029628712150198865, "loss": 0.9509, "step": 3625 }, { "epoch": 0.44, "grad_norm": 0.17538826167583466, "learning_rate": 0.00029626433332648183, "loss": 1.0664, "step": 3630 }, { "epoch": 0.44, "grad_norm": 0.16366077959537506, "learning_rate": 0.00029624147631412246, "loss": 0.9833, "step": 3635 }, { "epoch": 0.44, "grad_norm": 0.17649723589420319, "learning_rate": 0.0002962185504756678, "loss": 0.9077, "step": 3640 }, { "epoch": 0.44, "grad_norm": 0.1709517389535904, "learning_rate": 0.0002961955558219076, "loss": 0.9386, "step": 3645 }, { "epoch": 0.44, "grad_norm": 0.1735381931066513, "learning_rate": 0.0002961724923636637, "loss": 1.0339, "step": 3650 }, { "epoch": 0.44, "grad_norm": 0.17340226471424103, "learning_rate": 0.00029614936011179076, "loss": 0.9962, "step": 3655 }, { "epoch": 0.44, "grad_norm": 0.1836249977350235, 
"learning_rate": 0.0002961261590771755, "loss": 0.8727, "step": 3660 }, { "epoch": 0.44, "grad_norm": 0.16805151104927063, "learning_rate": 0.000296102889270737, "loss": 0.986, "step": 3665 }, { "epoch": 0.44, "grad_norm": 0.1725672036409378, "learning_rate": 0.00029607955070342685, "loss": 1.0204, "step": 3670 }, { "epoch": 0.44, "grad_norm": 0.17753121256828308, "learning_rate": 0.00029605614338622905, "loss": 0.9744, "step": 3675 }, { "epoch": 0.44, "grad_norm": 0.18001006543636322, "learning_rate": 0.00029603266733015983, "loss": 0.9984, "step": 3680 }, { "epoch": 0.44, "grad_norm": 0.18695253133773804, "learning_rate": 0.0002960091225462677, "loss": 1.0663, "step": 3685 }, { "epoch": 0.44, "grad_norm": 0.18563799560070038, "learning_rate": 0.00029598550904563374, "loss": 0.9452, "step": 3690 }, { "epoch": 0.45, "grad_norm": 0.18646039068698883, "learning_rate": 0.0002959618268393712, "loss": 1.0318, "step": 3695 }, { "epoch": 0.45, "grad_norm": 0.16049382090568542, "learning_rate": 0.00029593807593862565, "loss": 1.0137, "step": 3700 }, { "epoch": 0.45, "grad_norm": 0.17187224328517914, "learning_rate": 0.00029591425635457514, "loss": 0.9957, "step": 3705 }, { "epoch": 0.45, "grad_norm": 0.18150264024734497, "learning_rate": 0.00029589036809842987, "loss": 0.9998, "step": 3710 }, { "epoch": 0.45, "grad_norm": 0.18180853128433228, "learning_rate": 0.0002958664111814326, "loss": 0.9865, "step": 3715 }, { "epoch": 0.45, "grad_norm": 0.17173470556735992, "learning_rate": 0.0002958423856148581, "loss": 0.9939, "step": 3720 }, { "epoch": 0.45, "grad_norm": 0.18035240471363068, "learning_rate": 0.0002958182914100137, "loss": 0.9806, "step": 3725 }, { "epoch": 0.45, "grad_norm": 0.18080519139766693, "learning_rate": 0.00029579412857823887, "loss": 0.9165, "step": 3730 }, { "epoch": 0.45, "grad_norm": 0.18667446076869965, "learning_rate": 0.0002957698971309054, "loss": 0.9465, "step": 3735 }, { "epoch": 0.45, "grad_norm": 0.17435042560100555, "learning_rate": 
0.0002957455970794175, "loss": 0.9562, "step": 3740 }, { "epoch": 0.45, "grad_norm": 0.19074921309947968, "learning_rate": 0.0002957212284352116, "loss": 0.9912, "step": 3745 }, { "epoch": 0.45, "grad_norm": 0.1836334615945816, "learning_rate": 0.0002956967912097563, "loss": 0.9995, "step": 3750 }, { "epoch": 0.45, "grad_norm": 0.19965748488903046, "learning_rate": 0.00029567228541455264, "loss": 0.9773, "step": 3755 }, { "epoch": 0.45, "grad_norm": 0.19002239406108856, "learning_rate": 0.0002956477110611338, "loss": 0.978, "step": 3760 }, { "epoch": 0.45, "grad_norm": 0.1769300252199173, "learning_rate": 0.00029562306816106535, "loss": 0.9428, "step": 3765 }, { "epoch": 0.45, "grad_norm": 0.1742662787437439, "learning_rate": 0.000295598356725945, "loss": 0.9801, "step": 3770 }, { "epoch": 0.45, "grad_norm": 0.17609982192516327, "learning_rate": 0.00029557357676740286, "loss": 0.9655, "step": 3775 }, { "epoch": 0.46, "grad_norm": 0.152922123670578, "learning_rate": 0.00029554872829710114, "loss": 1.0099, "step": 3780 }, { "epoch": 0.46, "grad_norm": 0.18085353076457977, "learning_rate": 0.0002955238113267344, "loss": 1.0337, "step": 3785 }, { "epoch": 0.46, "grad_norm": 0.17463679611682892, "learning_rate": 0.00029549882586802923, "loss": 0.9719, "step": 3790 }, { "epoch": 0.46, "grad_norm": 0.1799176186323166, "learning_rate": 0.0002954737719327448, "loss": 0.9933, "step": 3795 }, { "epoch": 0.46, "grad_norm": 0.18026649951934814, "learning_rate": 0.00029544864953267224, "loss": 1.0829, "step": 3800 }, { "epoch": 0.46, "grad_norm": 0.18664585053920746, "learning_rate": 0.000295423458679635, "loss": 0.9727, "step": 3805 }, { "epoch": 0.46, "grad_norm": 0.1912483125925064, "learning_rate": 0.0002953981993854888, "loss": 0.9602, "step": 3810 }, { "epoch": 0.46, "grad_norm": 0.17371563613414764, "learning_rate": 0.00029537287166212146, "loss": 0.9695, "step": 3815 }, { "epoch": 0.46, "grad_norm": 0.17529870569705963, "learning_rate": 0.00029534747552145295, "loss": 
1.0112, "step": 3820 }, { "epoch": 0.46, "grad_norm": 0.20106241106987, "learning_rate": 0.00029532201097543566, "loss": 1.045, "step": 3825 }, { "epoch": 0.46, "grad_norm": 0.16965335607528687, "learning_rate": 0.00029529647803605406, "loss": 0.9602, "step": 3830 }, { "epoch": 0.46, "grad_norm": 0.16712799668312073, "learning_rate": 0.00029527087671532467, "loss": 0.8681, "step": 3835 }, { "epoch": 0.46, "grad_norm": 0.17524246871471405, "learning_rate": 0.00029524520702529645, "loss": 1.0298, "step": 3840 }, { "epoch": 0.46, "grad_norm": 0.16729198396205902, "learning_rate": 0.00029521946897805034, "loss": 1.0393, "step": 3845 }, { "epoch": 0.46, "grad_norm": 0.1771865338087082, "learning_rate": 0.00029519366258569954, "loss": 0.9163, "step": 3850 }, { "epoch": 0.46, "grad_norm": 0.19516459107398987, "learning_rate": 0.0002951677878603894, "loss": 1.0929, "step": 3855 }, { "epoch": 0.47, "grad_norm": 0.16571563482284546, "learning_rate": 0.0002951418448142974, "loss": 0.9895, "step": 3860 }, { "epoch": 0.47, "grad_norm": 0.17341572046279907, "learning_rate": 0.00029511583345963327, "loss": 0.951, "step": 3865 }, { "epoch": 0.47, "grad_norm": 0.16998711228370667, "learning_rate": 0.00029508975380863867, "loss": 0.9239, "step": 3870 }, { "epoch": 0.47, "grad_norm": 0.17954027652740479, "learning_rate": 0.0002950636058735877, "loss": 0.9932, "step": 3875 }, { "epoch": 0.47, "grad_norm": 0.17167799174785614, "learning_rate": 0.0002950373896667864, "loss": 1.0185, "step": 3880 }, { "epoch": 0.47, "grad_norm": 0.16167834401130676, "learning_rate": 0.000295011105200573, "loss": 1.0683, "step": 3885 }, { "epoch": 0.47, "grad_norm": 0.1931363195180893, "learning_rate": 0.0002949847524873178, "loss": 0.8858, "step": 3890 }, { "epoch": 0.47, "grad_norm": 0.1638021469116211, "learning_rate": 0.0002949583315394233, "loss": 0.9769, "step": 3895 }, { "epoch": 0.47, "grad_norm": 0.17214910686016083, "learning_rate": 0.00029493184236932405, "loss": 1.0506, "step": 3900 }, { 
"epoch": 0.47, "grad_norm": 0.18750952184200287, "learning_rate": 0.0002949052849894867, "loss": 0.9815, "step": 3905 }, { "epoch": 0.47, "grad_norm": 0.16336201131343842, "learning_rate": 0.00029487865941241014, "loss": 0.9287, "step": 3910 }, { "epoch": 0.47, "grad_norm": 0.17103566229343414, "learning_rate": 0.00029485196565062516, "loss": 0.9731, "step": 3915 }, { "epoch": 0.47, "grad_norm": 0.16729870438575745, "learning_rate": 0.0002948252037166948, "loss": 0.9477, "step": 3920 }, { "epoch": 0.47, "grad_norm": 0.1882411539554596, "learning_rate": 0.00029479837362321405, "loss": 0.9904, "step": 3925 }, { "epoch": 0.47, "grad_norm": 0.18706083297729492, "learning_rate": 0.00029477147538281004, "loss": 0.997, "step": 3930 }, { "epoch": 0.47, "grad_norm": 0.18847805261611938, "learning_rate": 0.000294744509008142, "loss": 0.9378, "step": 3935 }, { "epoch": 0.47, "grad_norm": 0.1840084046125412, "learning_rate": 0.00029471747451190124, "loss": 0.9559, "step": 3940 }, { "epoch": 0.48, "grad_norm": 0.1757187843322754, "learning_rate": 0.000294690371906811, "loss": 1.011, "step": 3945 }, { "epoch": 0.48, "grad_norm": 0.16202908754348755, "learning_rate": 0.00029466320120562683, "loss": 0.9077, "step": 3950 }, { "epoch": 0.48, "grad_norm": 0.18490107357501984, "learning_rate": 0.00029463596242113596, "loss": 1.0702, "step": 3955 }, { "epoch": 0.48, "grad_norm": 0.1752914935350418, "learning_rate": 0.000294608655566158, "loss": 0.9612, "step": 3960 }, { "epoch": 0.48, "grad_norm": 0.17405173182487488, "learning_rate": 0.00029458128065354444, "loss": 1.0131, "step": 3965 }, { "epoch": 0.48, "grad_norm": 0.18187612295150757, "learning_rate": 0.0002945538376961788, "loss": 0.9848, "step": 3970 }, { "epoch": 0.48, "grad_norm": 0.19082453846931458, "learning_rate": 0.0002945263267069766, "loss": 0.9559, "step": 3975 }, { "epoch": 0.48, "grad_norm": 0.17413316667079926, "learning_rate": 0.0002944987476988855, "loss": 1.0772, "step": 3980 }, { "epoch": 0.48, "grad_norm": 
0.15954262018203735, "learning_rate": 0.00029447110068488516, "loss": 0.9037, "step": 3985 }, { "epoch": 0.48, "grad_norm": 0.17299918830394745, "learning_rate": 0.000294443385677987, "loss": 0.9624, "step": 3990 }, { "epoch": 0.48, "grad_norm": 0.179169163107872, "learning_rate": 0.00029441560269123483, "loss": 0.983, "step": 3995 }, { "epoch": 0.48, "grad_norm": 0.16181403398513794, "learning_rate": 0.00029438775173770405, "loss": 0.9705, "step": 4000 }, { "epoch": 0.48, "grad_norm": 0.1745695024728775, "learning_rate": 0.0002943598328305024, "loss": 0.9664, "step": 4005 }, { "epoch": 0.48, "grad_norm": 0.1865403801202774, "learning_rate": 0.0002943318459827693, "loss": 1.0259, "step": 4010 }, { "epoch": 0.48, "grad_norm": 0.21468792855739594, "learning_rate": 0.0002943037912076764, "loss": 0.9311, "step": 4015 }, { "epoch": 0.48, "grad_norm": 0.18576794862747192, "learning_rate": 0.0002942756685184272, "loss": 1.07, "step": 4020 }, { "epoch": 0.48, "grad_norm": 0.18512584269046783, "learning_rate": 0.0002942474779282571, "loss": 0.9204, "step": 4025 }, { "epoch": 0.49, "grad_norm": 0.18075776100158691, "learning_rate": 0.00029421921945043365, "loss": 0.9853, "step": 4030 }, { "epoch": 0.49, "grad_norm": 0.17702947556972504, "learning_rate": 0.0002941908930982561, "loss": 1.0537, "step": 4035 }, { "epoch": 0.49, "grad_norm": 0.17525263130664825, "learning_rate": 0.0002941624988850558, "loss": 0.9713, "step": 4040 }, { "epoch": 0.49, "grad_norm": 0.16947941482067108, "learning_rate": 0.00029413403682419613, "loss": 0.9988, "step": 4045 }, { "epoch": 0.49, "grad_norm": 0.17657147347927094, "learning_rate": 0.0002941055069290721, "loss": 0.9946, "step": 4050 }, { "epoch": 0.49, "grad_norm": 0.17668572068214417, "learning_rate": 0.00029407690921311094, "loss": 0.9288, "step": 4055 }, { "epoch": 0.49, "grad_norm": 0.18631796538829803, "learning_rate": 0.0002940482436897717, "loss": 0.9218, "step": 4060 }, { "epoch": 0.49, "grad_norm": 0.1784496307373047, 
"learning_rate": 0.00029401951037254524, "loss": 1.059, "step": 4065 }, { "epoch": 0.49, "grad_norm": 0.19037973880767822, "learning_rate": 0.00029399070927495447, "loss": 0.9622, "step": 4070 }, { "epoch": 0.49, "grad_norm": 0.17048045992851257, "learning_rate": 0.0002939618404105541, "loss": 0.8903, "step": 4075 }, { "epoch": 0.49, "grad_norm": 0.1878838837146759, "learning_rate": 0.00029393290379293085, "loss": 1.0106, "step": 4080 }, { "epoch": 0.49, "grad_norm": 0.18040554225444794, "learning_rate": 0.0002939038994357032, "loss": 0.9517, "step": 4085 }, { "epoch": 0.49, "grad_norm": 0.19907140731811523, "learning_rate": 0.0002938748273525216, "loss": 0.9628, "step": 4090 }, { "epoch": 0.49, "grad_norm": 0.18067404627799988, "learning_rate": 0.00029384568755706824, "loss": 0.9726, "step": 4095 }, { "epoch": 0.49, "grad_norm": 0.16621145606040955, "learning_rate": 0.0002938164800630574, "loss": 1.0339, "step": 4100 }, { "epoch": 0.49, "grad_norm": 0.17213256657123566, "learning_rate": 0.00029378720488423506, "loss": 0.933, "step": 4105 }, { "epoch": 0.5, "grad_norm": 0.1835915744304657, "learning_rate": 0.00029375786203437906, "loss": 0.9645, "step": 4110 }, { "epoch": 0.5, "grad_norm": 0.19219228625297546, "learning_rate": 0.00029372845152729916, "loss": 1.0066, "step": 4115 }, { "epoch": 0.5, "grad_norm": 0.1753523349761963, "learning_rate": 0.0002936989733768368, "loss": 0.98, "step": 4120 }, { "epoch": 0.5, "grad_norm": 0.16420799493789673, "learning_rate": 0.00029366942759686556, "loss": 0.9817, "step": 4125 }, { "epoch": 0.5, "grad_norm": 0.17462868988513947, "learning_rate": 0.0002936398142012906, "loss": 1.024, "step": 4130 }, { "epoch": 0.5, "grad_norm": 0.1853259801864624, "learning_rate": 0.0002936101332040489, "loss": 0.9538, "step": 4135 }, { "epoch": 0.5, "grad_norm": 0.17520953714847565, "learning_rate": 0.00029358038461910934, "loss": 0.9453, "step": 4140 }, { "epoch": 0.5, "grad_norm": 0.18350806832313538, "learning_rate": 
0.00029355056846047266, "loss": 0.9445, "step": 4145 }, { "epoch": 0.5, "grad_norm": 0.17712032794952393, "learning_rate": 0.0002935206847421713, "loss": 0.9821, "step": 4150 }, { "epoch": 0.5, "grad_norm": 0.17345529794692993, "learning_rate": 0.00029349073347826953, "loss": 1.0079, "step": 4155 }, { "epoch": 0.5, "grad_norm": 0.18202108144760132, "learning_rate": 0.0002934607146828634, "loss": 1.0103, "step": 4160 }, { "epoch": 0.5, "grad_norm": 0.17905019223690033, "learning_rate": 0.00029343062837008076, "loss": 0.9744, "step": 4165 }, { "epoch": 0.5, "grad_norm": 0.16872918605804443, "learning_rate": 0.0002934004745540812, "loss": 1.0276, "step": 4170 }, { "epoch": 0.5, "grad_norm": 0.1752139776945114, "learning_rate": 0.00029337025324905616, "loss": 0.9822, "step": 4175 }, { "epoch": 0.5, "grad_norm": 0.18080003559589386, "learning_rate": 0.0002933399644692287, "loss": 1.0102, "step": 4180 }, { "epoch": 0.5, "grad_norm": 0.22213223576545715, "learning_rate": 0.00029330960822885385, "loss": 0.9913, "step": 4185 }, { "epoch": 0.5, "grad_norm": 0.18527017533779144, "learning_rate": 0.0002932791845422182, "loss": 0.9575, "step": 4190 }, { "epoch": 0.51, "grad_norm": 0.18203973770141602, "learning_rate": 0.00029324869342364014, "loss": 0.9891, "step": 4195 }, { "epoch": 0.51, "grad_norm": 0.17969951033592224, "learning_rate": 0.00029321813488746983, "loss": 0.994, "step": 4200 }, { "epoch": 0.51, "grad_norm": 0.18661588430404663, "learning_rate": 0.0002931875089480891, "loss": 1.0039, "step": 4205 }, { "epoch": 0.51, "grad_norm": 0.18751640617847443, "learning_rate": 0.0002931568156199115, "loss": 0.9442, "step": 4210 }, { "epoch": 0.51, "grad_norm": 0.16617530584335327, "learning_rate": 0.0002931260549173825, "loss": 0.885, "step": 4215 }, { "epoch": 0.51, "grad_norm": 0.1970120072364807, "learning_rate": 0.00029309522685497886, "loss": 1.0264, "step": 4220 }, { "epoch": 0.51, "grad_norm": 0.1805301010608673, "learning_rate": 0.00029306433144720947, "loss": 
1.0025, "step": 4225 }, { "epoch": 0.51, "grad_norm": 0.1933399736881256, "learning_rate": 0.0002930333687086147, "loss": 0.9908, "step": 4230 }, { "epoch": 0.51, "grad_norm": 0.18400885164737701, "learning_rate": 0.0002930023386537666, "loss": 0.988, "step": 4235 }, { "epoch": 0.51, "grad_norm": 0.18429554998874664, "learning_rate": 0.000292971241297269, "loss": 0.9955, "step": 4240 }, { "epoch": 0.51, "grad_norm": 0.17350837588310242, "learning_rate": 0.0002929400766537573, "loss": 0.9956, "step": 4245 }, { "epoch": 0.51, "grad_norm": 0.17750461399555206, "learning_rate": 0.0002929088447378986, "loss": 0.9968, "step": 4250 }, { "epoch": 0.51, "grad_norm": 0.16923023760318756, "learning_rate": 0.0002928775455643917, "loss": 0.9371, "step": 4255 }, { "epoch": 0.51, "grad_norm": 0.17826791107654572, "learning_rate": 0.0002928461791479671, "loss": 0.999, "step": 4260 }, { "epoch": 0.51, "grad_norm": 0.1948651373386383, "learning_rate": 0.0002928147455033868, "loss": 0.9618, "step": 4265 }, { "epoch": 0.51, "grad_norm": 0.16941259801387787, "learning_rate": 0.00029278324464544455, "loss": 0.9693, "step": 4270 }, { "epoch": 0.52, "grad_norm": 0.1716768741607666, "learning_rate": 0.00029275167658896563, "loss": 1.0408, "step": 4275 }, { "epoch": 0.52, "grad_norm": 0.17600005865097046, "learning_rate": 0.00029272004134880714, "loss": 1.0234, "step": 4280 }, { "epoch": 0.52, "grad_norm": 0.18008339405059814, "learning_rate": 0.00029268833893985756, "loss": 0.9356, "step": 4285 }, { "epoch": 0.52, "grad_norm": 0.18551653623580933, "learning_rate": 0.00029265656937703713, "loss": 0.9183, "step": 4290 }, { "epoch": 0.52, "grad_norm": 0.18553385138511658, "learning_rate": 0.0002926247326752977, "loss": 0.9736, "step": 4295 }, { "epoch": 0.52, "grad_norm": 0.18531882762908936, "learning_rate": 0.00029259282884962266, "loss": 1.0085, "step": 4300 }, { "epoch": 0.52, "grad_norm": 0.17965206503868103, "learning_rate": 0.000292560857915027, "loss": 0.9765, "step": 4305 }, { 
"epoch": 0.52, "grad_norm": 0.1778474599123001, "learning_rate": 0.0002925288198865573, "loss": 0.9646, "step": 4310 }, { "epoch": 0.52, "grad_norm": 0.19227425754070282, "learning_rate": 0.0002924967147792917, "loss": 0.996, "step": 4315 }, { "epoch": 0.52, "grad_norm": 0.20739661157131195, "learning_rate": 0.00029246454260833997, "loss": 0.9635, "step": 4320 }, { "epoch": 0.52, "grad_norm": 0.17460639774799347, "learning_rate": 0.0002924323033888434, "loss": 0.9606, "step": 4325 }, { "epoch": 0.52, "grad_norm": 0.20777195692062378, "learning_rate": 0.0002923999971359748, "loss": 1.0319, "step": 4330 }, { "epoch": 0.52, "grad_norm": 0.19790644943714142, "learning_rate": 0.00029236762386493863, "loss": 1.0417, "step": 4335 }, { "epoch": 0.52, "grad_norm": 0.17253664135932922, "learning_rate": 0.0002923351835909707, "loss": 0.9815, "step": 4340 }, { "epoch": 0.52, "grad_norm": 0.18922512233257294, "learning_rate": 0.0002923026763293387, "loss": 1.0032, "step": 4345 }, { "epoch": 0.52, "grad_norm": 0.1988437920808792, "learning_rate": 0.0002922701020953414, "loss": 0.9955, "step": 4350 }, { "epoch": 0.52, "grad_norm": 0.19901318848133087, "learning_rate": 0.00029223746090430944, "loss": 0.9678, "step": 4355 }, { "epoch": 0.53, "grad_norm": 0.1846579760313034, "learning_rate": 0.0002922047527716048, "loss": 0.8918, "step": 4360 }, { "epoch": 0.53, "grad_norm": 0.18826933205127716, "learning_rate": 0.000292171977712621, "loss": 0.9736, "step": 4365 }, { "epoch": 0.53, "grad_norm": 0.1800205409526825, "learning_rate": 0.00029213913574278324, "loss": 0.9471, "step": 4370 }, { "epoch": 0.53, "grad_norm": 0.184101402759552, "learning_rate": 0.00029210622687754777, "loss": 1.0363, "step": 4375 }, { "epoch": 0.53, "grad_norm": 0.18093040585517883, "learning_rate": 0.0002920732511324028, "loss": 0.8811, "step": 4380 }, { "epoch": 0.53, "grad_norm": 0.16744713485240936, "learning_rate": 0.0002920402085228677, "loss": 0.9148, "step": 4385 }, { "epoch": 0.53, "grad_norm": 
0.3018034100532532, "learning_rate": 0.0002920070990644935, "loss": 1.0029, "step": 4390 }, { "epoch": 0.53, "grad_norm": 0.2019168734550476, "learning_rate": 0.0002919739227728625, "loss": 0.8603, "step": 4395 }, { "epoch": 0.53, "grad_norm": 0.20707763731479645, "learning_rate": 0.0002919406796635887, "loss": 1.0115, "step": 4400 }, { "epoch": 0.53, "grad_norm": 0.18676406145095825, "learning_rate": 0.00029190736975231736, "loss": 1.0022, "step": 4405 }, { "epoch": 0.53, "grad_norm": 0.1945050209760666, "learning_rate": 0.00029187399305472515, "loss": 1.068, "step": 4410 }, { "epoch": 0.53, "grad_norm": 0.17335373163223267, "learning_rate": 0.0002918405495865203, "loss": 0.9242, "step": 4415 }, { "epoch": 0.53, "grad_norm": 0.17637088894844055, "learning_rate": 0.0002918070393634425, "loss": 0.9241, "step": 4420 }, { "epoch": 0.53, "grad_norm": 0.18394945561885834, "learning_rate": 0.00029177346240126273, "loss": 1.0194, "step": 4425 }, { "epoch": 0.53, "grad_norm": 0.21276423335075378, "learning_rate": 0.0002917398187157834, "loss": 1.0077, "step": 4430 }, { "epoch": 0.53, "grad_norm": 0.19257494807243347, "learning_rate": 0.0002917061083228383, "loss": 0.982, "step": 4435 }, { "epoch": 0.53, "grad_norm": 0.2055777907371521, "learning_rate": 0.0002916723312382927, "loss": 1.0397, "step": 4440 }, { "epoch": 0.54, "grad_norm": 0.18082697689533234, "learning_rate": 0.00029163848747804327, "loss": 0.963, "step": 4445 }, { "epoch": 0.54, "grad_norm": 0.18980878591537476, "learning_rate": 0.00029160457705801796, "loss": 0.9314, "step": 4450 }, { "epoch": 0.54, "grad_norm": 0.1710338592529297, "learning_rate": 0.0002915705999941761, "loss": 1.0261, "step": 4455 }, { "epoch": 0.54, "grad_norm": 0.17998263239860535, "learning_rate": 0.0002915365563025085, "loss": 0.9295, "step": 4460 }, { "epoch": 0.54, "grad_norm": 0.19266332685947418, "learning_rate": 0.00029150244599903725, "loss": 0.8856, "step": 4465 }, { "epoch": 0.54, "grad_norm": 0.1885053515434265, 
"learning_rate": 0.0002914682690998157, "loss": 0.9803, "step": 4470 }, { "epoch": 0.54, "grad_norm": 0.19506719708442688, "learning_rate": 0.00029143402562092875, "loss": 0.9873, "step": 4475 }, { "epoch": 0.54, "grad_norm": 0.20998205244541168, "learning_rate": 0.0002913997155784924, "loss": 0.9647, "step": 4480 }, { "epoch": 0.54, "grad_norm": 0.21117228269577026, "learning_rate": 0.00029136533898865423, "loss": 0.9814, "step": 4485 }, { "epoch": 0.54, "grad_norm": 0.18329034745693207, "learning_rate": 0.0002913308958675929, "loss": 0.9783, "step": 4490 }, { "epoch": 0.54, "grad_norm": 0.18641537427902222, "learning_rate": 0.0002912963862315185, "loss": 0.8872, "step": 4495 }, { "epoch": 0.54, "grad_norm": 0.19384218752384186, "learning_rate": 0.0002912618100966725, "loss": 0.9641, "step": 4500 }, { "epoch": 0.54, "grad_norm": 0.19041642546653748, "learning_rate": 0.00029122716747932747, "loss": 0.9446, "step": 4505 }, { "epoch": 0.54, "grad_norm": 0.20249037444591522, "learning_rate": 0.0002911924583957874, "loss": 0.9998, "step": 4510 }, { "epoch": 0.54, "grad_norm": 0.1947818547487259, "learning_rate": 0.00029115768286238757, "loss": 0.9414, "step": 4515 }, { "epoch": 0.54, "grad_norm": 0.1864546537399292, "learning_rate": 0.00029112284089549445, "loss": 1.0308, "step": 4520 }, { "epoch": 0.55, "grad_norm": 0.18546593189239502, "learning_rate": 0.0002910879325115059, "loss": 0.9426, "step": 4525 }, { "epoch": 0.55, "grad_norm": 0.19408635795116425, "learning_rate": 0.0002910529577268509, "loss": 0.9535, "step": 4530 }, { "epoch": 0.55, "grad_norm": 0.19161821901798248, "learning_rate": 0.0002910179165579898, "loss": 0.9453, "step": 4535 }, { "epoch": 0.55, "grad_norm": 0.1930609792470932, "learning_rate": 0.00029098280902141406, "loss": 1.051, "step": 4540 }, { "epoch": 0.55, "grad_norm": 0.17416320741176605, "learning_rate": 0.0002909476351336465, "loss": 0.9667, "step": 4545 }, { "epoch": 0.55, "grad_norm": 0.1923515945672989, "learning_rate": 
0.0002909123949112412, "loss": 0.9336, "step": 4550 }, { "epoch": 0.55, "grad_norm": 0.19930413365364075, "learning_rate": 0.0002908770883707832, "loss": 1.032, "step": 4555 }, { "epoch": 0.55, "grad_norm": 0.19108958542346954, "learning_rate": 0.00029084171552888914, "loss": 0.9452, "step": 4560 }, { "epoch": 0.55, "grad_norm": 0.20645657181739807, "learning_rate": 0.00029080627640220647, "loss": 0.9045, "step": 4565 }, { "epoch": 0.55, "grad_norm": 0.21397635340690613, "learning_rate": 0.0002907707710074141, "loss": 0.8976, "step": 4570 }, { "epoch": 0.55, "grad_norm": 0.1848032921552658, "learning_rate": 0.000290735199361222, "loss": 0.8218, "step": 4575 }, { "epoch": 0.55, "grad_norm": 0.18126817047595978, "learning_rate": 0.0002906995614803715, "loss": 1.0663, "step": 4580 }, { "epoch": 0.55, "grad_norm": 0.1907825469970703, "learning_rate": 0.0002906638573816348, "loss": 0.9277, "step": 4585 }, { "epoch": 0.55, "grad_norm": 0.18289636075496674, "learning_rate": 0.0002906280870818156, "loss": 1.0149, "step": 4590 }, { "epoch": 0.55, "grad_norm": 0.17690010368824005, "learning_rate": 0.00029059225059774844, "loss": 1.0116, "step": 4595 }, { "epoch": 0.55, "grad_norm": 0.19581712782382965, "learning_rate": 0.00029055634794629924, "loss": 0.9966, "step": 4600 }, { "epoch": 0.55, "grad_norm": 0.20098170638084412, "learning_rate": 0.00029052037914436494, "loss": 1.0375, "step": 4605 }, { "epoch": 0.56, "grad_norm": 0.20016787946224213, "learning_rate": 0.00029048434420887373, "loss": 1.0045, "step": 4610 }, { "epoch": 0.56, "grad_norm": 0.1856658011674881, "learning_rate": 0.0002904482431567847, "loss": 1.0191, "step": 4615 }, { "epoch": 0.56, "grad_norm": 0.19540125131607056, "learning_rate": 0.0002904120760050884, "loss": 1.0104, "step": 4620 }, { "epoch": 0.56, "grad_norm": 0.20216308534145355, "learning_rate": 0.00029037584277080616, "loss": 1.0659, "step": 4625 }, { "epoch": 0.56, "grad_norm": 0.224759042263031, "learning_rate": 0.00029033954347099057, "loss": 
0.9496, "step": 4630 }, { "epoch": 0.56, "grad_norm": 0.16096115112304688, "learning_rate": 0.0002903031781227253, "loss": 0.9887, "step": 4635 }, { "epoch": 0.56, "grad_norm": 0.20530791580677032, "learning_rate": 0.00029026674674312503, "loss": 0.9417, "step": 4640 }, { "epoch": 0.56, "grad_norm": 0.17820000648498535, "learning_rate": 0.0002902302493493357, "loss": 0.9157, "step": 4645 }, { "epoch": 0.56, "grad_norm": 0.20749807357788086, "learning_rate": 0.00029019368595853407, "loss": 0.958, "step": 4650 }, { "epoch": 0.56, "grad_norm": 0.19266118109226227, "learning_rate": 0.00029015705658792817, "loss": 1.05, "step": 4655 }, { "epoch": 0.56, "grad_norm": 0.2041333168745041, "learning_rate": 0.00029012036125475695, "loss": 0.9891, "step": 4660 }, { "epoch": 0.56, "grad_norm": 0.19730497896671295, "learning_rate": 0.00029008359997629045, "loss": 1.0394, "step": 4665 }, { "epoch": 0.56, "grad_norm": 0.17330877482891083, "learning_rate": 0.00029004677276982986, "loss": 1.0221, "step": 4670 }, { "epoch": 0.56, "grad_norm": 0.1951434314250946, "learning_rate": 0.0002900098796527071, "loss": 1.0018, "step": 4675 }, { "epoch": 0.56, "grad_norm": 0.21202173829078674, "learning_rate": 0.00028997292064228544, "loss": 0.9171, "step": 4680 }, { "epoch": 0.56, "grad_norm": 0.19243790209293365, "learning_rate": 0.00028993589575595894, "loss": 0.9833, "step": 4685 }, { "epoch": 0.57, "grad_norm": 0.1826474815607071, "learning_rate": 0.00028989880501115276, "loss": 0.875, "step": 4690 }, { "epoch": 0.57, "grad_norm": 0.20149312913417816, "learning_rate": 0.0002898616484253231, "loss": 1.0055, "step": 4695 }, { "epoch": 0.57, "grad_norm": 0.19697706401348114, "learning_rate": 0.000289824426015957, "loss": 0.9464, "step": 4700 }, { "epoch": 0.57, "grad_norm": 0.19809427857398987, "learning_rate": 0.00028978713780057256, "loss": 0.9814, "step": 4705 }, { "epoch": 0.57, "grad_norm": 0.206287682056427, "learning_rate": 0.00028974978379671894, "loss": 0.9652, "step": 4710 }, { 
"epoch": 0.57, "grad_norm": 0.18428125977516174, "learning_rate": 0.0002897123640219761, "loss": 0.942, "step": 4715 }, { "epoch": 0.57, "grad_norm": 0.16364052891731262, "learning_rate": 0.000289674878493955, "loss": 0.9741, "step": 4720 }, { "epoch": 0.57, "grad_norm": 0.20780855417251587, "learning_rate": 0.0002896373272302977, "loss": 0.9654, "step": 4725 }, { "epoch": 0.57, "grad_norm": 0.2168758362531662, "learning_rate": 0.0002895997102486769, "loss": 0.9247, "step": 4730 }, { "epoch": 0.57, "grad_norm": 0.1811356246471405, "learning_rate": 0.0002895620275667966, "loss": 0.9142, "step": 4735 }, { "epoch": 0.57, "grad_norm": 0.18936580419540405, "learning_rate": 0.00028952427920239134, "loss": 0.9091, "step": 4740 }, { "epoch": 0.57, "grad_norm": 0.19724291563034058, "learning_rate": 0.00028948646517322686, "loss": 0.9266, "step": 4745 }, { "epoch": 0.57, "grad_norm": 0.17332862317562103, "learning_rate": 0.0002894485854970997, "loss": 1.0734, "step": 4750 }, { "epoch": 0.57, "grad_norm": 0.1730174571275711, "learning_rate": 0.00028941064019183713, "loss": 1.0088, "step": 4755 }, { "epoch": 0.57, "grad_norm": 0.18203985691070557, "learning_rate": 0.0002893726292752977, "loss": 0.9595, "step": 4760 }, { "epoch": 0.57, "grad_norm": 0.18378064036369324, "learning_rate": 0.0002893345527653705, "loss": 0.9354, "step": 4765 }, { "epoch": 0.57, "grad_norm": 0.18141499161720276, "learning_rate": 0.0002892964106799757, "loss": 0.9606, "step": 4770 }, { "epoch": 0.58, "grad_norm": 0.19755259156227112, "learning_rate": 0.000289258203037064, "loss": 1.054, "step": 4775 }, { "epoch": 0.58, "grad_norm": 0.213453009724617, "learning_rate": 0.0002892199298546174, "loss": 0.8363, "step": 4780 }, { "epoch": 0.58, "grad_norm": 0.18598608672618866, "learning_rate": 0.00028918159115064846, "loss": 0.9375, "step": 4785 }, { "epoch": 0.58, "grad_norm": 0.19149407744407654, "learning_rate": 0.0002891431869432006, "loss": 0.97, "step": 4790 }, { "epoch": 0.58, "grad_norm": 
0.20365601778030396, "learning_rate": 0.0002891047172503482, "loss": 0.9405, "step": 4795 }, { "epoch": 0.58, "grad_norm": 0.18206772208213806, "learning_rate": 0.0002890661820901963, "loss": 1.011, "step": 4800 }, { "epoch": 0.58, "grad_norm": 0.18795382976531982, "learning_rate": 0.00028902758148088094, "loss": 0.854, "step": 4805 }, { "epoch": 0.58, "grad_norm": 0.18891260027885437, "learning_rate": 0.00028898891544056873, "loss": 0.9468, "step": 4810 }, { "epoch": 0.58, "grad_norm": 0.19970501959323883, "learning_rate": 0.0002889501839874572, "loss": 0.9918, "step": 4815 }, { "epoch": 0.58, "grad_norm": 0.20287778973579407, "learning_rate": 0.00028891138713977476, "loss": 0.9039, "step": 4820 }, { "epoch": 0.58, "grad_norm": 0.17449329793453217, "learning_rate": 0.0002888725249157804, "loss": 0.928, "step": 4825 }, { "epoch": 0.58, "grad_norm": 0.22723489999771118, "learning_rate": 0.000288833597333764, "loss": 0.9622, "step": 4830 }, { "epoch": 0.58, "grad_norm": 0.17067822813987732, "learning_rate": 0.0002887946044120461, "loss": 0.9349, "step": 4835 }, { "epoch": 0.58, "grad_norm": 0.19400516152381897, "learning_rate": 0.00028875554616897823, "loss": 0.9223, "step": 4840 }, { "epoch": 0.58, "grad_norm": 0.1980266571044922, "learning_rate": 0.00028871642262294234, "loss": 0.9518, "step": 4845 }, { "epoch": 0.58, "grad_norm": 0.1934380978345871, "learning_rate": 0.0002886772337923514, "loss": 1.0257, "step": 4850 }, { "epoch": 0.58, "grad_norm": 0.1748974621295929, "learning_rate": 0.0002886379796956489, "loss": 1.0209, "step": 4855 }, { "epoch": 0.59, "grad_norm": 0.17561843991279602, "learning_rate": 0.0002885986603513091, "loss": 0.9646, "step": 4860 }, { "epoch": 0.59, "grad_norm": 0.19227540493011475, "learning_rate": 0.00028855927577783706, "loss": 0.9638, "step": 4865 }, { "epoch": 0.59, "grad_norm": 0.20389790832996368, "learning_rate": 0.0002885198259937684, "loss": 0.9399, "step": 4870 }, { "epoch": 0.59, "grad_norm": 0.18054336309432983, 
"learning_rate": 0.0002884803110176695, "loss": 0.9318, "step": 4875 }, { "epoch": 0.59, "grad_norm": 0.1733674854040146, "learning_rate": 0.00028844073086813744, "loss": 0.8768, "step": 4880 }, { "epoch": 0.59, "grad_norm": 0.185361847281456, "learning_rate": 0.00028840108556380006, "loss": 0.9853, "step": 4885 }, { "epoch": 0.59, "grad_norm": 0.1938917487859726, "learning_rate": 0.00028836137512331555, "loss": 0.9558, "step": 4890 }, { "epoch": 0.59, "grad_norm": 0.18514370918273926, "learning_rate": 0.00028832159956537306, "loss": 0.9225, "step": 4895 }, { "epoch": 0.59, "grad_norm": 0.1769241839647293, "learning_rate": 0.0002882817589086924, "loss": 1.0125, "step": 4900 }, { "epoch": 0.59, "grad_norm": 0.18880169093608856, "learning_rate": 0.0002882418531720237, "loss": 0.9748, "step": 4905 }, { "epoch": 0.59, "grad_norm": 0.20545773208141327, "learning_rate": 0.0002882018823741481, "loss": 0.972, "step": 4910 }, { "epoch": 0.59, "grad_norm": 0.17919191718101501, "learning_rate": 0.00028816184653387706, "loss": 0.9201, "step": 4915 }, { "epoch": 0.59, "grad_norm": 0.17469099164009094, "learning_rate": 0.00028812174567005285, "loss": 0.9619, "step": 4920 }, { "epoch": 0.59, "grad_norm": 0.1765175461769104, "learning_rate": 0.0002880815798015483, "loss": 0.9707, "step": 4925 }, { "epoch": 0.59, "grad_norm": 0.18801696598529816, "learning_rate": 0.00028804134894726665, "loss": 0.9369, "step": 4930 }, { "epoch": 0.59, "grad_norm": 0.19199238717556, "learning_rate": 0.00028800105312614196, "loss": 0.8942, "step": 4935 }, { "epoch": 0.6, "grad_norm": 0.17167691886425018, "learning_rate": 0.00028796069235713893, "loss": 0.8596, "step": 4940 }, { "epoch": 0.6, "grad_norm": 0.18981756269931793, "learning_rate": 0.00028792026665925245, "loss": 1.0821, "step": 4945 }, { "epoch": 0.6, "grad_norm": 0.18595005571842194, "learning_rate": 0.0002878797760515083, "loss": 1.0417, "step": 4950 }, { "epoch": 0.6, "grad_norm": 0.20380659401416779, "learning_rate": 
0.0002878392205529627, "loss": 0.9396, "step": 4955 }, { "epoch": 0.6, "grad_norm": 0.21177223324775696, "learning_rate": 0.0002877986001827024, "loss": 0.9777, "step": 4960 }, { "epoch": 0.6, "grad_norm": 0.18329915404319763, "learning_rate": 0.00028775791495984474, "loss": 1.068, "step": 4965 }, { "epoch": 0.6, "grad_norm": 0.2002885490655899, "learning_rate": 0.0002877171649035375, "loss": 0.9144, "step": 4970 }, { "epoch": 0.6, "grad_norm": 0.18435192108154297, "learning_rate": 0.00028767635003295895, "loss": 0.8893, "step": 4975 }, { "epoch": 0.6, "grad_norm": 0.18662065267562866, "learning_rate": 0.000287635470367318, "loss": 0.9352, "step": 4980 }, { "epoch": 0.6, "grad_norm": 0.19780193269252777, "learning_rate": 0.000287594525925854, "loss": 0.9593, "step": 4985 }, { "epoch": 0.6, "grad_norm": 0.18069545924663544, "learning_rate": 0.0002875535167278367, "loss": 1.0398, "step": 4990 }, { "epoch": 0.6, "grad_norm": 0.1925516277551651, "learning_rate": 0.00028751244279256647, "loss": 0.9382, "step": 4995 }, { "epoch": 0.6, "grad_norm": 0.1789090633392334, "learning_rate": 0.0002874713041393739, "loss": 1.0408, "step": 5000 }, { "epoch": 0.6, "grad_norm": 0.20351684093475342, "learning_rate": 0.0002874301007876204, "loss": 1.0124, "step": 5005 }, { "epoch": 0.6, "grad_norm": 0.20231549441814423, "learning_rate": 0.00028738883275669755, "loss": 1.0305, "step": 5010 }, { "epoch": 0.6, "grad_norm": 0.17243053019046783, "learning_rate": 0.00028734750006602746, "loss": 0.9991, "step": 5015 }, { "epoch": 0.6, "grad_norm": 0.17361502349376678, "learning_rate": 0.0002873061027350627, "loss": 0.971, "step": 5020 }, { "epoch": 0.61, "grad_norm": 0.1816028356552124, "learning_rate": 0.00028726464078328615, "loss": 0.9863, "step": 5025 }, { "epoch": 0.61, "grad_norm": 0.18330328166484833, "learning_rate": 0.00028722311423021125, "loss": 1.0478, "step": 5030 }, { "epoch": 0.61, "grad_norm": 0.20598195493221283, "learning_rate": 0.00028718152309538175, "loss": 0.9304, 
"step": 5035 }, { "epoch": 0.61, "grad_norm": 0.19323958456516266, "learning_rate": 0.00028713986739837183, "loss": 0.9557, "step": 5040 }, { "epoch": 0.61, "grad_norm": 0.17913362383842468, "learning_rate": 0.0002870981471587861, "loss": 0.8907, "step": 5045 }, { "epoch": 0.61, "grad_norm": 0.1934191733598709, "learning_rate": 0.0002870563623962593, "loss": 0.9355, "step": 5050 }, { "epoch": 0.61, "grad_norm": 0.1913587599992752, "learning_rate": 0.00028701451313045695, "loss": 0.9905, "step": 5055 }, { "epoch": 0.61, "grad_norm": 0.19491787254810333, "learning_rate": 0.0002869725993810746, "loss": 0.9745, "step": 5060 }, { "epoch": 0.61, "grad_norm": 0.20128677785396576, "learning_rate": 0.00028693062116783816, "loss": 0.9302, "step": 5065 }, { "epoch": 0.61, "grad_norm": 0.18914251029491425, "learning_rate": 0.00028688857851050416, "loss": 0.891, "step": 5070 }, { "epoch": 0.61, "grad_norm": 0.19719275832176208, "learning_rate": 0.0002868464714288592, "loss": 0.9716, "step": 5075 }, { "epoch": 0.61, "grad_norm": 0.1894945651292801, "learning_rate": 0.0002868042999427202, "loss": 0.9554, "step": 5080 }, { "epoch": 0.61, "grad_norm": 0.1891942322254181, "learning_rate": 0.0002867620640719346, "loss": 0.8918, "step": 5085 }, { "epoch": 0.61, "grad_norm": 0.1876845359802246, "learning_rate": 0.00028671976383637984, "loss": 0.98, "step": 5090 }, { "epoch": 0.61, "grad_norm": 0.17926909029483795, "learning_rate": 0.00028667739925596395, "loss": 0.8958, "step": 5095 }, { "epoch": 0.61, "grad_norm": 0.203196182847023, "learning_rate": 0.000286634970350625, "loss": 0.9356, "step": 5100 }, { "epoch": 0.62, "grad_norm": 0.17668934166431427, "learning_rate": 0.00028659247714033154, "loss": 0.9355, "step": 5105 }, { "epoch": 0.62, "grad_norm": 0.1789340376853943, "learning_rate": 0.00028654991964508224, "loss": 1.0171, "step": 5110 }, { "epoch": 0.62, "grad_norm": 0.17405730485916138, "learning_rate": 0.00028650729788490606, "loss": 0.9634, "step": 5115 }, { "epoch": 0.62, 
"grad_norm": 0.1780971735715866, "learning_rate": 0.0002864646118798622, "loss": 0.9728, "step": 5120 }, { "epoch": 0.62, "grad_norm": 0.2179042249917984, "learning_rate": 0.0002864218616500402, "loss": 1.0805, "step": 5125 }, { "epoch": 0.62, "grad_norm": 0.20955954492092133, "learning_rate": 0.00028637904721555966, "loss": 0.9261, "step": 5130 }, { "epoch": 0.62, "grad_norm": 0.18898724019527435, "learning_rate": 0.00028633616859657045, "loss": 1.0134, "step": 5135 }, { "epoch": 0.62, "grad_norm": 0.20803610980510712, "learning_rate": 0.0002862932258132527, "loss": 0.9414, "step": 5140 }, { "epoch": 0.62, "grad_norm": 0.21270735561847687, "learning_rate": 0.00028625021888581685, "loss": 0.9779, "step": 5145 }, { "epoch": 0.62, "grad_norm": 0.20544709265232086, "learning_rate": 0.0002862071478345031, "loss": 0.9707, "step": 5150 }, { "epoch": 0.62, "grad_norm": 0.19069305062294006, "learning_rate": 0.00028616401267958237, "loss": 0.9954, "step": 5155 }, { "epoch": 0.62, "grad_norm": 0.19101087749004364, "learning_rate": 0.00028612081344135546, "loss": 0.9348, "step": 5160 }, { "epoch": 0.62, "grad_norm": 0.17791791260242462, "learning_rate": 0.0002860775501401532, "loss": 0.9205, "step": 5165 }, { "epoch": 0.62, "grad_norm": 0.21044310927391052, "learning_rate": 0.00028603422279633694, "loss": 0.9649, "step": 5170 }, { "epoch": 0.62, "grad_norm": 0.1965063214302063, "learning_rate": 0.0002859908314302978, "loss": 0.9648, "step": 5175 }, { "epoch": 0.62, "grad_norm": 0.19343961775302887, "learning_rate": 0.00028594737606245726, "loss": 0.932, "step": 5180 }, { "epoch": 0.62, "grad_norm": 0.18944412469863892, "learning_rate": 0.00028590385671326695, "loss": 0.9144, "step": 5185 }, { "epoch": 0.63, "grad_norm": 0.20036911964416504, "learning_rate": 0.0002858602734032084, "loss": 0.965, "step": 5190 }, { "epoch": 0.63, "grad_norm": 0.21332001686096191, "learning_rate": 0.00028581662615279345, "loss": 0.9766, "step": 5195 }, { "epoch": 0.63, "grad_norm": 
0.17393194139003754, "learning_rate": 0.00028577291498256384, "loss": 0.9954, "step": 5200 }, { "epoch": 0.63, "grad_norm": 0.1827279031276703, "learning_rate": 0.0002857291399130916, "loss": 0.9646, "step": 5205 }, { "epoch": 0.63, "grad_norm": 0.190265491604805, "learning_rate": 0.0002856853009649787, "loss": 1.0151, "step": 5210 }, { "epoch": 0.63, "grad_norm": 0.18640612065792084, "learning_rate": 0.0002856413981588572, "loss": 1.0085, "step": 5215 }, { "epoch": 0.63, "grad_norm": 0.21478848159313202, "learning_rate": 0.00028559743151538913, "loss": 0.9588, "step": 5220 }, { "epoch": 0.63, "grad_norm": 0.18051999807357788, "learning_rate": 0.00028555340105526676, "loss": 0.9699, "step": 5225 }, { "epoch": 0.63, "grad_norm": 0.1969325840473175, "learning_rate": 0.0002855093067992123, "loss": 0.9939, "step": 5230 }, { "epoch": 0.63, "grad_norm": 0.2040921300649643, "learning_rate": 0.00028546514876797796, "loss": 0.8854, "step": 5235 }, { "epoch": 0.63, "grad_norm": 0.1841362863779068, "learning_rate": 0.0002854209269823459, "loss": 0.9255, "step": 5240 }, { "epoch": 0.63, "grad_norm": 0.19539733231067657, "learning_rate": 0.0002853766414631285, "loss": 1.0226, "step": 5245 }, { "epoch": 0.63, "grad_norm": 0.19817125797271729, "learning_rate": 0.0002853322922311678, "loss": 0.9626, "step": 5250 }, { "epoch": 0.63, "grad_norm": 0.21229805052280426, "learning_rate": 0.0002852878793073362, "loss": 0.957, "step": 5255 }, { "epoch": 0.63, "grad_norm": 0.19763095676898956, "learning_rate": 0.0002852434027125358, "loss": 0.8457, "step": 5260 }, { "epoch": 0.63, "grad_norm": 0.19969575107097626, "learning_rate": 0.00028519886246769884, "loss": 1.0371, "step": 5265 }, { "epoch": 0.63, "grad_norm": 0.1981430947780609, "learning_rate": 0.0002851542585937873, "loss": 0.9438, "step": 5270 }, { "epoch": 0.64, "grad_norm": 0.20760881900787354, "learning_rate": 0.0002851095911117934, "loss": 1.0244, "step": 5275 }, { "epoch": 0.64, "grad_norm": 0.21313069760799408, 
"learning_rate": 0.00028506486004273903, "loss": 1.0125, "step": 5280 }, { "epoch": 0.64, "grad_norm": 0.18353518843650818, "learning_rate": 0.00028502006540767616, "loss": 0.9716, "step": 5285 }, { "epoch": 0.64, "grad_norm": 0.1756833791732788, "learning_rate": 0.0002849752072276867, "loss": 1.0393, "step": 5290 }, { "epoch": 0.64, "grad_norm": 0.20792143046855927, "learning_rate": 0.00028493028552388223, "loss": 1.0315, "step": 5295 }, { "epoch": 0.64, "grad_norm": 0.2053552269935608, "learning_rate": 0.00028488530031740454, "loss": 1.0352, "step": 5300 }, { "epoch": 0.64, "grad_norm": 0.1828060746192932, "learning_rate": 0.00028484025162942516, "loss": 0.8927, "step": 5305 }, { "epoch": 0.64, "grad_norm": 0.19744126498699188, "learning_rate": 0.0002847951394811454, "loss": 0.9503, "step": 5310 }, { "epoch": 0.64, "grad_norm": 0.22091755270957947, "learning_rate": 0.0002847499638937966, "loss": 0.9118, "step": 5315 }, { "epoch": 0.64, "grad_norm": 0.19735994935035706, "learning_rate": 0.0002847047248886399, "loss": 0.9983, "step": 5320 }, { "epoch": 0.64, "grad_norm": 0.17844171822071075, "learning_rate": 0.00028465942248696624, "loss": 0.9512, "step": 5325 }, { "epoch": 0.64, "grad_norm": 0.20808811485767365, "learning_rate": 0.00028461405671009645, "loss": 0.9539, "step": 5330 }, { "epoch": 0.64, "grad_norm": 0.18508221209049225, "learning_rate": 0.00028456862757938117, "loss": 0.9328, "step": 5335 }, { "epoch": 0.64, "grad_norm": 0.1841544657945633, "learning_rate": 0.0002845231351162009, "loss": 0.9095, "step": 5340 }, { "epoch": 0.64, "grad_norm": 0.1901453733444214, "learning_rate": 0.0002844775793419659, "loss": 0.8944, "step": 5345 }, { "epoch": 0.64, "grad_norm": 0.2000284641981125, "learning_rate": 0.00028443196027811617, "loss": 0.9468, "step": 5350 }, { "epoch": 0.65, "grad_norm": 0.2096180021762848, "learning_rate": 0.0002843862779461216, "loss": 0.9388, "step": 5355 }, { "epoch": 0.65, "grad_norm": 0.18580293655395508, "learning_rate": 
0.00028434053236748175, "loss": 0.9352, "step": 5360 }, { "epoch": 0.65, "grad_norm": 0.21264111995697021, "learning_rate": 0.00028429472356372606, "loss": 1.0138, "step": 5365 }, { "epoch": 0.65, "grad_norm": 0.19658392667770386, "learning_rate": 0.00028424885155641373, "loss": 0.9843, "step": 5370 }, { "epoch": 0.65, "grad_norm": 0.19093391299247742, "learning_rate": 0.00028420291636713354, "loss": 0.9271, "step": 5375 }, { "epoch": 0.65, "grad_norm": 0.21088851988315582, "learning_rate": 0.00028415691801750417, "loss": 1.0146, "step": 5380 }, { "epoch": 0.65, "grad_norm": 0.17928850650787354, "learning_rate": 0.000284110856529174, "loss": 0.9973, "step": 5385 }, { "epoch": 0.65, "grad_norm": 0.18797273933887482, "learning_rate": 0.000284064731923821, "loss": 1.0008, "step": 5390 }, { "epoch": 0.65, "grad_norm": 0.19398821890354156, "learning_rate": 0.00028401854422315306, "loss": 0.8467, "step": 5395 }, { "epoch": 0.65, "grad_norm": 0.20564605295658112, "learning_rate": 0.0002839722934489076, "loss": 0.9809, "step": 5400 }, { "epoch": 0.65, "grad_norm": 0.2124805599451065, "learning_rate": 0.0002839259796228517, "loss": 0.9094, "step": 5405 }, { "epoch": 0.65, "grad_norm": 0.16906094551086426, "learning_rate": 0.0002838796027667823, "loss": 0.9083, "step": 5410 }, { "epoch": 0.65, "grad_norm": 0.21367835998535156, "learning_rate": 0.0002838331629025258, "loss": 0.9278, "step": 5415 }, { "epoch": 0.65, "grad_norm": 0.20226390659809113, "learning_rate": 0.00028378666005193846, "loss": 0.9629, "step": 5420 }, { "epoch": 0.65, "grad_norm": 0.21728989481925964, "learning_rate": 0.0002837400942369059, "loss": 0.952, "step": 5425 }, { "epoch": 0.65, "grad_norm": 0.19252964854240417, "learning_rate": 0.0002836934654793436, "loss": 0.9497, "step": 5430 }, { "epoch": 0.65, "grad_norm": 0.21155524253845215, "learning_rate": 0.00028364677380119665, "loss": 0.9298, "step": 5435 }, { "epoch": 0.66, "grad_norm": 0.18080827593803406, "learning_rate": 0.0002836000192244397, 
"loss": 0.9502, "step": 5440 }, { "epoch": 0.66, "grad_norm": 0.19508054852485657, "learning_rate": 0.000283553201771077, "loss": 0.9299, "step": 5445 }, { "epoch": 0.66, "grad_norm": 0.19555960595607758, "learning_rate": 0.00028350632146314234, "loss": 0.8338, "step": 5450 }, { "epoch": 0.66, "grad_norm": 0.19276316463947296, "learning_rate": 0.00028345937832269924, "loss": 1.0148, "step": 5455 }, { "epoch": 0.66, "grad_norm": 0.19060495495796204, "learning_rate": 0.0002834123723718406, "loss": 0.9975, "step": 5460 }, { "epoch": 0.66, "grad_norm": 0.18638461828231812, "learning_rate": 0.00028336530363268903, "loss": 1.1051, "step": 5465 }, { "epoch": 0.66, "grad_norm": 0.21244901418685913, "learning_rate": 0.00028331817212739666, "loss": 0.9824, "step": 5470 }, { "epoch": 0.66, "grad_norm": 0.1781817078590393, "learning_rate": 0.0002832709778781451, "loss": 0.9437, "step": 5475 }, { "epoch": 0.66, "grad_norm": 0.21162748336791992, "learning_rate": 0.00028322372090714565, "loss": 0.8513, "step": 5480 }, { "epoch": 0.66, "grad_norm": 0.20514048635959625, "learning_rate": 0.00028317640123663886, "loss": 0.9249, "step": 5485 }, { "epoch": 0.66, "grad_norm": 0.1924484670162201, "learning_rate": 0.000283129018888895, "loss": 0.9966, "step": 5490 }, { "epoch": 0.66, "grad_norm": 0.2287726253271103, "learning_rate": 0.00028308157388621375, "loss": 1.0219, "step": 5495 }, { "epoch": 0.66, "grad_norm": 0.1756589561700821, "learning_rate": 0.00028303406625092433, "loss": 0.9488, "step": 5500 }, { "epoch": 0.66, "grad_norm": 0.23016351461410522, "learning_rate": 0.00028298649600538546, "loss": 0.9064, "step": 5505 }, { "epoch": 0.66, "grad_norm": 0.17148062586784363, "learning_rate": 0.0002829388631719852, "loss": 1.0173, "step": 5510 }, { "epoch": 0.66, "grad_norm": 0.2095944881439209, "learning_rate": 0.0002828911677731411, "loss": 0.9834, "step": 5515 }, { "epoch": 0.67, "grad_norm": 0.1759607195854187, "learning_rate": 0.00028284340983130037, "loss": 0.935, "step": 5520 
}, { "epoch": 0.67, "grad_norm": 0.2132364809513092, "learning_rate": 0.0002827955893689393, "loss": 0.9313, "step": 5525 }, { "epoch": 0.67, "grad_norm": 0.18752391636371613, "learning_rate": 0.00028274770640856394, "loss": 0.8606, "step": 5530 }, { "epoch": 0.67, "grad_norm": 0.1804865151643753, "learning_rate": 0.00028269976097270946, "loss": 1.0291, "step": 5535 }, { "epoch": 0.67, "grad_norm": 0.17835399508476257, "learning_rate": 0.0002826517530839407, "loss": 0.9643, "step": 5540 }, { "epoch": 0.67, "grad_norm": 0.21218432486057281, "learning_rate": 0.0002826036827648517, "loss": 0.9989, "step": 5545 }, { "epoch": 0.67, "grad_norm": 0.20228761434555054, "learning_rate": 0.0002825555500380659, "loss": 0.9734, "step": 5550 }, { "epoch": 0.67, "grad_norm": 0.19665905833244324, "learning_rate": 0.0002825073549262363, "loss": 0.9381, "step": 5555 }, { "epoch": 0.67, "grad_norm": 0.20172518491744995, "learning_rate": 0.000282459097452045, "loss": 1.0319, "step": 5560 }, { "epoch": 0.67, "grad_norm": 0.19320924580097198, "learning_rate": 0.0002824107776382036, "loss": 1.0349, "step": 5565 }, { "epoch": 0.67, "grad_norm": 0.1959851086139679, "learning_rate": 0.00028236239550745305, "loss": 0.9762, "step": 5570 }, { "epoch": 0.67, "grad_norm": 0.18430382013320923, "learning_rate": 0.00028231395108256353, "loss": 1.0082, "step": 5575 }, { "epoch": 0.67, "grad_norm": 0.18709762394428253, "learning_rate": 0.0002822654443863346, "loss": 0.9993, "step": 5580 }, { "epoch": 0.67, "grad_norm": 0.20324410498142242, "learning_rate": 0.00028221687544159515, "loss": 0.9235, "step": 5585 }, { "epoch": 0.67, "grad_norm": 0.1890849769115448, "learning_rate": 0.0002821682442712033, "loss": 1.0188, "step": 5590 }, { "epoch": 0.67, "grad_norm": 0.19836723804473877, "learning_rate": 0.00028211955089804664, "loss": 0.9934, "step": 5595 }, { "epoch": 0.67, "grad_norm": 0.17568111419677734, "learning_rate": 0.00028207079534504166, "loss": 0.8919, "step": 5600 }, { "epoch": 0.68, 
"grad_norm": 0.20651723444461823, "learning_rate": 0.0002820219776351345, "loss": 0.9338, "step": 5605 }, { "epoch": 0.68, "grad_norm": 0.18715310096740723, "learning_rate": 0.00028197309779130037, "loss": 0.9869, "step": 5610 }, { "epoch": 0.68, "grad_norm": 0.20862345397472382, "learning_rate": 0.0002819241558365437, "loss": 0.9206, "step": 5615 }, { "epoch": 0.68, "grad_norm": 0.19458311796188354, "learning_rate": 0.00028187515179389826, "loss": 0.9267, "step": 5620 }, { "epoch": 0.68, "grad_norm": 0.17458118498325348, "learning_rate": 0.0002818260856864269, "loss": 0.879, "step": 5625 }, { "epoch": 0.68, "grad_norm": 0.19164584577083588, "learning_rate": 0.0002817769575372218, "loss": 0.9952, "step": 5630 }, { "epoch": 0.68, "grad_norm": 0.1988450139760971, "learning_rate": 0.00028172776736940436, "loss": 0.8229, "step": 5635 }, { "epoch": 0.68, "grad_norm": 0.2000226527452469, "learning_rate": 0.000281678515206125, "loss": 0.9074, "step": 5640 }, { "epoch": 0.68, "grad_norm": 0.21437396109104156, "learning_rate": 0.0002816292010705635, "loss": 0.9317, "step": 5645 }, { "epoch": 0.68, "grad_norm": 0.18313203752040863, "learning_rate": 0.0002815798249859287, "loss": 0.9046, "step": 5650 }, { "epoch": 0.68, "grad_norm": 0.17462490499019623, "learning_rate": 0.00028153038697545867, "loss": 1.0031, "step": 5655 }, { "epoch": 0.68, "grad_norm": 0.2010510116815567, "learning_rate": 0.0002814808870624205, "loss": 0.9259, "step": 5660 }, { "epoch": 0.68, "grad_norm": 0.21494421362876892, "learning_rate": 0.00028143132527011055, "loss": 0.961, "step": 5665 }, { "epoch": 0.68, "grad_norm": 0.20886385440826416, "learning_rate": 0.00028138170162185424, "loss": 0.863, "step": 5670 }, { "epoch": 0.68, "grad_norm": 0.204929918050766, "learning_rate": 0.00028133201614100604, "loss": 0.948, "step": 5675 }, { "epoch": 0.68, "grad_norm": 0.21892932057380676, "learning_rate": 0.0002812822688509497, "loss": 0.9707, "step": 5680 }, { "epoch": 0.68, "grad_norm": 0.2091946005821228, 
"learning_rate": 0.00028123245977509784, "loss": 0.9417, "step": 5685 }, { "epoch": 0.69, "grad_norm": 0.21841882169246674, "learning_rate": 0.0002811825889368924, "loss": 1.0073, "step": 5690 }, { "epoch": 0.69, "grad_norm": 0.17423883080482483, "learning_rate": 0.0002811326563598041, "loss": 0.9885, "step": 5695 }, { "epoch": 0.69, "grad_norm": 0.22584623098373413, "learning_rate": 0.0002810826620673329, "loss": 0.9202, "step": 5700 }, { "epoch": 0.69, "grad_norm": 0.2022750824689865, "learning_rate": 0.00028103260608300785, "loss": 1.1009, "step": 5705 }, { "epoch": 0.69, "grad_norm": 0.20764531195163727, "learning_rate": 0.00028098248843038694, "loss": 0.9689, "step": 5710 }, { "epoch": 0.69, "grad_norm": 0.2145623415708542, "learning_rate": 0.00028093230913305716, "loss": 0.9474, "step": 5715 }, { "epoch": 0.69, "grad_norm": 0.1971713900566101, "learning_rate": 0.00028088206821463456, "loss": 0.8979, "step": 5720 }, { "epoch": 0.69, "grad_norm": 0.18897688388824463, "learning_rate": 0.00028083176569876426, "loss": 0.9777, "step": 5725 }, { "epoch": 0.69, "grad_norm": 0.20765312016010284, "learning_rate": 0.00028078140160912017, "loss": 0.8788, "step": 5730 }, { "epoch": 0.69, "grad_norm": 0.2097761631011963, "learning_rate": 0.00028073097596940537, "loss": 0.9324, "step": 5735 }, { "epoch": 0.69, "grad_norm": 0.19260716438293457, "learning_rate": 0.0002806804888033519, "loss": 0.8419, "step": 5740 }, { "epoch": 0.69, "grad_norm": 0.209737628698349, "learning_rate": 0.0002806299401347206, "loss": 0.8578, "step": 5745 }, { "epoch": 0.69, "grad_norm": 0.1828380972146988, "learning_rate": 0.00028057932998730136, "loss": 0.9546, "step": 5750 }, { "epoch": 0.69, "grad_norm": 0.19403524696826935, "learning_rate": 0.00028052865838491304, "loss": 1.0046, "step": 5755 }, { "epoch": 0.69, "grad_norm": 0.21876457333564758, "learning_rate": 0.00028047792535140336, "loss": 0.9358, "step": 5760 }, { "epoch": 0.69, "grad_norm": 0.182986319065094, "learning_rate": 
0.00028042713091064894, "loss": 0.9362, "step": 5765 }, { "epoch": 0.7, "grad_norm": 0.1893165558576584, "learning_rate": 0.00028037627508655546, "loss": 0.9647, "step": 5770 }, { "epoch": 0.7, "grad_norm": 0.20949877798557281, "learning_rate": 0.0002803253579030572, "loss": 0.9254, "step": 5775 }, { "epoch": 0.7, "grad_norm": 0.20711813867092133, "learning_rate": 0.0002802743793841175, "loss": 0.9423, "step": 5780 }, { "epoch": 0.7, "grad_norm": 0.20353513956069946, "learning_rate": 0.00028022333955372863, "loss": 0.9021, "step": 5785 }, { "epoch": 0.7, "grad_norm": 0.17357411980628967, "learning_rate": 0.0002801722384359116, "loss": 1.0085, "step": 5790 }, { "epoch": 0.7, "grad_norm": 0.20761722326278687, "learning_rate": 0.0002801210760547162, "loss": 0.9346, "step": 5795 }, { "epoch": 0.7, "grad_norm": 0.2074555903673172, "learning_rate": 0.00028006985243422133, "loss": 0.9919, "step": 5800 }, { "epoch": 0.7, "grad_norm": 0.17981496453285217, "learning_rate": 0.00028001856759853435, "loss": 0.9016, "step": 5805 }, { "epoch": 0.7, "grad_norm": 0.19092728197574615, "learning_rate": 0.0002799672215717917, "loss": 0.9511, "step": 5810 }, { "epoch": 0.7, "grad_norm": 0.20729589462280273, "learning_rate": 0.0002799158143781585, "loss": 0.9397, "step": 5815 }, { "epoch": 0.7, "grad_norm": 0.19325435161590576, "learning_rate": 0.0002798643460418286, "loss": 1.0048, "step": 5820 }, { "epoch": 0.7, "grad_norm": 0.2112487405538559, "learning_rate": 0.0002798128165870249, "loss": 1.0018, "step": 5825 }, { "epoch": 0.7, "grad_norm": 0.20866331458091736, "learning_rate": 0.0002797612260379987, "loss": 1.0327, "step": 5830 }, { "epoch": 0.7, "grad_norm": 0.2057882696390152, "learning_rate": 0.0002797095744190303, "loss": 0.9205, "step": 5835 }, { "epoch": 0.7, "grad_norm": 0.22236908972263336, "learning_rate": 0.00027965786175442866, "loss": 0.9787, "step": 5840 }, { "epoch": 0.7, "grad_norm": 0.22123856842517853, "learning_rate": 0.0002796060880685315, "loss": 0.9991, 
"step": 5845 }, { "epoch": 0.7, "grad_norm": 0.19182252883911133, "learning_rate": 0.0002795542533857052, "loss": 0.9982, "step": 5850 }, { "epoch": 0.71, "grad_norm": 0.1859193593263626, "learning_rate": 0.00027950235773034485, "loss": 0.8689, "step": 5855 }, { "epoch": 0.71, "grad_norm": 0.20256547629833221, "learning_rate": 0.0002794504011268744, "loss": 0.9262, "step": 5860 }, { "epoch": 0.71, "grad_norm": 0.19035175442695618, "learning_rate": 0.0002793983835997462, "loss": 0.8788, "step": 5865 }, { "epoch": 0.71, "grad_norm": 0.22139105200767517, "learning_rate": 0.00027934630517344145, "loss": 0.9814, "step": 5870 }, { "epoch": 0.71, "grad_norm": 0.19183175265789032, "learning_rate": 0.00027929416587247014, "loss": 0.8917, "step": 5875 }, { "epoch": 0.71, "grad_norm": 0.2111898511648178, "learning_rate": 0.00027924196572137055, "loss": 0.9915, "step": 5880 }, { "epoch": 0.71, "grad_norm": 0.21190297603607178, "learning_rate": 0.00027918970474470996, "loss": 1.0738, "step": 5885 }, { "epoch": 0.71, "grad_norm": 0.18288253247737885, "learning_rate": 0.00027913738296708404, "loss": 0.9588, "step": 5890 }, { "epoch": 0.71, "grad_norm": 0.2102237343788147, "learning_rate": 0.0002790850004131171, "loss": 0.8978, "step": 5895 }, { "epoch": 0.71, "grad_norm": 0.22349348664283752, "learning_rate": 0.0002790325571074623, "loss": 0.8554, "step": 5900 }, { "epoch": 0.71, "grad_norm": 0.19603189826011658, "learning_rate": 0.000278980053074801, "loss": 0.8539, "step": 5905 }, { "epoch": 0.71, "grad_norm": 0.19490835070610046, "learning_rate": 0.0002789274883398435, "loss": 0.9656, "step": 5910 }, { "epoch": 0.71, "grad_norm": 0.19723361730575562, "learning_rate": 0.0002788748629273284, "loss": 0.8647, "step": 5915 }, { "epoch": 0.71, "grad_norm": 0.224490687251091, "learning_rate": 0.00027882217686202304, "loss": 1.0071, "step": 5920 }, { "epoch": 0.71, "grad_norm": 0.19954900443553925, "learning_rate": 0.0002787694301687231, "loss": 1.0931, "step": 5925 }, { "epoch": 
0.71, "grad_norm": 0.20417670905590057, "learning_rate": 0.00027871662287225316, "loss": 0.9013, "step": 5930 }, { "epoch": 0.72, "grad_norm": 0.20717184245586395, "learning_rate": 0.00027866375499746587, "loss": 1.0245, "step": 5935 }, { "epoch": 0.72, "grad_norm": 0.1946130394935608, "learning_rate": 0.0002786108265692427, "loss": 1.0055, "step": 5940 }, { "epoch": 0.72, "grad_norm": 0.19675332307815552, "learning_rate": 0.0002785578376124935, "loss": 0.9285, "step": 5945 }, { "epoch": 0.72, "grad_norm": 0.20337584614753723, "learning_rate": 0.00027850478815215666, "loss": 0.9615, "step": 5950 }, { "epoch": 0.72, "grad_norm": 0.2144574224948883, "learning_rate": 0.000278451678213199, "loss": 0.9641, "step": 5955 }, { "epoch": 0.72, "grad_norm": 0.19476327300071716, "learning_rate": 0.0002783985078206158, "loss": 0.8523, "step": 5960 }, { "epoch": 0.72, "grad_norm": 0.21178047358989716, "learning_rate": 0.00027834527699943087, "loss": 0.9104, "step": 5965 }, { "epoch": 0.72, "grad_norm": 0.20542141795158386, "learning_rate": 0.00027829198577469636, "loss": 0.9744, "step": 5970 }, { "epoch": 0.72, "grad_norm": 0.18979620933532715, "learning_rate": 0.0002782386341714929, "loss": 0.9429, "step": 5975 }, { "epoch": 0.72, "grad_norm": 0.18054763972759247, "learning_rate": 0.00027818522221492953, "loss": 0.9203, "step": 5980 }, { "epoch": 0.72, "grad_norm": 0.1968124806880951, "learning_rate": 0.0002781317499301437, "loss": 0.974, "step": 5985 }, { "epoch": 0.72, "grad_norm": 0.2041236311197281, "learning_rate": 0.0002780782173423012, "loss": 0.9027, "step": 5990 }, { "epoch": 0.72, "grad_norm": 0.19817684590816498, "learning_rate": 0.0002780246244765963, "loss": 0.9473, "step": 5995 }, { "epoch": 0.72, "grad_norm": 0.19076892733573914, "learning_rate": 0.0002779709713582515, "loss": 0.9915, "step": 6000 }, { "epoch": 0.72, "grad_norm": 0.1970764398574829, "learning_rate": 0.00027791725801251785, "loss": 0.9481, "step": 6005 }, { "epoch": 0.72, "grad_norm": 
0.20971329510211945, "learning_rate": 0.00027786348446467453, "loss": 0.996, "step": 6010 }, { "epoch": 0.72, "grad_norm": 0.18908850848674774, "learning_rate": 0.00027780965074002925, "loss": 0.9819, "step": 6015 }, { "epoch": 0.73, "grad_norm": 0.1883561760187149, "learning_rate": 0.0002777557568639179, "loss": 0.915, "step": 6020 }, { "epoch": 0.73, "grad_norm": 0.20280209183692932, "learning_rate": 0.0002777018028617047, "loss": 0.8848, "step": 6025 }, { "epoch": 0.73, "grad_norm": 0.23292230069637299, "learning_rate": 0.00027764778875878225, "loss": 1.0372, "step": 6030 }, { "epoch": 0.73, "grad_norm": 0.20740561187267303, "learning_rate": 0.0002775937145805714, "loss": 0.8555, "step": 6035 }, { "epoch": 0.73, "grad_norm": 0.22304105758666992, "learning_rate": 0.0002775395803525211, "loss": 1.0331, "step": 6040 }, { "epoch": 0.73, "grad_norm": 0.24742744863033295, "learning_rate": 0.0002774853861001089, "loss": 0.9875, "step": 6045 }, { "epoch": 0.73, "grad_norm": 0.19389699399471283, "learning_rate": 0.0002774311318488404, "loss": 0.9054, "step": 6050 }, { "epoch": 0.73, "grad_norm": 0.20694305002689362, "learning_rate": 0.00027737681762424936, "loss": 0.9755, "step": 6055 }, { "epoch": 0.73, "grad_norm": 0.21031732857227325, "learning_rate": 0.0002773224434518978, "loss": 0.9667, "step": 6060 }, { "epoch": 0.73, "grad_norm": 0.22336505353450775, "learning_rate": 0.0002772680093573762, "loss": 0.9697, "step": 6065 }, { "epoch": 0.73, "grad_norm": 0.2182660698890686, "learning_rate": 0.0002772135153663029, "loss": 1.0385, "step": 6070 }, { "epoch": 0.73, "grad_norm": 0.1784467101097107, "learning_rate": 0.00027715896150432473, "loss": 0.868, "step": 6075 }, { "epoch": 0.73, "grad_norm": 0.2188597470521927, "learning_rate": 0.0002771043477971164, "loss": 0.8924, "step": 6080 }, { "epoch": 0.73, "grad_norm": 0.2050686776638031, "learning_rate": 0.000277049674270381, "loss": 0.9832, "step": 6085 }, { "epoch": 0.73, "grad_norm": 0.1878337264060974, 
"learning_rate": 0.00027699494094984964, "loss": 0.935, "step": 6090 }, { "epoch": 0.73, "grad_norm": 0.20712505280971527, "learning_rate": 0.00027694014786128175, "loss": 0.9392, "step": 6095 }, { "epoch": 0.73, "grad_norm": 0.2205507606267929, "learning_rate": 0.00027688529503046473, "loss": 0.9543, "step": 6100 }, { "epoch": 0.74, "grad_norm": 0.21329660713672638, "learning_rate": 0.0002768303824832141, "loss": 0.9957, "step": 6105 }, { "epoch": 0.74, "grad_norm": 0.22187063097953796, "learning_rate": 0.00027677541024537363, "loss": 0.8959, "step": 6110 }, { "epoch": 0.74, "grad_norm": 0.2496599406003952, "learning_rate": 0.00027672037834281497, "loss": 0.9699, "step": 6115 }, { "epoch": 0.74, "grad_norm": 0.20457853376865387, "learning_rate": 0.000276665286801438, "loss": 0.9096, "step": 6120 }, { "epoch": 0.74, "grad_norm": 0.2040635645389557, "learning_rate": 0.00027661013564717063, "loss": 0.9936, "step": 6125 }, { "epoch": 0.74, "grad_norm": 0.203216552734375, "learning_rate": 0.00027655492490596885, "loss": 0.9246, "step": 6130 }, { "epoch": 0.74, "grad_norm": 0.18687255680561066, "learning_rate": 0.0002764996546038167, "loss": 0.8891, "step": 6135 }, { "epoch": 0.74, "grad_norm": 0.2031632959842682, "learning_rate": 0.00027644432476672614, "loss": 1.0533, "step": 6140 }, { "epoch": 0.74, "grad_norm": 0.19905006885528564, "learning_rate": 0.00027638893542073726, "loss": 0.9701, "step": 6145 }, { "epoch": 0.74, "grad_norm": 0.21555280685424805, "learning_rate": 0.0002763334865919181, "loss": 0.8389, "step": 6150 }, { "epoch": 0.74, "grad_norm": 0.2201685756444931, "learning_rate": 0.00027627797830636475, "loss": 0.9749, "step": 6155 }, { "epoch": 0.74, "grad_norm": 0.20311476290225983, "learning_rate": 0.00027622241059020123, "loss": 0.914, "step": 6160 }, { "epoch": 0.74, "grad_norm": 0.2248125523328781, "learning_rate": 0.0002761667834695796, "loss": 0.9121, "step": 6165 }, { "epoch": 0.74, "grad_norm": 0.19576215744018555, "learning_rate": 
0.00027611109697067975, "loss": 0.8951, "step": 6170 }, { "epoch": 0.74, "grad_norm": 0.22087323665618896, "learning_rate": 0.00027605535111970974, "loss": 0.8982, "step": 6175 }, { "epoch": 0.74, "grad_norm": 0.19735217094421387, "learning_rate": 0.0002759995459429053, "loss": 1.0688, "step": 6180 }, { "epoch": 0.75, "grad_norm": 0.20120544731616974, "learning_rate": 0.00027594368146653013, "loss": 0.8408, "step": 6185 }, { "epoch": 0.75, "grad_norm": 0.18868596851825714, "learning_rate": 0.0002758877577168761, "loss": 0.9014, "step": 6190 }, { "epoch": 0.75, "grad_norm": 0.19567087292671204, "learning_rate": 0.00027583177472026264, "loss": 0.9612, "step": 6195 }, { "epoch": 0.75, "grad_norm": 0.19370317459106445, "learning_rate": 0.00027577573250303725, "loss": 0.9728, "step": 6200 }, { "epoch": 0.75, "grad_norm": 0.20244130492210388, "learning_rate": 0.00027571963109157533, "loss": 1.0169, "step": 6205 }, { "epoch": 0.75, "grad_norm": 0.19868247210979462, "learning_rate": 0.0002756634705122799, "loss": 0.9771, "step": 6210 }, { "epoch": 0.75, "grad_norm": 0.20510122179985046, "learning_rate": 0.00027560725079158214, "loss": 0.958, "step": 6215 }, { "epoch": 0.75, "grad_norm": 0.21019263565540314, "learning_rate": 0.00027555097195594086, "loss": 0.9748, "step": 6220 }, { "epoch": 0.75, "grad_norm": 0.21073760092258453, "learning_rate": 0.0002754946340318428, "loss": 0.9534, "step": 6225 }, { "epoch": 0.75, "grad_norm": 0.23775875568389893, "learning_rate": 0.0002754382370458024, "loss": 0.9429, "step": 6230 }, { "epoch": 0.75, "grad_norm": 0.19734768569469452, "learning_rate": 0.00027538178102436194, "loss": 0.9563, "step": 6235 }, { "epoch": 0.75, "grad_norm": 0.20335164666175842, "learning_rate": 0.00027532526599409154, "loss": 0.9426, "step": 6240 }, { "epoch": 0.75, "grad_norm": 0.20909333229064941, "learning_rate": 0.000275268691981589, "loss": 0.9798, "step": 6245 }, { "epoch": 0.75, "grad_norm": 0.21021656692028046, "learning_rate": 0.00027521205901348, 
"loss": 0.9096, "step": 6250 }, { "epoch": 0.75, "grad_norm": 0.17861585319042206, "learning_rate": 0.0002751553671164179, "loss": 0.9817, "step": 6255 }, { "epoch": 0.75, "grad_norm": 0.2392309159040451, "learning_rate": 0.00027509861631708373, "loss": 0.9609, "step": 6260 }, { "epoch": 0.75, "grad_norm": 0.2181014120578766, "learning_rate": 0.0002750418066421863, "loss": 0.9862, "step": 6265 }, { "epoch": 0.76, "grad_norm": 0.21585321426391602, "learning_rate": 0.0002749849381184622, "loss": 0.9432, "step": 6270 }, { "epoch": 0.76, "grad_norm": 0.203776016831398, "learning_rate": 0.00027492801077267563, "loss": 0.9256, "step": 6275 }, { "epoch": 0.76, "grad_norm": 0.19271011650562286, "learning_rate": 0.00027487102463161835, "loss": 0.9535, "step": 6280 }, { "epoch": 0.76, "grad_norm": 0.20827655494213104, "learning_rate": 0.00027481397972211013, "loss": 1.0035, "step": 6285 }, { "epoch": 0.76, "grad_norm": 0.2051791101694107, "learning_rate": 0.0002747568760709981, "loss": 0.9953, "step": 6290 }, { "epoch": 0.76, "grad_norm": 0.2057846039533615, "learning_rate": 0.0002746997137051571, "loss": 0.9733, "step": 6295 }, { "epoch": 0.76, "grad_norm": 0.1999838501214981, "learning_rate": 0.00027464249265148966, "loss": 0.9024, "step": 6300 }, { "epoch": 0.76, "grad_norm": 0.22202569246292114, "learning_rate": 0.000274585212936926, "loss": 1.0046, "step": 6305 }, { "epoch": 0.76, "grad_norm": 0.20919722318649292, "learning_rate": 0.00027452787458842376, "loss": 0.9343, "step": 6310 }, { "epoch": 0.76, "grad_norm": 0.19470633566379547, "learning_rate": 0.0002744704776329683, "loss": 0.9979, "step": 6315 }, { "epoch": 0.76, "grad_norm": 0.21001891791820526, "learning_rate": 0.0002744130220975725, "loss": 0.9753, "step": 6320 }, { "epoch": 0.76, "grad_norm": 0.21184568107128143, "learning_rate": 0.0002743555080092769, "loss": 1.0369, "step": 6325 }, { "epoch": 0.76, "grad_norm": 0.18605372309684753, "learning_rate": 0.00027429793539514953, "loss": 0.9481, "step": 6330 }, 
{ "epoch": 0.76, "grad_norm": 0.23986585438251495, "learning_rate": 0.0002742403042822859, "loss": 0.9951, "step": 6335 }, { "epoch": 0.76, "grad_norm": 0.20543424785137177, "learning_rate": 0.00027418261469780924, "loss": 0.9394, "step": 6340 }, { "epoch": 0.76, "grad_norm": 0.2040044069290161, "learning_rate": 0.00027412486666887007, "loss": 0.9354, "step": 6345 }, { "epoch": 0.77, "grad_norm": 0.22892433404922485, "learning_rate": 0.0002740670602226466, "loss": 0.9778, "step": 6350 }, { "epoch": 0.77, "grad_norm": 0.19521504640579224, "learning_rate": 0.00027400919538634444, "loss": 0.958, "step": 6355 }, { "epoch": 0.77, "grad_norm": 0.23373307287693024, "learning_rate": 0.00027395127218719675, "loss": 0.9722, "step": 6360 }, { "epoch": 0.77, "grad_norm": 0.1768098771572113, "learning_rate": 0.0002738932906524641, "loss": 0.9762, "step": 6365 }, { "epoch": 0.77, "grad_norm": 0.22005042433738708, "learning_rate": 0.00027383525080943447, "loss": 0.8488, "step": 6370 }, { "epoch": 0.77, "grad_norm": 0.1887035220861435, "learning_rate": 0.00027377715268542334, "loss": 0.9401, "step": 6375 }, { "epoch": 0.77, "grad_norm": 0.192814439535141, "learning_rate": 0.0002737189963077737, "loss": 0.8912, "step": 6380 }, { "epoch": 0.77, "grad_norm": 0.2182081937789917, "learning_rate": 0.00027366078170385573, "loss": 0.9403, "step": 6385 }, { "epoch": 0.77, "grad_norm": 0.1897241324186325, "learning_rate": 0.0002736025089010673, "loss": 0.9351, "step": 6390 }, { "epoch": 0.77, "grad_norm": 0.21122263371944427, "learning_rate": 0.0002735441779268335, "loss": 1.0048, "step": 6395 }, { "epoch": 0.77, "grad_norm": 0.19820435345172882, "learning_rate": 0.00027348578880860677, "loss": 0.9342, "step": 6400 }, { "epoch": 0.77, "grad_norm": 0.1979910135269165, "learning_rate": 0.0002734273415738669, "loss": 0.9044, "step": 6405 }, { "epoch": 0.77, "grad_norm": 0.19765061140060425, "learning_rate": 0.0002733688362501213, "loss": 1.0196, "step": 6410 }, { "epoch": 0.77, "grad_norm": 
0.19544093310832977, "learning_rate": 0.0002733102728649044, "loss": 0.8445, "step": 6415 }, { "epoch": 0.77, "grad_norm": 0.20427776873111725, "learning_rate": 0.00027325165144577804, "loss": 0.924, "step": 6420 }, { "epoch": 0.77, "grad_norm": 0.26921671628952026, "learning_rate": 0.0002731929720203315, "loss": 0.9489, "step": 6425 }, { "epoch": 0.77, "grad_norm": 0.21001920104026794, "learning_rate": 0.00027313423461618116, "loss": 0.9385, "step": 6430 }, { "epoch": 0.78, "grad_norm": 0.21902385354042053, "learning_rate": 0.0002730754392609708, "loss": 0.9673, "step": 6435 }, { "epoch": 0.78, "grad_norm": 0.22075380384922028, "learning_rate": 0.0002730165859823716, "loss": 0.8973, "step": 6440 }, { "epoch": 0.78, "grad_norm": 0.22786179184913635, "learning_rate": 0.0002729576748080818, "loss": 0.9234, "step": 6445 }, { "epoch": 0.78, "grad_norm": 0.20489779114723206, "learning_rate": 0.00027289870576582695, "loss": 0.92, "step": 6450 }, { "epoch": 0.78, "grad_norm": 0.19824951887130737, "learning_rate": 0.0002728396788833598, "loss": 0.9119, "step": 6455 }, { "epoch": 0.78, "grad_norm": 0.23278149962425232, "learning_rate": 0.0002727805941884603, "loss": 1.0518, "step": 6460 }, { "epoch": 0.78, "grad_norm": 0.22010761499404907, "learning_rate": 0.00027272145170893585, "loss": 0.9255, "step": 6465 }, { "epoch": 0.78, "grad_norm": 0.22987917065620422, "learning_rate": 0.00027266225147262073, "loss": 0.9481, "step": 6470 }, { "epoch": 0.78, "grad_norm": 0.19295156002044678, "learning_rate": 0.00027260299350737656, "loss": 0.9497, "step": 6475 }, { "epoch": 0.78, "grad_norm": 0.18979798257350922, "learning_rate": 0.0002725436778410922, "loss": 0.8827, "step": 6480 }, { "epoch": 0.78, "grad_norm": 0.21442236006259918, "learning_rate": 0.00027248430450168345, "loss": 0.8832, "step": 6485 }, { "epoch": 0.78, "grad_norm": 0.2078474760055542, "learning_rate": 0.0002724248735170934, "loss": 0.9101, "step": 6490 }, { "epoch": 0.78, "grad_norm": 0.1883726716041565, 
"learning_rate": 0.00027236538491529235, "loss": 0.8445, "step": 6495 }, { "epoch": 0.78, "grad_norm": 0.1775909662246704, "learning_rate": 0.0002723058387242775, "loss": 0.9911, "step": 6500 }, { "epoch": 0.78, "grad_norm": 0.2114265412092209, "learning_rate": 0.0002722462349720733, "loss": 1.0032, "step": 6505 }, { "epoch": 0.78, "grad_norm": 0.18201524019241333, "learning_rate": 0.0002721865736867312, "loss": 1.0234, "step": 6510 }, { "epoch": 0.78, "grad_norm": 0.2337150126695633, "learning_rate": 0.00027212685489632986, "loss": 0.9353, "step": 6515 }, { "epoch": 0.79, "grad_norm": 0.19889463484287262, "learning_rate": 0.0002720670786289749, "loss": 0.8353, "step": 6520 }, { "epoch": 0.79, "grad_norm": 0.19570806622505188, "learning_rate": 0.000272007244912799, "loss": 0.9103, "step": 6525 }, { "epoch": 0.79, "grad_norm": 0.216067835688591, "learning_rate": 0.0002719473537759619, "loss": 0.9829, "step": 6530 }, { "epoch": 0.79, "grad_norm": 0.23183125257492065, "learning_rate": 0.0002718874052466504, "loss": 0.9251, "step": 6535 }, { "epoch": 0.79, "grad_norm": 0.22221823036670685, "learning_rate": 0.00027182739935307826, "loss": 0.9561, "step": 6540 }, { "epoch": 0.79, "grad_norm": 0.22014914453029633, "learning_rate": 0.00027176733612348616, "loss": 0.9561, "step": 6545 }, { "epoch": 0.79, "grad_norm": 0.20240789651870728, "learning_rate": 0.0002717072155861419, "loss": 0.9529, "step": 6550 }, { "epoch": 0.79, "grad_norm": 0.181975319981575, "learning_rate": 0.0002716470377693403, "loss": 0.8618, "step": 6555 }, { "epoch": 0.79, "grad_norm": 0.18764451146125793, "learning_rate": 0.0002715868027014029, "loss": 1.0196, "step": 6560 }, { "epoch": 0.79, "grad_norm": 0.20951347053050995, "learning_rate": 0.0002715265104106784, "loss": 0.952, "step": 6565 }, { "epoch": 0.79, "grad_norm": 0.2056897133588791, "learning_rate": 0.0002714661609255423, "loss": 0.9513, "step": 6570 }, { "epoch": 0.79, "grad_norm": 0.22014079988002777, "learning_rate": 
0.0002714057542743971, "loss": 0.9351, "step": 6575 }, { "epoch": 0.79, "grad_norm": 0.20651625096797943, "learning_rate": 0.00027134529048567223, "loss": 0.9362, "step": 6580 }, { "epoch": 0.79, "grad_norm": 0.2232450395822525, "learning_rate": 0.00027128476958782386, "loss": 1.0102, "step": 6585 }, { "epoch": 0.79, "grad_norm": 0.2110331654548645, "learning_rate": 0.00027122419160933515, "loss": 0.9414, "step": 6590 }, { "epoch": 0.79, "grad_norm": 0.20717830955982208, "learning_rate": 0.0002711635565787162, "loss": 0.9147, "step": 6595 }, { "epoch": 0.8, "grad_norm": 0.20119911432266235, "learning_rate": 0.00027110286452450375, "loss": 0.892, "step": 6600 }, { "epoch": 0.8, "grad_norm": 0.22192524373531342, "learning_rate": 0.0002710421154752616, "loss": 0.9916, "step": 6605 }, { "epoch": 0.8, "grad_norm": 0.19518253207206726, "learning_rate": 0.0002709813094595802, "loss": 0.8616, "step": 6610 }, { "epoch": 0.8, "grad_norm": 0.21804095804691315, "learning_rate": 0.0002709204465060769, "loss": 0.939, "step": 6615 }, { "epoch": 0.8, "grad_norm": 0.23485125601291656, "learning_rate": 0.00027085952664339586, "loss": 0.9235, "step": 6620 }, { "epoch": 0.8, "grad_norm": 0.21770937740802765, "learning_rate": 0.00027079854990020793, "loss": 0.9558, "step": 6625 }, { "epoch": 0.8, "grad_norm": 0.19482004642486572, "learning_rate": 0.00027073751630521083, "loss": 0.9604, "step": 6630 }, { "epoch": 0.8, "grad_norm": 0.1984790414571762, "learning_rate": 0.000270676425887129, "loss": 0.9706, "step": 6635 }, { "epoch": 0.8, "grad_norm": 0.1953192949295044, "learning_rate": 0.0002706152786747136, "loss": 1.0016, "step": 6640 }, { "epoch": 0.8, "grad_norm": 0.19475625455379486, "learning_rate": 0.00027055407469674255, "loss": 0.9146, "step": 6645 }, { "epoch": 0.8, "grad_norm": 0.2099728286266327, "learning_rate": 0.00027049281398202046, "loss": 0.9263, "step": 6650 }, { "epoch": 0.8, "grad_norm": 0.2410629391670227, "learning_rate": 0.00027043149655937864, "loss": 0.9945, 
"step": 6655 }, { "epoch": 0.8, "grad_norm": 0.21162976324558258, "learning_rate": 0.0002703701224576752, "loss": 0.8995, "step": 6660 }, { "epoch": 0.8, "grad_norm": 0.20428724586963654, "learning_rate": 0.0002703086917057947, "loss": 0.899, "step": 6665 }, { "epoch": 0.8, "grad_norm": 0.22366644442081451, "learning_rate": 0.0002702472043326486, "loss": 1.0311, "step": 6670 }, { "epoch": 0.8, "grad_norm": 0.20753750205039978, "learning_rate": 0.00027018566036717483, "loss": 0.9162, "step": 6675 }, { "epoch": 0.8, "grad_norm": 0.20095977187156677, "learning_rate": 0.00027012405983833806, "loss": 0.9609, "step": 6680 }, { "epoch": 0.81, "grad_norm": 0.1888018697500229, "learning_rate": 0.00027006240277512955, "loss": 0.9741, "step": 6685 }, { "epoch": 0.81, "grad_norm": 0.21836860477924347, "learning_rate": 0.0002700006892065671, "loss": 0.9016, "step": 6690 }, { "epoch": 0.81, "grad_norm": 0.23613397777080536, "learning_rate": 0.0002699389191616952, "loss": 0.8804, "step": 6695 }, { "epoch": 0.81, "grad_norm": 0.2188320904970169, "learning_rate": 0.00026987709266958497, "loss": 0.8731, "step": 6700 }, { "epoch": 0.81, "grad_norm": 0.21378405392169952, "learning_rate": 0.0002698152097593339, "loss": 1.0018, "step": 6705 }, { "epoch": 0.81, "grad_norm": 0.2068474143743515, "learning_rate": 0.0002697532704600662, "loss": 0.8768, "step": 6710 }, { "epoch": 0.81, "grad_norm": 0.20965330302715302, "learning_rate": 0.0002696912748009325, "loss": 1.071, "step": 6715 }, { "epoch": 0.81, "grad_norm": 0.2051534354686737, "learning_rate": 0.00026962922281111, "loss": 0.8599, "step": 6720 }, { "epoch": 0.81, "grad_norm": 0.20214852690696716, "learning_rate": 0.0002695671145198026, "loss": 0.974, "step": 6725 }, { "epoch": 0.81, "grad_norm": 0.23140518367290497, "learning_rate": 0.00026950494995624035, "loss": 0.8391, "step": 6730 }, { "epoch": 0.81, "grad_norm": 0.22693967819213867, "learning_rate": 0.00026944272914968, "loss": 0.9452, "step": 6735 }, { "epoch": 0.81, 
"grad_norm": 0.19972123205661774, "learning_rate": 0.00026938045212940477, "loss": 0.8612, "step": 6740 }, { "epoch": 0.81, "grad_norm": 0.1956896334886551, "learning_rate": 0.00026931811892472423, "loss": 0.9701, "step": 6745 }, { "epoch": 0.81, "grad_norm": 0.21115869283676147, "learning_rate": 0.00026925572956497455, "loss": 0.8717, "step": 6750 }, { "epoch": 0.81, "grad_norm": 0.22253437340259552, "learning_rate": 0.00026919328407951814, "loss": 0.9054, "step": 6755 }, { "epoch": 0.81, "grad_norm": 0.2643227279186249, "learning_rate": 0.0002691307824977439, "loss": 0.961, "step": 6760 }, { "epoch": 0.82, "grad_norm": 0.20802360773086548, "learning_rate": 0.0002690682248490673, "loss": 0.8992, "step": 6765 }, { "epoch": 0.82, "grad_norm": 0.25810569524765015, "learning_rate": 0.00026900561116292995, "loss": 0.9011, "step": 6770 }, { "epoch": 0.82, "grad_norm": 0.20976845920085907, "learning_rate": 0.0002689429414687999, "loss": 0.9485, "step": 6775 }, { "epoch": 0.82, "grad_norm": 0.17168466746807098, "learning_rate": 0.0002688802157961716, "loss": 1.0081, "step": 6780 }, { "epoch": 0.82, "grad_norm": 0.21908418834209442, "learning_rate": 0.00026881743417456585, "loss": 0.9562, "step": 6785 }, { "epoch": 0.82, "grad_norm": 0.2043706625699997, "learning_rate": 0.0002687545966335298, "loss": 1.0096, "step": 6790 }, { "epoch": 0.82, "grad_norm": 0.20396995544433594, "learning_rate": 0.00026869170320263685, "loss": 1.0249, "step": 6795 }, { "epoch": 0.82, "grad_norm": 0.2096068263053894, "learning_rate": 0.00026862875391148676, "loss": 1.0383, "step": 6800 }, { "epoch": 0.82, "grad_norm": 0.23566322028636932, "learning_rate": 0.0002685657487897055, "loss": 0.9589, "step": 6805 }, { "epoch": 0.82, "grad_norm": 0.20193132758140564, "learning_rate": 0.0002685026878669455, "loss": 0.9232, "step": 6810 }, { "epoch": 0.82, "grad_norm": 0.21133491396903992, "learning_rate": 0.0002684395711728851, "loss": 0.8744, "step": 6815 }, { "epoch": 0.82, "grad_norm": 
0.2161945253610611, "learning_rate": 0.00026837639873722934, "loss": 0.9788, "step": 6820 }, { "epoch": 0.82, "grad_norm": 0.21006715297698975, "learning_rate": 0.0002683131705897092, "loss": 0.9007, "step": 6825 }, { "epoch": 0.82, "grad_norm": 0.19389329850673676, "learning_rate": 0.0002682498867600819, "loss": 0.9501, "step": 6830 }, { "epoch": 0.82, "grad_norm": 0.2149161845445633, "learning_rate": 0.00026818654727813086, "loss": 0.9366, "step": 6835 }, { "epoch": 0.82, "grad_norm": 0.19821734726428986, "learning_rate": 0.0002681231521736659, "loss": 0.9437, "step": 6840 }, { "epoch": 0.82, "grad_norm": 0.21618692576885223, "learning_rate": 0.00026805970147652277, "loss": 0.8672, "step": 6845 }, { "epoch": 0.83, "grad_norm": 0.21498163044452667, "learning_rate": 0.00026799619521656346, "loss": 1.0109, "step": 6850 }, { "epoch": 0.83, "grad_norm": 0.2292798012495041, "learning_rate": 0.00026793263342367616, "loss": 0.9201, "step": 6855 }, { "epoch": 0.83, "grad_norm": 0.19892308115959167, "learning_rate": 0.0002678690161277752, "loss": 0.9206, "step": 6860 }, { "epoch": 0.83, "grad_norm": 0.18660472333431244, "learning_rate": 0.00026780534335880084, "loss": 0.9114, "step": 6865 }, { "epoch": 0.83, "grad_norm": 0.2231675237417221, "learning_rate": 0.00026774161514671975, "loss": 0.9933, "step": 6870 }, { "epoch": 0.83, "grad_norm": 0.2067699432373047, "learning_rate": 0.0002676778315215245, "loss": 1.0153, "step": 6875 }, { "epoch": 0.83, "grad_norm": 0.19282564520835876, "learning_rate": 0.00026761399251323375, "loss": 0.8656, "step": 6880 }, { "epoch": 0.83, "grad_norm": 0.21129785478115082, "learning_rate": 0.0002675500981518923, "loss": 0.9499, "step": 6885 }, { "epoch": 0.83, "grad_norm": 0.19059066474437714, "learning_rate": 0.0002674861484675709, "loss": 1.0097, "step": 6890 }, { "epoch": 0.83, "grad_norm": 0.23170576989650726, "learning_rate": 0.0002674221434903665, "loss": 0.8877, "step": 6895 }, { "epoch": 0.83, "grad_norm": 0.21638576686382294, 
"learning_rate": 0.0002673580832504019, "loss": 0.987, "step": 6900 }, { "epoch": 0.83, "grad_norm": 0.22118517756462097, "learning_rate": 0.00026729396777782597, "loss": 0.9849, "step": 6905 }, { "epoch": 0.83, "grad_norm": 0.20184585452079773, "learning_rate": 0.0002672297971028136, "loss": 0.9106, "step": 6910 }, { "epoch": 0.83, "grad_norm": 0.21392621099948883, "learning_rate": 0.0002671655712555656, "loss": 1.0239, "step": 6915 }, { "epoch": 0.83, "grad_norm": 0.2100532501935959, "learning_rate": 0.0002671012902663089, "loss": 0.994, "step": 6920 }, { "epoch": 0.83, "grad_norm": 0.21326220035552979, "learning_rate": 0.0002670369541652961, "loss": 0.9117, "step": 6925 }, { "epoch": 0.83, "grad_norm": 0.2225876897573471, "learning_rate": 0.000266972562982806, "loss": 0.9071, "step": 6930 }, { "epoch": 0.84, "grad_norm": 0.18916994333267212, "learning_rate": 0.00026690811674914323, "loss": 0.9641, "step": 6935 }, { "epoch": 0.84, "grad_norm": 0.2092379480600357, "learning_rate": 0.0002668436154946383, "loss": 1.061, "step": 6940 }, { "epoch": 0.84, "grad_norm": 0.18153078854084015, "learning_rate": 0.0002667790592496477, "loss": 1.0148, "step": 6945 }, { "epoch": 0.84, "grad_norm": 0.20708273351192474, "learning_rate": 0.00026671444804455375, "loss": 0.9417, "step": 6950 }, { "epoch": 0.84, "grad_norm": 0.2154036909341812, "learning_rate": 0.0002666497819097645, "loss": 0.8497, "step": 6955 }, { "epoch": 0.84, "grad_norm": 0.22335532307624817, "learning_rate": 0.0002665850608757141, "loss": 0.884, "step": 6960 }, { "epoch": 0.84, "grad_norm": 0.2135249823331833, "learning_rate": 0.0002665202849728624, "loss": 0.9287, "step": 6965 }, { "epoch": 0.84, "grad_norm": 0.2231944501399994, "learning_rate": 0.000266455454231695, "loss": 0.9385, "step": 6970 }, { "epoch": 0.84, "grad_norm": 0.19477687776088715, "learning_rate": 0.0002663905686827235, "loss": 0.9468, "step": 6975 }, { "epoch": 0.84, "grad_norm": 0.21901415288448334, "learning_rate": 0.0002663256283564852, 
"loss": 0.8921, "step": 6980 }, { "epoch": 0.84, "grad_norm": 0.21255704760551453, "learning_rate": 0.00026626063328354316, "loss": 0.9014, "step": 6985 }, { "epoch": 0.84, "grad_norm": 0.20495054125785828, "learning_rate": 0.0002661955834944861, "loss": 1.0034, "step": 6990 }, { "epoch": 0.84, "grad_norm": 0.1916796863079071, "learning_rate": 0.0002661304790199288, "loss": 1.0317, "step": 6995 }, { "epoch": 0.84, "grad_norm": 0.21294309198856354, "learning_rate": 0.0002660653198905115, "loss": 0.9128, "step": 7000 }, { "epoch": 0.84, "grad_norm": 0.22985853254795074, "learning_rate": 0.00026600010613690023, "loss": 0.9521, "step": 7005 }, { "epoch": 0.84, "grad_norm": 0.24661943316459656, "learning_rate": 0.00026593483778978677, "loss": 0.9456, "step": 7010 }, { "epoch": 0.85, "grad_norm": 0.21790087223052979, "learning_rate": 0.0002658695148798886, "loss": 0.9558, "step": 7015 }, { "epoch": 0.85, "grad_norm": 0.18873614072799683, "learning_rate": 0.00026580413743794874, "loss": 0.914, "step": 7020 }, { "epoch": 0.85, "grad_norm": 0.21589027345180511, "learning_rate": 0.0002657387054947361, "loss": 0.9285, "step": 7025 }, { "epoch": 0.85, "grad_norm": 0.1877526193857193, "learning_rate": 0.0002656732190810451, "loss": 0.9537, "step": 7030 }, { "epoch": 0.85, "grad_norm": 0.22506104409694672, "learning_rate": 0.0002656076782276958, "loss": 0.8938, "step": 7035 }, { "epoch": 0.85, "grad_norm": 0.23874376714229584, "learning_rate": 0.000265542082965534, "loss": 0.8486, "step": 7040 }, { "epoch": 0.85, "grad_norm": 0.20064063370227814, "learning_rate": 0.00026547643332543077, "loss": 0.98, "step": 7045 }, { "epoch": 0.85, "grad_norm": 0.22124479711055756, "learning_rate": 0.0002654107293382833, "loss": 0.9002, "step": 7050 }, { "epoch": 0.85, "grad_norm": 0.1762978434562683, "learning_rate": 0.00026534497103501383, "loss": 0.9397, "step": 7055 }, { "epoch": 0.85, "grad_norm": 0.21579968929290771, "learning_rate": 0.0002652791584465706, "loss": 0.9606, "step": 7060 }, 
{ "epoch": 0.85, "grad_norm": 0.2093055099248886, "learning_rate": 0.000265213291603927, "loss": 0.9967, "step": 7065 }, { "epoch": 0.85, "grad_norm": 0.2238216996192932, "learning_rate": 0.00026514737053808234, "loss": 0.9985, "step": 7070 }, { "epoch": 0.85, "grad_norm": 0.20095250010490417, "learning_rate": 0.0002650813952800611, "loss": 0.8892, "step": 7075 }, { "epoch": 0.85, "grad_norm": 0.2064996361732483, "learning_rate": 0.00026501536586091357, "loss": 0.9211, "step": 7080 }, { "epoch": 0.85, "grad_norm": 0.20481903851032257, "learning_rate": 0.0002649492823117153, "loss": 0.8601, "step": 7085 }, { "epoch": 0.85, "grad_norm": 0.19760817289352417, "learning_rate": 0.0002648831446635674, "loss": 0.9275, "step": 7090 }, { "epoch": 0.85, "grad_norm": 0.2292260080575943, "learning_rate": 0.0002648169529475965, "loss": 1.0424, "step": 7095 }, { "epoch": 0.86, "grad_norm": 0.2114565074443817, "learning_rate": 0.0002647507071949546, "loss": 0.874, "step": 7100 }, { "epoch": 0.86, "grad_norm": 0.19804421067237854, "learning_rate": 0.00026468440743681915, "loss": 0.9876, "step": 7105 }, { "epoch": 0.86, "grad_norm": 0.21388956904411316, "learning_rate": 0.000264618053704393, "loss": 0.8863, "step": 7110 }, { "epoch": 0.86, "grad_norm": 0.19273947179317474, "learning_rate": 0.0002645516460289044, "loss": 1.0307, "step": 7115 }, { "epoch": 0.86, "grad_norm": 0.22840675711631775, "learning_rate": 0.000264485184441607, "loss": 0.8876, "step": 7120 }, { "epoch": 0.86, "grad_norm": 0.22079254686832428, "learning_rate": 0.00026441866897378, "loss": 0.9526, "step": 7125 }, { "epoch": 0.86, "grad_norm": 0.1948045939207077, "learning_rate": 0.00026435209965672756, "loss": 0.9624, "step": 7130 }, { "epoch": 0.86, "grad_norm": 0.19133198261260986, "learning_rate": 0.0002642854765217795, "loss": 0.9241, "step": 7135 }, { "epoch": 0.86, "grad_norm": 0.2186049222946167, "learning_rate": 0.00026421879960029096, "loss": 0.9452, "step": 7140 }, { "epoch": 0.86, "grad_norm": 
0.19075119495391846, "learning_rate": 0.00026415206892364216, "loss": 0.9007, "step": 7145 }, { "epoch": 0.86, "grad_norm": 0.20891107618808746, "learning_rate": 0.00026408528452323885, "loss": 0.9266, "step": 7150 }, { "epoch": 0.86, "grad_norm": 0.20901614427566528, "learning_rate": 0.000264018446430512, "loss": 0.9023, "step": 7155 }, { "epoch": 0.86, "grad_norm": 0.20765335857868195, "learning_rate": 0.0002639515546769179, "loss": 0.9053, "step": 7160 }, { "epoch": 0.86, "grad_norm": 0.21316955983638763, "learning_rate": 0.0002638846092939379, "loss": 0.8916, "step": 7165 }, { "epoch": 0.86, "grad_norm": 0.20992538332939148, "learning_rate": 0.00026381761031307873, "loss": 0.968, "step": 7170 }, { "epoch": 0.86, "grad_norm": 0.2142067402601242, "learning_rate": 0.0002637505577658725, "loss": 0.9669, "step": 7175 }, { "epoch": 0.87, "grad_norm": 0.2190820276737213, "learning_rate": 0.0002636834516838761, "loss": 0.9586, "step": 7180 }, { "epoch": 0.87, "grad_norm": 0.18383167684078217, "learning_rate": 0.0002636162920986721, "loss": 0.9392, "step": 7185 }, { "epoch": 0.87, "grad_norm": 0.24156947433948517, "learning_rate": 0.00026354907904186796, "loss": 0.942, "step": 7190 }, { "epoch": 0.87, "grad_norm": 0.2070099264383316, "learning_rate": 0.00026348181254509635, "loss": 0.9375, "step": 7195 }, { "epoch": 0.87, "grad_norm": 0.22052322328090668, "learning_rate": 0.00026341449264001516, "loss": 0.8665, "step": 7200 }, { "epoch": 0.87, "grad_norm": 0.20919539034366608, "learning_rate": 0.00026334711935830735, "loss": 0.8609, "step": 7205 }, { "epoch": 0.87, "grad_norm": 0.24203932285308838, "learning_rate": 0.00026327969273168104, "loss": 0.9312, "step": 7210 }, { "epoch": 0.87, "grad_norm": 0.19879262149333954, "learning_rate": 0.00026321221279186944, "loss": 0.7982, "step": 7215 }, { "epoch": 0.87, "grad_norm": 0.190317302942276, "learning_rate": 0.0002631446795706308, "loss": 0.9479, "step": 7220 }, { "epoch": 0.87, "grad_norm": 0.20319460332393646, 
"learning_rate": 0.0002630770930997486, "loss": 0.9041, "step": 7225 }, { "epoch": 0.87, "grad_norm": 0.20878246426582336, "learning_rate": 0.00026300945341103113, "loss": 0.9042, "step": 7230 }, { "epoch": 0.87, "grad_norm": 0.208158478140831, "learning_rate": 0.000262941760536312, "loss": 0.91, "step": 7235 }, { "epoch": 0.87, "grad_norm": 0.2144281417131424, "learning_rate": 0.0002628740145074497, "loss": 0.8517, "step": 7240 }, { "epoch": 0.87, "grad_norm": 0.1934666633605957, "learning_rate": 0.0002628062153563277, "loss": 0.9417, "step": 7245 }, { "epoch": 0.87, "grad_norm": 0.2240288108587265, "learning_rate": 0.0002627383631148546, "loss": 0.9912, "step": 7250 }, { "epoch": 0.87, "grad_norm": 0.2270900011062622, "learning_rate": 0.00026267045781496384, "loss": 0.8963, "step": 7255 }, { "epoch": 0.87, "grad_norm": 0.2111656367778778, "learning_rate": 0.00026260249948861406, "loss": 0.9598, "step": 7260 }, { "epoch": 0.88, "grad_norm": 0.21797975897789001, "learning_rate": 0.0002625344881677885, "loss": 0.9216, "step": 7265 }, { "epoch": 0.88, "grad_norm": 0.20986701548099518, "learning_rate": 0.00026246642388449575, "loss": 0.8921, "step": 7270 }, { "epoch": 0.88, "grad_norm": 0.21102838218212128, "learning_rate": 0.00026239830667076897, "loss": 1.0191, "step": 7275 }, { "epoch": 0.88, "grad_norm": 0.2326158732175827, "learning_rate": 0.00026233013655866646, "loss": 0.9563, "step": 7280 }, { "epoch": 0.88, "grad_norm": 0.1865602284669876, "learning_rate": 0.0002622619135802713, "loss": 0.974, "step": 7285 }, { "epoch": 0.88, "grad_norm": 0.1943947672843933, "learning_rate": 0.00026219363776769155, "loss": 0.9563, "step": 7290 }, { "epoch": 0.88, "grad_norm": 0.21199694275856018, "learning_rate": 0.00026212530915306, "loss": 0.9788, "step": 7295 }, { "epoch": 0.88, "grad_norm": 0.20368671417236328, "learning_rate": 0.0002620569277685344, "loss": 0.9533, "step": 7300 }, { "epoch": 0.88, "grad_norm": 0.2417079508304596, "learning_rate": 0.00026198849364629723, 
"loss": 0.9253, "step": 7305 }, { "epoch": 0.88, "grad_norm": 0.20021148025989532, "learning_rate": 0.00026192000681855604, "loss": 0.9328, "step": 7310 }, { "epoch": 0.88, "grad_norm": 0.21492332220077515, "learning_rate": 0.00026185146731754285, "loss": 0.9173, "step": 7315 }, { "epoch": 0.88, "grad_norm": 0.22613677382469177, "learning_rate": 0.00026178287517551464, "loss": 0.9277, "step": 7320 }, { "epoch": 0.88, "grad_norm": 0.21383176743984222, "learning_rate": 0.0002617142304247532, "loss": 0.9311, "step": 7325 }, { "epoch": 0.88, "grad_norm": 0.20949169993400574, "learning_rate": 0.00026164553309756497, "loss": 0.9854, "step": 7330 }, { "epoch": 0.88, "grad_norm": 0.23615580797195435, "learning_rate": 0.00026157678322628127, "loss": 0.9295, "step": 7335 }, { "epoch": 0.88, "grad_norm": 0.18629109859466553, "learning_rate": 0.00026150798084325803, "loss": 0.9045, "step": 7340 }, { "epoch": 0.88, "grad_norm": 0.2189890593290329, "learning_rate": 0.00026143912598087593, "loss": 0.9756, "step": 7345 }, { "epoch": 0.89, "grad_norm": 0.1957770138978958, "learning_rate": 0.00026137021867154043, "loss": 0.8535, "step": 7350 }, { "epoch": 0.89, "grad_norm": 0.20852722227573395, "learning_rate": 0.00026130125894768146, "loss": 0.8852, "step": 7355 }, { "epoch": 0.89, "grad_norm": 0.2296961098909378, "learning_rate": 0.0002612322468417538, "loss": 0.929, "step": 7360 }, { "epoch": 0.89, "grad_norm": 0.21166066825389862, "learning_rate": 0.00026116318238623694, "loss": 1.0121, "step": 7365 }, { "epoch": 0.89, "grad_norm": 0.20803911983966827, "learning_rate": 0.0002610940656136348, "loss": 0.9132, "step": 7370 }, { "epoch": 0.89, "grad_norm": 0.26100292801856995, "learning_rate": 0.0002610248965564761, "loss": 0.9956, "step": 7375 }, { "epoch": 0.89, "grad_norm": 0.19871069490909576, "learning_rate": 0.000260955675247314, "loss": 0.904, "step": 7380 }, { "epoch": 0.89, "grad_norm": 0.21508464217185974, "learning_rate": 0.0002608864017187264, "loss": 0.8962, "step": 
7385 }, { "epoch": 0.89, "grad_norm": 0.22008737921714783, "learning_rate": 0.0002608170760033158, "loss": 0.8767, "step": 7390 }, { "epoch": 0.89, "grad_norm": 0.2107255458831787, "learning_rate": 0.0002607476981337091, "loss": 0.9574, "step": 7395 }, { "epoch": 0.89, "grad_norm": 0.2372090220451355, "learning_rate": 0.00026067826814255777, "loss": 0.9463, "step": 7400 }, { "epoch": 0.89, "grad_norm": 0.20857949554920197, "learning_rate": 0.000260608786062538, "loss": 0.9372, "step": 7405 }, { "epoch": 0.89, "grad_norm": 0.21467188000679016, "learning_rate": 0.0002605392519263503, "loss": 0.9589, "step": 7410 }, { "epoch": 0.89, "grad_norm": 0.20547367632389069, "learning_rate": 0.0002604696657667197, "loss": 0.9673, "step": 7415 }, { "epoch": 0.89, "grad_norm": 0.23634222149848938, "learning_rate": 0.00026040002761639586, "loss": 0.8479, "step": 7420 }, { "epoch": 0.89, "grad_norm": 0.19742855429649353, "learning_rate": 0.0002603303375081527, "loss": 0.9117, "step": 7425 }, { "epoch": 0.9, "grad_norm": 0.2211044728755951, "learning_rate": 0.0002602605954747888, "loss": 0.9479, "step": 7430 }, { "epoch": 0.9, "grad_norm": 0.20085452497005463, "learning_rate": 0.000260190801549127, "loss": 0.96, "step": 7435 }, { "epoch": 0.9, "grad_norm": 0.20444773137569427, "learning_rate": 0.0002601209557640147, "loss": 0.8778, "step": 7440 }, { "epoch": 0.9, "grad_norm": 0.21985496580600739, "learning_rate": 0.00026005105815232364, "loss": 0.9513, "step": 7445 }, { "epoch": 0.9, "grad_norm": 0.22137680649757385, "learning_rate": 0.0002599811087469498, "loss": 1.0321, "step": 7450 }, { "epoch": 0.9, "grad_norm": 0.20409676432609558, "learning_rate": 0.0002599111075808139, "loss": 0.9039, "step": 7455 }, { "epoch": 0.9, "grad_norm": 0.20805245637893677, "learning_rate": 0.0002598410546868608, "loss": 0.9834, "step": 7460 }, { "epoch": 0.9, "grad_norm": 0.23089753091335297, "learning_rate": 0.00025977095009805957, "loss": 0.9394, "step": 7465 }, { "epoch": 0.9, "grad_norm": 
0.2073964923620224, "learning_rate": 0.0002597007938474039, "loss": 0.8757, "step": 7470 }, { "epoch": 0.9, "grad_norm": 0.22039000689983368, "learning_rate": 0.00025963058596791157, "loss": 0.8823, "step": 7475 }, { "epoch": 0.9, "grad_norm": 0.20339925587177277, "learning_rate": 0.00025956032649262475, "loss": 0.9434, "step": 7480 }, { "epoch": 0.9, "grad_norm": 0.2380754053592682, "learning_rate": 0.0002594900154546099, "loss": 0.9431, "step": 7485 }, { "epoch": 0.9, "grad_norm": 0.22166424989700317, "learning_rate": 0.00025941965288695776, "loss": 0.8625, "step": 7490 }, { "epoch": 0.9, "grad_norm": 0.19690659642219543, "learning_rate": 0.00025934923882278325, "loss": 0.8757, "step": 7495 }, { "epoch": 0.9, "grad_norm": 0.225230872631073, "learning_rate": 0.00025927877329522554, "loss": 0.9952, "step": 7500 }, { "epoch": 0.9, "grad_norm": 0.23189792037010193, "learning_rate": 0.00025920825633744815, "loss": 1.0034, "step": 7505 }, { "epoch": 0.9, "grad_norm": 0.22297634184360504, "learning_rate": 0.0002591376879826386, "loss": 0.9525, "step": 7510 }, { "epoch": 0.91, "grad_norm": 0.2259618490934372, "learning_rate": 0.00025906706826400863, "loss": 0.9325, "step": 7515 }, { "epoch": 0.91, "grad_norm": 0.21782998740673065, "learning_rate": 0.0002589963972147945, "loss": 0.9086, "step": 7520 }, { "epoch": 0.91, "grad_norm": 0.2169419527053833, "learning_rate": 0.000258925674868256, "loss": 0.9848, "step": 7525 }, { "epoch": 0.91, "grad_norm": 0.20681601762771606, "learning_rate": 0.00025885490125767774, "loss": 0.9753, "step": 7530 }, { "epoch": 0.91, "grad_norm": 0.21519158780574799, "learning_rate": 0.00025878407641636794, "loss": 0.9679, "step": 7535 }, { "epoch": 0.91, "grad_norm": 0.22115248441696167, "learning_rate": 0.00025871320037765917, "loss": 0.8951, "step": 7540 }, { "epoch": 0.91, "grad_norm": 0.21856175363063812, "learning_rate": 0.0002586422731749081, "loss": 0.9302, "step": 7545 }, { "epoch": 0.91, "grad_norm": 0.21080365777015686, 
"learning_rate": 0.0002585712948414953, "loss": 0.9291, "step": 7550 }, { "epoch": 0.91, "grad_norm": 0.210664302110672, "learning_rate": 0.0002585002654108257, "loss": 0.9287, "step": 7555 }, { "epoch": 0.91, "grad_norm": 0.19781038165092468, "learning_rate": 0.0002584291849163279, "loss": 0.9384, "step": 7560 }, { "epoch": 0.91, "grad_norm": 0.22275352478027344, "learning_rate": 0.0002583580533914549, "loss": 0.824, "step": 7565 }, { "epoch": 0.91, "grad_norm": 0.2083931863307953, "learning_rate": 0.00025828687086968354, "loss": 0.9764, "step": 7570 }, { "epoch": 0.91, "grad_norm": 0.20877456665039062, "learning_rate": 0.00025821563738451464, "loss": 0.9347, "step": 7575 }, { "epoch": 0.91, "grad_norm": 0.1873340606689453, "learning_rate": 0.00025814435296947307, "loss": 0.9142, "step": 7580 }, { "epoch": 0.91, "grad_norm": 0.2477707713842392, "learning_rate": 0.0002580730176581076, "loss": 0.9027, "step": 7585 }, { "epoch": 0.91, "grad_norm": 0.23362557590007782, "learning_rate": 0.000258001631483991, "loss": 0.8038, "step": 7590 }, { "epoch": 0.92, "grad_norm": 0.22810354828834534, "learning_rate": 0.00025793019448072007, "loss": 0.9889, "step": 7595 }, { "epoch": 0.92, "grad_norm": 0.2291039526462555, "learning_rate": 0.0002578587066819153, "loss": 0.9264, "step": 7600 }, { "epoch": 0.92, "grad_norm": 0.22235728800296783, "learning_rate": 0.00025778716812122136, "loss": 0.9701, "step": 7605 }, { "epoch": 0.92, "grad_norm": 0.21946272253990173, "learning_rate": 0.00025771557883230657, "loss": 0.8935, "step": 7610 }, { "epoch": 0.92, "grad_norm": 0.21064096689224243, "learning_rate": 0.00025764393884886324, "loss": 0.9104, "step": 7615 }, { "epoch": 0.92, "grad_norm": 0.2479812055826187, "learning_rate": 0.00025757224820460755, "loss": 0.8872, "step": 7620 }, { "epoch": 0.92, "grad_norm": 0.2186814844608307, "learning_rate": 0.0002575005069332795, "loss": 0.9364, "step": 7625 }, { "epoch": 0.92, "grad_norm": 0.2108563780784607, "learning_rate": 
0.00025742871506864295, "loss": 0.8766, "step": 7630 }, { "epoch": 0.92, "grad_norm": 0.213986337184906, "learning_rate": 0.0002573568726444854, "loss": 0.9053, "step": 7635 }, { "epoch": 0.92, "grad_norm": 0.23144182562828064, "learning_rate": 0.0002572849796946184, "loss": 0.9933, "step": 7640 }, { "epoch": 0.92, "grad_norm": 0.2001713067293167, "learning_rate": 0.00025721303625287717, "loss": 0.9321, "step": 7645 }, { "epoch": 0.92, "grad_norm": 0.19496455788612366, "learning_rate": 0.00025714104235312064, "loss": 0.861, "step": 7650 }, { "epoch": 0.92, "grad_norm": 0.19990824162960052, "learning_rate": 0.0002570689980292315, "loss": 0.9007, "step": 7655 }, { "epoch": 0.92, "grad_norm": 0.2162327915430069, "learning_rate": 0.0002569969033151163, "loss": 0.9936, "step": 7660 }, { "epoch": 0.92, "grad_norm": 0.20773164927959442, "learning_rate": 0.00025692475824470504, "loss": 0.9124, "step": 7665 }, { "epoch": 0.92, "grad_norm": 0.19005447626113892, "learning_rate": 0.0002568525628519518, "loss": 1.0014, "step": 7670 }, { "epoch": 0.92, "grad_norm": 0.18565884232521057, "learning_rate": 0.00025678031717083394, "loss": 0.8888, "step": 7675 }, { "epoch": 0.93, "grad_norm": 0.21810932457447052, "learning_rate": 0.0002567080212353528, "loss": 0.8517, "step": 7680 }, { "epoch": 0.93, "grad_norm": 0.21368250250816345, "learning_rate": 0.00025663567507953314, "loss": 0.9296, "step": 7685 }, { "epoch": 0.93, "grad_norm": 0.19680459797382355, "learning_rate": 0.0002565632787374236, "loss": 1.0182, "step": 7690 }, { "epoch": 0.93, "grad_norm": 0.23982003331184387, "learning_rate": 0.00025649083224309617, "loss": 0.9503, "step": 7695 }, { "epoch": 0.93, "grad_norm": 0.24432729184627533, "learning_rate": 0.00025641833563064666, "loss": 0.903, "step": 7700 }, { "epoch": 0.93, "grad_norm": 0.20875149965286255, "learning_rate": 0.00025634578893419434, "loss": 0.8951, "step": 7705 }, { "epoch": 0.93, "grad_norm": 0.21281814575195312, "learning_rate": 0.00025627319218788215, 
"loss": 0.9059, "step": 7710 }, { "epoch": 0.93, "grad_norm": 0.226210355758667, "learning_rate": 0.0002562005454258765, "loss": 0.9195, "step": 7715 }, { "epoch": 0.93, "grad_norm": 0.22133582830429077, "learning_rate": 0.0002561278486823673, "loss": 0.8901, "step": 7720 }, { "epoch": 0.93, "grad_norm": 0.24061179161071777, "learning_rate": 0.00025605510199156817, "loss": 0.9548, "step": 7725 }, { "epoch": 0.93, "grad_norm": 0.20201779901981354, "learning_rate": 0.000255982305387716, "loss": 0.8928, "step": 7730 }, { "epoch": 0.93, "grad_norm": 0.23118892312049866, "learning_rate": 0.00025590945890507146, "loss": 0.9098, "step": 7735 }, { "epoch": 0.93, "grad_norm": 0.3037243187427521, "learning_rate": 0.00025583656257791834, "loss": 0.8707, "step": 7740 }, { "epoch": 0.93, "grad_norm": 0.2243233621120453, "learning_rate": 0.0002557636164405641, "loss": 0.8642, "step": 7745 }, { "epoch": 0.93, "grad_norm": 0.1916697919368744, "learning_rate": 0.0002556906205273398, "loss": 0.9297, "step": 7750 }, { "epoch": 0.93, "grad_norm": 0.18961936235427856, "learning_rate": 0.00025561757487259953, "loss": 0.8665, "step": 7755 }, { "epoch": 0.93, "grad_norm": 0.21484404802322388, "learning_rate": 0.000255544479510721, "loss": 0.8957, "step": 7760 }, { "epoch": 0.94, "grad_norm": 0.2340548038482666, "learning_rate": 0.0002554713344761055, "loss": 0.8395, "step": 7765 }, { "epoch": 0.94, "grad_norm": 0.20856556296348572, "learning_rate": 0.00025539813980317733, "loss": 0.9225, "step": 7770 }, { "epoch": 0.94, "grad_norm": 0.21993188560009003, "learning_rate": 0.00025532489552638446, "loss": 0.9975, "step": 7775 }, { "epoch": 0.94, "grad_norm": 0.21606536209583282, "learning_rate": 0.000255251601680198, "loss": 0.9617, "step": 7780 }, { "epoch": 0.94, "grad_norm": 0.23577193915843964, "learning_rate": 0.00025517825829911246, "loss": 0.9713, "step": 7785 }, { "epoch": 0.94, "grad_norm": 0.2203446924686432, "learning_rate": 0.0002551048654176457, "loss": 0.984, "step": 7790 }, { 
"epoch": 0.94, "grad_norm": 0.23059968650341034, "learning_rate": 0.0002550314230703389, "loss": 0.9334, "step": 7795 }, { "epoch": 0.94, "grad_norm": 0.2229011207818985, "learning_rate": 0.0002549579312917564, "loss": 1.0343, "step": 7800 }, { "epoch": 0.94, "grad_norm": 0.18573442101478577, "learning_rate": 0.0002548843901164859, "loss": 0.8456, "step": 7805 }, { "epoch": 0.94, "grad_norm": 0.2154776155948639, "learning_rate": 0.00025481079957913826, "loss": 0.983, "step": 7810 }, { "epoch": 0.94, "grad_norm": 0.20241975784301758, "learning_rate": 0.0002547371597143477, "loss": 1.0003, "step": 7815 }, { "epoch": 0.94, "grad_norm": 0.2103583961725235, "learning_rate": 0.0002546634705567716, "loss": 0.9616, "step": 7820 }, { "epoch": 0.94, "grad_norm": 0.1959349662065506, "learning_rate": 0.0002545897321410905, "loss": 1.0201, "step": 7825 }, { "epoch": 0.94, "grad_norm": 0.22472964227199554, "learning_rate": 0.00025451594450200804, "loss": 0.997, "step": 7830 }, { "epoch": 0.94, "grad_norm": 0.2131294161081314, "learning_rate": 0.0002544421076742513, "loss": 0.9673, "step": 7835 }, { "epoch": 0.94, "grad_norm": 0.21508903801441193, "learning_rate": 0.00025436822169257027, "loss": 0.9775, "step": 7840 }, { "epoch": 0.95, "grad_norm": 0.2155522108078003, "learning_rate": 0.00025429428659173815, "loss": 0.7657, "step": 7845 }, { "epoch": 0.95, "grad_norm": 0.22796915471553802, "learning_rate": 0.00025422030240655123, "loss": 0.9161, "step": 7850 }, { "epoch": 0.95, "grad_norm": 0.19367897510528564, "learning_rate": 0.000254146269171829, "loss": 0.9141, "step": 7855 }, { "epoch": 0.95, "grad_norm": 0.2098265439271927, "learning_rate": 0.00025407218692241384, "loss": 1.0136, "step": 7860 }, { "epoch": 0.95, "grad_norm": 0.20742662250995636, "learning_rate": 0.00025399805569317145, "loss": 1.0097, "step": 7865 }, { "epoch": 0.95, "grad_norm": 0.19661657512187958, "learning_rate": 0.00025392387551899034, "loss": 0.8654, "step": 7870 }, { "epoch": 0.95, "grad_norm": 
0.2003818154335022, "learning_rate": 0.0002538496464347822, "loss": 0.9325, "step": 7875 }, { "epoch": 0.95, "grad_norm": 0.21950916945934296, "learning_rate": 0.0002537753684754817, "loss": 0.9218, "step": 7880 }, { "epoch": 0.95, "grad_norm": 0.2205251157283783, "learning_rate": 0.00025370104167604657, "loss": 0.9599, "step": 7885 }, { "epoch": 0.95, "grad_norm": 0.21949486434459686, "learning_rate": 0.00025362666607145744, "loss": 0.8522, "step": 7890 }, { "epoch": 0.95, "grad_norm": 0.20743881165981293, "learning_rate": 0.00025355224169671786, "loss": 0.9354, "step": 7895 }, { "epoch": 0.95, "grad_norm": 0.19251248240470886, "learning_rate": 0.0002534777685868545, "loss": 0.9454, "step": 7900 }, { "epoch": 0.95, "grad_norm": 0.22613319754600525, "learning_rate": 0.00025340324677691685, "loss": 0.929, "step": 7905 }, { "epoch": 0.95, "grad_norm": 0.206687793135643, "learning_rate": 0.00025332867630197735, "loss": 0.8953, "step": 7910 }, { "epoch": 0.95, "grad_norm": 0.19443422555923462, "learning_rate": 0.0002532540571971313, "loss": 0.9731, "step": 7915 }, { "epoch": 0.95, "grad_norm": 0.20105896890163422, "learning_rate": 0.00025317938949749705, "loss": 0.924, "step": 7920 }, { "epoch": 0.95, "grad_norm": 0.2265363335609436, "learning_rate": 0.0002531046732382156, "loss": 0.9835, "step": 7925 }, { "epoch": 0.96, "grad_norm": 0.19356288015842438, "learning_rate": 0.00025302990845445087, "loss": 1.0531, "step": 7930 }, { "epoch": 0.96, "grad_norm": 0.1920010894536972, "learning_rate": 0.00025295509518138975, "loss": 0.9466, "step": 7935 }, { "epoch": 0.96, "grad_norm": 0.22366103529930115, "learning_rate": 0.00025288023345424176, "loss": 0.9589, "step": 7940 }, { "epoch": 0.96, "grad_norm": 0.22013331949710846, "learning_rate": 0.00025280532330823944, "loss": 1.0145, "step": 7945 }, { "epoch": 0.96, "grad_norm": 0.24709923565387726, "learning_rate": 0.00025273036477863785, "loss": 1.0086, "step": 7950 }, { "epoch": 0.96, "grad_norm": 0.2514353096485138, 
"learning_rate": 0.00025265535790071505, "loss": 0.9726, "step": 7955 }, { "epoch": 0.96, "grad_norm": 0.2117491513490677, "learning_rate": 0.0002525803027097717, "loss": 0.8978, "step": 7960 }, { "epoch": 0.96, "grad_norm": 0.21775074303150177, "learning_rate": 0.0002525051992411314, "loss": 0.8716, "step": 7965 }, { "epoch": 0.96, "grad_norm": 0.20569385588169098, "learning_rate": 0.0002524300475301402, "loss": 0.8722, "step": 7970 }, { "epoch": 0.96, "grad_norm": 0.19833216071128845, "learning_rate": 0.00025235484761216697, "loss": 0.859, "step": 7975 }, { "epoch": 0.96, "grad_norm": 0.19562938809394836, "learning_rate": 0.00025227959952260344, "loss": 0.9029, "step": 7980 }, { "epoch": 0.96, "grad_norm": 0.19179534912109375, "learning_rate": 0.00025220430329686377, "loss": 0.9367, "step": 7985 }, { "epoch": 0.96, "grad_norm": 0.20224545896053314, "learning_rate": 0.0002521289589703848, "loss": 0.8961, "step": 7990 }, { "epoch": 0.96, "grad_norm": 0.2402665615081787, "learning_rate": 0.0002520535665786262, "loss": 0.8604, "step": 7995 }, { "epoch": 0.96, "grad_norm": 0.24251817166805267, "learning_rate": 0.00025197812615707007, "loss": 0.9031, "step": 8000 }, { "epoch": 0.96, "grad_norm": 0.23049946129322052, "learning_rate": 0.00025190263774122113, "loss": 0.8619, "step": 8005 }, { "epoch": 0.97, "grad_norm": 0.2142931967973709, "learning_rate": 0.0002518271013666068, "loss": 0.8672, "step": 8010 }, { "epoch": 0.97, "grad_norm": 0.21534845232963562, "learning_rate": 0.0002517515170687771, "loss": 0.9955, "step": 8015 }, { "epoch": 0.97, "grad_norm": 0.19614648818969727, "learning_rate": 0.0002516758848833043, "loss": 0.8943, "step": 8020 }, { "epoch": 0.97, "grad_norm": 0.21292729675769806, "learning_rate": 0.0002516002048457835, "loss": 0.9365, "step": 8025 }, { "epoch": 0.97, "grad_norm": 0.20765379071235657, "learning_rate": 0.0002515244769918323, "loss": 0.8914, "step": 8030 }, { "epoch": 0.97, "grad_norm": 0.24531885981559753, "learning_rate": 
0.00025144870135709077, "loss": 0.9454, "step": 8035 }, { "epoch": 0.97, "grad_norm": 0.21342940628528595, "learning_rate": 0.0002513728779772213, "loss": 0.9629, "step": 8040 }, { "epoch": 0.97, "grad_norm": 0.20636995136737823, "learning_rate": 0.00025129700688790896, "loss": 0.9343, "step": 8045 }, { "epoch": 0.97, "grad_norm": 0.20889271795749664, "learning_rate": 0.00025122108812486124, "loss": 0.9349, "step": 8050 }, { "epoch": 0.97, "grad_norm": 0.2028619647026062, "learning_rate": 0.000251145121723808, "loss": 0.9775, "step": 8055 }, { "epoch": 0.97, "grad_norm": 0.20538374781608582, "learning_rate": 0.0002510691077205015, "loss": 0.8841, "step": 8060 }, { "epoch": 0.97, "grad_norm": 0.23155823349952698, "learning_rate": 0.0002509930461507166, "loss": 0.9182, "step": 8065 }, { "epoch": 0.97, "grad_norm": 0.21618753671646118, "learning_rate": 0.00025091693705025023, "loss": 0.8398, "step": 8070 }, { "epoch": 0.97, "grad_norm": 0.2337218075990677, "learning_rate": 0.00025084078045492194, "loss": 0.9557, "step": 8075 }, { "epoch": 0.97, "grad_norm": 0.2220161408185959, "learning_rate": 0.0002507645764005736, "loss": 1.008, "step": 8080 }, { "epoch": 0.97, "grad_norm": 0.20406211912631989, "learning_rate": 0.00025068832492306924, "loss": 0.8859, "step": 8085 }, { "epoch": 0.97, "grad_norm": 0.22060614824295044, "learning_rate": 0.0002506120260582955, "loss": 0.936, "step": 8090 }, { "epoch": 0.98, "grad_norm": 0.2314573973417282, "learning_rate": 0.000250535679842161, "loss": 0.9284, "step": 8095 }, { "epoch": 0.98, "grad_norm": 0.22508397698402405, "learning_rate": 0.00025045928631059694, "loss": 0.9302, "step": 8100 }, { "epoch": 0.98, "grad_norm": 0.2177351713180542, "learning_rate": 0.00025038284549955655, "loss": 0.9366, "step": 8105 }, { "epoch": 0.98, "grad_norm": 0.2541705369949341, "learning_rate": 0.0002503063574450155, "loss": 0.9312, "step": 8110 }, { "epoch": 0.98, "grad_norm": 0.22795630991458893, "learning_rate": 0.0002502298221829715, "loss": 
0.8796, "step": 8115 }, { "epoch": 0.98, "grad_norm": 0.23147457838058472, "learning_rate": 0.0002501532397494447, "loss": 0.9392, "step": 8120 }, { "epoch": 0.98, "grad_norm": 0.21217665076255798, "learning_rate": 0.0002500766101804773, "loss": 0.9313, "step": 8125 }, { "epoch": 0.98, "grad_norm": 0.2149830311536789, "learning_rate": 0.0002499999335121337, "loss": 0.8611, "step": 8130 }, { "epoch": 0.98, "grad_norm": 0.22693344950675964, "learning_rate": 0.0002499232097805004, "loss": 0.9367, "step": 8135 }, { "epoch": 0.98, "grad_norm": 0.23371291160583496, "learning_rate": 0.0002498464390216864, "loss": 0.9207, "step": 8140 }, { "epoch": 0.98, "grad_norm": 0.2417604625225067, "learning_rate": 0.00024976962127182224, "loss": 0.9299, "step": 8145 }, { "epoch": 0.98, "grad_norm": 0.2232208102941513, "learning_rate": 0.00024969275656706115, "loss": 0.9032, "step": 8150 }, { "epoch": 0.98, "grad_norm": 0.23492024838924408, "learning_rate": 0.0002496158449435781, "loss": 0.8478, "step": 8155 }, { "epoch": 0.98, "grad_norm": 0.22294215857982635, "learning_rate": 0.00024953888643757026, "loss": 0.9258, "step": 8160 }, { "epoch": 0.98, "grad_norm": 0.20344236493110657, "learning_rate": 0.0002494618810852569, "loss": 0.8963, "step": 8165 }, { "epoch": 0.98, "grad_norm": 0.1954520046710968, "learning_rate": 0.0002493848289228793, "loss": 0.8931, "step": 8170 }, { "epoch": 0.98, "grad_norm": 0.2296024113893509, "learning_rate": 0.00024930772998670074, "loss": 0.931, "step": 8175 }, { "epoch": 0.99, "grad_norm": 0.19687330722808838, "learning_rate": 0.00024923058431300653, "loss": 0.8976, "step": 8180 }, { "epoch": 0.99, "grad_norm": 0.20918093621730804, "learning_rate": 0.000249153391938104, "loss": 0.9273, "step": 8185 }, { "epoch": 0.99, "grad_norm": 0.2444303333759308, "learning_rate": 0.0002490761528983224, "loss": 0.9052, "step": 8190 }, { "epoch": 0.99, "grad_norm": 0.2560185194015503, "learning_rate": 0.00024899886723001307, "loss": 0.8845, "step": 8195 }, { "epoch": 
0.99, "grad_norm": 0.20742972195148468, "learning_rate": 0.00024892153496954917, "loss": 0.8958, "step": 8200 }, { "epoch": 0.99, "grad_norm": 0.25156137347221375, "learning_rate": 0.0002488441561533258, "loss": 0.862, "step": 8205 }, { "epoch": 0.99, "grad_norm": 0.22066602110862732, "learning_rate": 0.00024876673081776, "loss": 0.9099, "step": 8210 }, { "epoch": 0.99, "grad_norm": 0.2297365367412567, "learning_rate": 0.00024868925899929084, "loss": 0.8339, "step": 8215 }, { "epoch": 0.99, "grad_norm": 0.23268838226795197, "learning_rate": 0.0002486117407343789, "loss": 0.8629, "step": 8220 }, { "epoch": 0.99, "grad_norm": 0.20442204177379608, "learning_rate": 0.0002485341760595071, "loss": 0.929, "step": 8225 }, { "epoch": 0.99, "grad_norm": 0.2177409529685974, "learning_rate": 0.0002484565650111798, "loss": 0.9028, "step": 8230 }, { "epoch": 0.99, "grad_norm": 0.2090945988893509, "learning_rate": 0.0002483789076259233, "loss": 0.9519, "step": 8235 }, { "epoch": 0.99, "grad_norm": 0.20811286568641663, "learning_rate": 0.00024830120394028586, "loss": 0.9343, "step": 8240 }, { "epoch": 0.99, "grad_norm": 0.21751175820827484, "learning_rate": 0.0002482234539908374, "loss": 0.9037, "step": 8245 }, { "epoch": 0.99, "grad_norm": 0.20378360152244568, "learning_rate": 0.0002481456578141695, "loss": 0.9319, "step": 8250 }, { "epoch": 0.99, "grad_norm": 0.21400323510169983, "learning_rate": 0.00024806781544689575, "loss": 0.8737, "step": 8255 }, { "epoch": 1.0, "grad_norm": 0.19827169179916382, "learning_rate": 0.00024798992692565136, "loss": 0.9123, "step": 8260 }, { "epoch": 1.0, "grad_norm": 0.23193618655204773, "learning_rate": 0.00024791199228709317, "loss": 0.9127, "step": 8265 }, { "epoch": 1.0, "grad_norm": 0.24364005029201508, "learning_rate": 0.00024783401156789985, "loss": 0.9055, "step": 8270 }, { "epoch": 1.0, "grad_norm": 0.23433059453964233, "learning_rate": 0.00024775598480477175, "loss": 1.01, "step": 8275 }, { "epoch": 1.0, "grad_norm": 
0.20962095260620117, "learning_rate": 0.0002476779120344308, "loss": 0.8084, "step": 8280 }, { "epoch": 1.0, "grad_norm": 0.2257934808731079, "learning_rate": 0.00024759979329362067, "loss": 0.9176, "step": 8285 }, { "epoch": 1.0, "grad_norm": 0.21964751183986664, "learning_rate": 0.0002475216286191067, "loss": 0.8642, "step": 8290 }, { "epoch": 1.0, "grad_norm": 0.21779011189937592, "learning_rate": 0.00024744341804767555, "loss": 0.9282, "step": 8295 }, { "epoch": 1.0, "grad_norm": 0.21334509551525116, "learning_rate": 0.000247365161616136, "loss": 0.8526, "step": 8300 }, { "epoch": 1.0, "grad_norm": 0.21431876718997955, "learning_rate": 0.00024728685936131794, "loss": 0.8295, "step": 8305 }, { "epoch": 1.0, "grad_norm": 0.22023451328277588, "learning_rate": 0.0002472085113200731, "loss": 0.9086, "step": 8310 }, { "epoch": 1.0, "grad_norm": 0.20858727395534515, "learning_rate": 0.0002471301175292746, "loss": 0.9956, "step": 8315 }, { "epoch": 1.0, "grad_norm": 0.19900618493556976, "learning_rate": 0.00024705167802581727, "loss": 0.9715, "step": 8320 }, { "epoch": 1.0, "grad_norm": 0.21103012561798096, "learning_rate": 0.0002469731928466172, "loss": 0.8762, "step": 8325 }, { "epoch": 1.0, "grad_norm": 0.2619311809539795, "learning_rate": 0.0002468946620286122, "loss": 0.9099, "step": 8330 }, { "epoch": 1.0, "grad_norm": 0.22611823678016663, "learning_rate": 0.0002468160856087615, "loss": 0.9334, "step": 8335 }, { "epoch": 1.0, "grad_norm": 0.20762339234352112, "learning_rate": 0.0002467374636240458, "loss": 0.9715, "step": 8340 }, { "epoch": 1.01, "grad_norm": 0.20186099410057068, "learning_rate": 0.0002466587961114671, "loss": 0.8772, "step": 8345 }, { "epoch": 1.01, "grad_norm": 0.2584278881549835, "learning_rate": 0.000246580083108049, "loss": 0.9342, "step": 8350 }, { "epoch": 1.01, "grad_norm": 0.20932774245738983, "learning_rate": 0.0002465013246508365, "loss": 0.9424, "step": 8355 }, { "epoch": 1.01, "grad_norm": 0.22265595197677612, "learning_rate": 
0.0002464225207768959, "loss": 0.8854, "step": 8360 }, { "epoch": 1.01, "grad_norm": 0.24274423718452454, "learning_rate": 0.0002463436715233149, "loss": 0.9185, "step": 8365 }, { "epoch": 1.01, "grad_norm": 0.22588522732257843, "learning_rate": 0.0002462647769272027, "loss": 0.8415, "step": 8370 }, { "epoch": 1.01, "grad_norm": 0.21932215988636017, "learning_rate": 0.00024618583702568954, "loss": 0.9426, "step": 8375 }, { "epoch": 1.01, "grad_norm": 0.2024673968553543, "learning_rate": 0.0002461068518559273, "loss": 0.833, "step": 8380 }, { "epoch": 1.01, "grad_norm": 0.22020402550697327, "learning_rate": 0.00024602782145508885, "loss": 0.8884, "step": 8385 }, { "epoch": 1.01, "grad_norm": 0.24328754842281342, "learning_rate": 0.00024594874586036876, "loss": 0.8266, "step": 8390 }, { "epoch": 1.01, "grad_norm": 0.23017850518226624, "learning_rate": 0.00024586962510898244, "loss": 0.96, "step": 8395 }, { "epoch": 1.01, "grad_norm": 0.22810372710227966, "learning_rate": 0.0002457904592381668, "loss": 0.9248, "step": 8400 }, { "epoch": 1.01, "grad_norm": 0.23655059933662415, "learning_rate": 0.00024571124828518003, "loss": 0.9204, "step": 8405 }, { "epoch": 1.01, "grad_norm": 0.24605585634708405, "learning_rate": 0.0002456319922873013, "loss": 0.8909, "step": 8410 }, { "epoch": 1.01, "grad_norm": 0.22107426822185516, "learning_rate": 0.00024555269128183116, "loss": 0.8827, "step": 8415 }, { "epoch": 1.01, "grad_norm": 0.24414613842964172, "learning_rate": 0.00024547334530609124, "loss": 0.8416, "step": 8420 }, { "epoch": 1.02, "grad_norm": 0.23650889098644257, "learning_rate": 0.00024539395439742453, "loss": 0.8868, "step": 8425 }, { "epoch": 1.02, "grad_norm": 0.23410624265670776, "learning_rate": 0.000245314518593195, "loss": 1.0127, "step": 8430 }, { "epoch": 1.02, "grad_norm": 0.2250613421201706, "learning_rate": 0.0002452350379307876, "loss": 0.9244, "step": 8435 }, { "epoch": 1.02, "grad_norm": 0.22231152653694153, "learning_rate": 0.00024515551244760865, 
"loss": 0.9534, "step": 8440 }, { "epoch": 1.02, "grad_norm": 0.2265365868806839, "learning_rate": 0.0002450759421810856, "loss": 0.8905, "step": 8445 }, { "epoch": 1.02, "grad_norm": 0.21347838640213013, "learning_rate": 0.0002449963271686668, "loss": 0.9037, "step": 8450 }, { "epoch": 1.02, "grad_norm": 0.23790526390075684, "learning_rate": 0.0002449166674478217, "loss": 0.8819, "step": 8455 }, { "epoch": 1.02, "grad_norm": 0.2057124823331833, "learning_rate": 0.0002448369630560408, "loss": 0.9506, "step": 8460 }, { "epoch": 1.02, "grad_norm": 0.22661954164505005, "learning_rate": 0.00024475721403083566, "loss": 0.8741, "step": 8465 }, { "epoch": 1.02, "grad_norm": 0.2240198701620102, "learning_rate": 0.0002446774204097388, "loss": 0.8722, "step": 8470 }, { "epoch": 1.02, "grad_norm": 0.19682292640209198, "learning_rate": 0.0002445975822303038, "loss": 0.8477, "step": 8475 }, { "epoch": 1.02, "grad_norm": 0.20487092435359955, "learning_rate": 0.00024451769953010504, "loss": 0.8088, "step": 8480 }, { "epoch": 1.02, "grad_norm": 0.23260267078876495, "learning_rate": 0.00024443777234673807, "loss": 0.8667, "step": 8485 }, { "epoch": 1.02, "grad_norm": 0.2150842696428299, "learning_rate": 0.00024435780071781926, "loss": 0.8324, "step": 8490 }, { "epoch": 1.02, "grad_norm": 0.24172092974185944, "learning_rate": 0.00024427778468098587, "loss": 0.8618, "step": 8495 }, { "epoch": 1.02, "grad_norm": 0.23562046885490417, "learning_rate": 0.0002441977242738962, "loss": 0.9769, "step": 8500 }, { "epoch": 1.02, "grad_norm": 0.2472938448190689, "learning_rate": 0.00024411761953422922, "loss": 0.8776, "step": 8505 }, { "epoch": 1.03, "grad_norm": 0.23211072385311127, "learning_rate": 0.000244037470499685, "loss": 0.8499, "step": 8510 }, { "epoch": 1.03, "grad_norm": 0.2152828425168991, "learning_rate": 0.00024395727720798424, "loss": 0.8764, "step": 8515 }, { "epoch": 1.03, "grad_norm": 0.21999038755893707, "learning_rate": 0.0002438770396968686, "loss": 0.8729, "step": 8520 }, 
{ "epoch": 1.03, "grad_norm": 0.21073397994041443, "learning_rate": 0.0002437967580041005, "loss": 0.8665, "step": 8525 }, { "epoch": 1.03, "grad_norm": 0.21868467330932617, "learning_rate": 0.00024371643216746324, "loss": 0.8653, "step": 8530 }, { "epoch": 1.03, "grad_norm": 0.22046944499015808, "learning_rate": 0.00024363606222476075, "loss": 0.8832, "step": 8535 }, { "epoch": 1.03, "grad_norm": 0.23462894558906555, "learning_rate": 0.00024355564821381784, "loss": 0.8693, "step": 8540 }, { "epoch": 1.03, "grad_norm": 0.20430710911750793, "learning_rate": 0.00024347519017247995, "loss": 0.9157, "step": 8545 }, { "epoch": 1.03, "grad_norm": 0.2538154721260071, "learning_rate": 0.00024339468813861336, "loss": 0.8323, "step": 8550 }, { "epoch": 1.03, "grad_norm": 0.2316758930683136, "learning_rate": 0.000243314142150105, "loss": 0.8574, "step": 8555 }, { "epoch": 1.03, "grad_norm": 0.239527627825737, "learning_rate": 0.0002432335522448625, "loss": 0.9551, "step": 8560 }, { "epoch": 1.03, "grad_norm": 0.21243155002593994, "learning_rate": 0.00024315291846081406, "loss": 0.9939, "step": 8565 }, { "epoch": 1.03, "grad_norm": 0.23001927137374878, "learning_rate": 0.00024307224083590874, "loss": 0.8392, "step": 8570 }, { "epoch": 1.03, "grad_norm": 0.21947364509105682, "learning_rate": 0.00024299151940811606, "loss": 0.8661, "step": 8575 }, { "epoch": 1.03, "grad_norm": 0.23186787962913513, "learning_rate": 0.0002429107542154261, "loss": 0.8679, "step": 8580 }, { "epoch": 1.03, "grad_norm": 0.2559497058391571, "learning_rate": 0.00024282994529584983, "loss": 0.8598, "step": 8585 }, { "epoch": 1.04, "grad_norm": 0.23815637826919556, "learning_rate": 0.00024274909268741848, "loss": 0.8349, "step": 8590 }, { "epoch": 1.04, "grad_norm": 0.2084992676973343, "learning_rate": 0.00024266819642818405, "loss": 0.8345, "step": 8595 }, { "epoch": 1.04, "grad_norm": 0.2263403832912445, "learning_rate": 0.0002425872565562189, "loss": 0.8879, "step": 8600 }, { "epoch": 1.04, 
"grad_norm": 0.23247919976711273, "learning_rate": 0.00024250627310961614, "loss": 0.7877, "step": 8605 }, { "epoch": 1.04, "grad_norm": 0.21611854434013367, "learning_rate": 0.00024242524612648917, "loss": 0.8724, "step": 8610 }, { "epoch": 1.04, "grad_norm": 0.2197057604789734, "learning_rate": 0.00024234417564497206, "loss": 0.8138, "step": 8615 }, { "epoch": 1.04, "grad_norm": 0.2542968988418579, "learning_rate": 0.00024226306170321924, "loss": 0.9583, "step": 8620 }, { "epoch": 1.04, "grad_norm": 0.21506857872009277, "learning_rate": 0.00024218190433940558, "loss": 0.9456, "step": 8625 }, { "epoch": 1.04, "grad_norm": 0.21320503950119019, "learning_rate": 0.0002421007035917265, "loss": 0.9033, "step": 8630 }, { "epoch": 1.04, "grad_norm": 0.23590131103992462, "learning_rate": 0.00024201945949839775, "loss": 0.9297, "step": 8635 }, { "epoch": 1.04, "grad_norm": 0.23142153024673462, "learning_rate": 0.0002419381720976555, "loss": 0.9025, "step": 8640 }, { "epoch": 1.04, "grad_norm": 0.2024351954460144, "learning_rate": 0.00024185684142775623, "loss": 0.9095, "step": 8645 }, { "epoch": 1.04, "grad_norm": 0.2539699077606201, "learning_rate": 0.00024177546752697697, "loss": 0.8968, "step": 8650 }, { "epoch": 1.04, "grad_norm": 0.22273805737495422, "learning_rate": 0.00024169405043361492, "loss": 0.8366, "step": 8655 }, { "epoch": 1.04, "grad_norm": 0.2413063645362854, "learning_rate": 0.00024161259018598764, "loss": 0.8781, "step": 8660 }, { "epoch": 1.04, "grad_norm": 0.21893925964832306, "learning_rate": 0.00024153108682243307, "loss": 0.8827, "step": 8665 }, { "epoch": 1.04, "grad_norm": 0.23175576329231262, "learning_rate": 0.00024144954038130936, "loss": 0.8959, "step": 8670 }, { "epoch": 1.05, "grad_norm": 0.2520297169685364, "learning_rate": 0.00024136795090099502, "loss": 0.9922, "step": 8675 }, { "epoch": 1.05, "grad_norm": 0.21685057878494263, "learning_rate": 0.00024128631841988877, "loss": 0.9727, "step": 8680 }, { "epoch": 1.05, "grad_norm": 
0.22948361933231354, "learning_rate": 0.00024120464297640955, "loss": 0.8525, "step": 8685 }, { "epoch": 1.05, "grad_norm": 0.24964188039302826, "learning_rate": 0.00024112292460899652, "loss": 0.8168, "step": 8690 }, { "epoch": 1.05, "grad_norm": 0.27692463994026184, "learning_rate": 0.00024104116335610905, "loss": 0.957, "step": 8695 }, { "epoch": 1.05, "grad_norm": 0.24565111100673676, "learning_rate": 0.00024095935925622675, "loss": 0.8839, "step": 8700 }, { "epoch": 1.05, "grad_norm": 0.20882341265678406, "learning_rate": 0.00024087751234784933, "loss": 0.9486, "step": 8705 }, { "epoch": 1.05, "grad_norm": 0.23391057550907135, "learning_rate": 0.0002407956226694966, "loss": 0.9281, "step": 8710 }, { "epoch": 1.05, "grad_norm": 0.2399512678384781, "learning_rate": 0.00024071369025970867, "loss": 0.7803, "step": 8715 }, { "epoch": 1.05, "grad_norm": 0.21823789179325104, "learning_rate": 0.00024063171515704556, "loss": 0.8684, "step": 8720 }, { "epoch": 1.05, "grad_norm": 0.21568119525909424, "learning_rate": 0.0002405496974000875, "loss": 0.7765, "step": 8725 }, { "epoch": 1.05, "grad_norm": 0.2163381576538086, "learning_rate": 0.00024046763702743478, "loss": 0.8652, "step": 8730 }, { "epoch": 1.05, "grad_norm": 0.22184047102928162, "learning_rate": 0.00024038553407770778, "loss": 0.8446, "step": 8735 }, { "epoch": 1.05, "grad_norm": 0.22127588093280792, "learning_rate": 0.00024030338858954678, "loss": 0.8153, "step": 8740 }, { "epoch": 1.05, "grad_norm": 0.25217580795288086, "learning_rate": 0.0002402212006016123, "loss": 0.8407, "step": 8745 }, { "epoch": 1.05, "grad_norm": 0.22362253069877625, "learning_rate": 0.0002401389701525846, "loss": 0.7756, "step": 8750 }, { "epoch": 1.05, "grad_norm": 0.2407907396554947, "learning_rate": 0.00024005669728116417, "loss": 0.8441, "step": 8755 }, { "epoch": 1.06, "grad_norm": 0.23379315435886383, "learning_rate": 0.00023997438202607124, "loss": 0.8697, "step": 8760 }, { "epoch": 1.06, "grad_norm": 0.23524151742458344, 
"learning_rate": 0.00023989202442604621, "loss": 0.9377, "step": 8765 }, { "epoch": 1.06, "grad_norm": 0.22088152170181274, "learning_rate": 0.00023980962451984928, "loss": 0.8814, "step": 8770 }, { "epoch": 1.06, "grad_norm": 0.24777436256408691, "learning_rate": 0.00023972718234626056, "loss": 0.7183, "step": 8775 }, { "epoch": 1.06, "grad_norm": 0.22386722266674042, "learning_rate": 0.00023964469794408006, "loss": 0.9437, "step": 8780 }, { "epoch": 1.06, "grad_norm": 0.2572818100452423, "learning_rate": 0.0002395621713521277, "loss": 0.9118, "step": 8785 }, { "epoch": 1.06, "grad_norm": 0.23130157589912415, "learning_rate": 0.00023947960260924326, "loss": 0.8401, "step": 8790 }, { "epoch": 1.06, "grad_norm": 0.26878032088279724, "learning_rate": 0.0002393969917542863, "loss": 0.9485, "step": 8795 }, { "epoch": 1.06, "grad_norm": 0.21913166344165802, "learning_rate": 0.00023931433882613617, "loss": 0.8719, "step": 8800 }, { "epoch": 1.06, "grad_norm": 0.2042398601770401, "learning_rate": 0.00023923164386369225, "loss": 0.8471, "step": 8805 }, { "epoch": 1.06, "grad_norm": null, "learning_rate": 0.00023916545765519838, "loss": 1.0099, "step": 8810 }, { "epoch": 1.06, "grad_norm": 0.2109871208667755, "learning_rate": 0.0002390826871291146, "loss": 0.9514, "step": 8815 }, { "epoch": 1.06, "grad_norm": 0.24620996415615082, "learning_rate": 0.00023899987467775985, "loss": 0.794, "step": 8820 }, { "epoch": 1.06, "grad_norm": 0.21983186900615692, "learning_rate": 0.00023891702034010856, "loss": 0.8971, "step": 8825 }, { "epoch": 1.06, "grad_norm": 0.23902229964733124, "learning_rate": 0.00023883412415515458, "loss": 0.874, "step": 8830 }, { "epoch": 1.06, "grad_norm": 0.2428964078426361, "learning_rate": 0.0002387511861619117, "loss": 0.9071, "step": 8835 }, { "epoch": 1.07, "grad_norm": 0.22053857147693634, "learning_rate": 0.00023866820639941328, "loss": 0.8574, "step": 8840 }, { "epoch": 1.07, "grad_norm": 0.20824211835861206, "learning_rate": 0.0002385851849067124,
"loss": 0.9146, "step": 8845 }, { "epoch": 1.07, "grad_norm": 0.21827155351638794, "learning_rate": 0.0002385021217228816, "loss": 0.8853, "step": 8850 }, { "epoch": 1.07, "grad_norm": 0.24931156635284424, "learning_rate": 0.0002384190168870133, "loss": 0.8394, "step": 8855 }, { "epoch": 1.07, "grad_norm": 0.23989886045455933, "learning_rate": 0.00023833587043821933, "loss": 0.8461, "step": 8860 }, { "epoch": 1.07, "grad_norm": 0.25101611018180847, "learning_rate": 0.00023825268241563121, "loss": 0.8247, "step": 8865 }, { "epoch": 1.07, "grad_norm": 0.22489690780639648, "learning_rate": 0.00023816945285839994, "loss": 0.918, "step": 8870 }, { "epoch": 1.07, "grad_norm": 0.22989100217819214, "learning_rate": 0.00023808618180569613, "loss": 0.8747, "step": 8875 }, { "epoch": 1.07, "grad_norm": 0.22614827752113342, "learning_rate": 0.00023800286929670996, "loss": 0.8885, "step": 8880 }, { "epoch": 1.07, "grad_norm": 0.213409423828125, "learning_rate": 0.00023791951537065098, "loss": 0.9846, "step": 8885 }, { "epoch": 1.07, "grad_norm": 0.22785250842571259, "learning_rate": 0.00023783612006674835, "loss": 0.9043, "step": 8890 }, { "epoch": 1.07, "grad_norm": 0.2558668553829193, "learning_rate": 0.00023775268342425071, "loss": 0.8575, "step": 8895 }, { "epoch": 1.07, "grad_norm": 0.2358073592185974, "learning_rate": 0.00023766920548242597, "loss": 0.8788, "step": 8900 }, { "epoch": 1.07, "grad_norm": 0.2330961525440216, "learning_rate": 0.00023758568628056185, "loss": 0.8552, "step": 8905 }, { "epoch": 1.07, "grad_norm": 0.26288071274757385, "learning_rate": 0.00023750212585796506, "loss": 0.9431, "step": 8910 }, { "epoch": 1.07, "grad_norm": 0.22953157126903534, "learning_rate": 0.000237418524253962, "loss": 0.8415, "step": 8915 }, { "epoch": 1.07, "grad_norm": 0.23770654201507568, "learning_rate": 0.00023733488150789832, "loss": 0.9387, "step": 8920 }, { "epoch": 1.08, "grad_norm": 0.24671205878257751, "learning_rate": 0.00023725119765913915, "loss": 0.8804, "step": 
8925 }, { "epoch": 1.08, "grad_norm": 0.2374102622270584, "learning_rate": 0.00023716747274706886, "loss": 1.0063, "step": 8930 }, { "epoch": 1.08, "grad_norm": 0.24478651583194733, "learning_rate": 0.0002370837068110911, "loss": 0.9604, "step": 8935 }, { "epoch": 1.08, "grad_norm": 0.22993561625480652, "learning_rate": 0.00023699989989062892, "loss": 0.861, "step": 8940 }, { "epoch": 1.08, "grad_norm": 0.2684665620326996, "learning_rate": 0.0002369160520251248, "loss": 0.8511, "step": 8945 }, { "epoch": 1.08, "grad_norm": 0.25119537115097046, "learning_rate": 0.00023683216325404009, "loss": 0.8144, "step": 8950 }, { "epoch": 1.08, "grad_norm": 0.22731202840805054, "learning_rate": 0.0002367482336168558, "loss": 0.8949, "step": 8955 }, { "epoch": 1.08, "grad_norm": 0.23801319301128387, "learning_rate": 0.00023666426315307188, "loss": 0.9336, "step": 8960 }, { "epoch": 1.08, "grad_norm": 0.24626471102237701, "learning_rate": 0.00023658025190220774, "loss": 0.8794, "step": 8965 }, { "epoch": 1.08, "grad_norm": 0.235481858253479, "learning_rate": 0.00023649619990380184, "loss": 0.8264, "step": 8970 }, { "epoch": 1.08, "grad_norm": 0.2578223645687103, "learning_rate": 0.00023641210719741175, "loss": 0.9651, "step": 8975 }, { "epoch": 1.08, "grad_norm": 0.24087263643741608, "learning_rate": 0.00023632797382261441, "loss": 0.8688, "step": 8980 }, { "epoch": 1.08, "grad_norm": 0.2264793962240219, "learning_rate": 0.00023624379981900572, "loss": 0.8988, "step": 8985 }, { "epoch": 1.08, "grad_norm": 0.21337303519248962, "learning_rate": 0.00023615958522620078, "loss": 0.8212, "step": 8990 }, { "epoch": 1.08, "grad_norm": 0.24498280882835388, "learning_rate": 0.00023607533008383373, "loss": 0.8061, "step": 8995 }, { "epoch": 1.08, "grad_norm": 0.23145338892936707, "learning_rate": 0.00023599103443155788, "loss": 0.8282, "step": 9000 }, { "epoch": 1.09, "grad_norm": 0.2749665379524231, "learning_rate": 0.00023590669830904554, "loss": 0.9067, "step": 9005 }, { "epoch": 1.09, 
"grad_norm": 0.23750852048397064, "learning_rate": 0.00023582232175598812, "loss": 0.9132, "step": 9010 }, { "epoch": 1.09, "grad_norm": 0.22623078525066376, "learning_rate": 0.000235737904812096, "loss": 0.9233, "step": 9015 }, { "epoch": 1.09, "grad_norm": 0.24327704310417175, "learning_rate": 0.00023567034220227885, "loss": 0.9254, "step": 9020 }, { "epoch": 1.09, "grad_norm": 0.23452426493167877, "learning_rate": 0.00023558585265501518, "loss": 0.789, "step": 9025 }, { "epoch": 1.09, "grad_norm": 0.24461832642555237, "learning_rate": 0.00023550132282820706, "loss": 0.9243, "step": 9030 }, { "epoch": 1.09, "grad_norm": 0.22294314205646515, "learning_rate": 0.00023541675276163697, "loss": 0.8918, "step": 9035 }, { "epoch": 1.09, "grad_norm": 0.2230527549982071, "learning_rate": 0.00023533214249510647, "loss": 0.8376, "step": 9040 }, { "epoch": 1.09, "grad_norm": 0.26348450779914856, "learning_rate": 0.00023524749206843586, "loss": 0.9469, "step": 9045 }, { "epoch": 1.09, "grad_norm": 0.2523627281188965, "learning_rate": 0.00023516280152146454, "loss": 0.881, "step": 9050 }, { "epoch": 1.09, "grad_norm": 0.2316828817129135, "learning_rate": 0.00023507807089405064, "loss": 0.8915, "step": 9055 }, { "epoch": 1.09, "grad_norm": 0.21704962849617004, "learning_rate": 0.00023499330022607124, "loss": 0.8677, "step": 9060 }, { "epoch": 1.09, "grad_norm": 0.26538321375846863, "learning_rate": 0.0002349084895574222, "loss": 0.8538, "step": 9065 }, { "epoch": 1.09, "grad_norm": 0.21919500827789307, "learning_rate": 0.00023482363892801827, "loss": 0.8222, "step": 9070 }, { "epoch": 1.09, "grad_norm": 0.27171579003334045, "learning_rate": 0.00023473874837779294, "loss": 0.8996, "step": 9075 }, { "epoch": 1.09, "grad_norm": 0.20920461416244507, "learning_rate": 0.0002346538179466985, "loss": 0.8708, "step": 9080 }, { "epoch": 1.09, "grad_norm": 0.23751945793628693, "learning_rate": 0.00023456884767470614, "loss": 0.9058, "step": 9085 }, { "epoch": 1.1, "grad_norm": 
0.23487117886543274, "learning_rate": 0.0002344838376018056, "loss": 0.9029, "step": 9090 }, { "epoch": 1.1, "grad_norm": 0.20502355694770813, "learning_rate": 0.00023439878776800542, "loss": 0.9057, "step": 9095 }, { "epoch": 1.1, "grad_norm": 0.26424431800842285, "learning_rate": 0.00023431369821333293, "loss": 0.9132, "step": 9100 }, { "epoch": 1.1, "grad_norm": 0.2559512257575989, "learning_rate": 0.00023422856897783412, "loss": 0.8964, "step": 9105 }, { "epoch": 1.1, "grad_norm": 0.21677064895629883, "learning_rate": 0.0002341434001015736, "loss": 0.851, "step": 9110 }, { "epoch": 1.1, "grad_norm": 0.2509647607803345, "learning_rate": 0.00023405819162463466, "loss": 0.8652, "step": 9115 }, { "epoch": 1.1, "grad_norm": 0.20928482711315155, "learning_rate": 0.00023397294358711924, "loss": 0.8989, "step": 9120 }, { "epoch": 1.1, "grad_norm": 0.2391272932291031, "learning_rate": 0.00023388765602914792, "loss": 1.0029, "step": 9125 }, { "epoch": 1.1, "grad_norm": 0.21468204259872437, "learning_rate": 0.0002338023289908599, "loss": 0.817, "step": 9130 }, { "epoch": 1.1, "grad_norm": 0.2177235335111618, "learning_rate": 0.00023371696251241279, "loss": 0.8765, "step": 9135 }, { "epoch": 1.1, "grad_norm": 0.2142833024263382, "learning_rate": 0.000233631556633983, "loss": 0.8559, "step": 9140 }, { "epoch": 1.1, "grad_norm": 0.22310085594654083, "learning_rate": 0.00023354611139576536, "loss": 0.8448, "step": 9145 }, { "epoch": 1.1, "grad_norm": 0.2540875971317291, "learning_rate": 0.00023346062683797324, "loss": 0.7871, "step": 9150 }, { "epoch": 1.1, "grad_norm": 0.2475668340921402, "learning_rate": 0.0002333751030008384, "loss": 0.923, "step": 9155 }, { "epoch": 1.1, "grad_norm": 0.24676388502120972, "learning_rate": 0.0002332895399246114, "loss": 0.792, "step": 9160 }, { "epoch": 1.1, "grad_norm": 0.22518321871757507, "learning_rate": 0.00023320393764956086, "loss": 0.8875, "step": 9165 }, { "epoch": 1.1, "grad_norm": 0.24502132833003998, "learning_rate": 
0.00023311829621597418, "loss": 0.9119, "step": 9170 }, { "epoch": 1.11, "grad_norm": 0.21606354415416718, "learning_rate": 0.00023303261566415704, "loss": 0.908, "step": 9175 }, { "epoch": 1.11, "grad_norm": 0.2341851145029068, "learning_rate": 0.00023294689603443352, "loss": 0.8705, "step": 9180 }, { "epoch": 1.11, "grad_norm": 0.23818925023078918, "learning_rate": 0.00023286113736714612, "loss": 0.9589, "step": 9185 }, { "epoch": 1.11, "grad_norm": 0.2910539507865906, "learning_rate": 0.0002327753397026558, "loss": 0.8729, "step": 9190 }, { "epoch": 1.11, "grad_norm": 0.24293243885040283, "learning_rate": 0.0002326895030813417, "loss": 0.8785, "step": 9195 }, { "epoch": 1.11, "grad_norm": 0.22408191859722137, "learning_rate": 0.0002326036275436014, "loss": 0.8641, "step": 9200 }, { "epoch": 1.11, "grad_norm": 0.2291467785835266, "learning_rate": 0.00023251771312985084, "loss": 0.8185, "step": 9205 }, { "epoch": 1.11, "grad_norm": 0.24368716776371002, "learning_rate": 0.0002324317598805241, "loss": 0.8201, "step": 9210 }, { "epoch": 1.11, "grad_norm": 0.24222686886787415, "learning_rate": 0.00023234576783607373, "loss": 0.9468, "step": 9215 }, { "epoch": 1.11, "grad_norm": 0.23429812490940094, "learning_rate": 0.00023225973703697037, "loss": 0.9275, "step": 9220 }, { "epoch": 1.11, "grad_norm": 0.23759447038173676, "learning_rate": 0.000232173667523703, "loss": 0.8471, "step": 9225 }, { "epoch": 1.11, "grad_norm": 0.2510417401790619, "learning_rate": 0.00023208755933677881, "loss": 0.8939, "step": 9230 }, { "epoch": 1.11, "grad_norm": 0.23719042539596558, "learning_rate": 0.00023200141251672314, "loss": 0.8509, "step": 9235 }, { "epoch": 1.11, "grad_norm": 0.21512798964977264, "learning_rate": 0.0002319152271040796, "loss": 0.8789, "step": 9240 }, { "epoch": 1.11, "grad_norm": 0.2508089542388916, "learning_rate": 0.00023182900313940979, "loss": 0.8954, "step": 9245 }, { "epoch": 1.11, "grad_norm": 0.22558368742465973, "learning_rate": 0.00023174274066329367, 
"loss": 0.9115, "step": 9250 }, { "epoch": 1.12, "grad_norm": 0.2333495169878006, "learning_rate": 0.00023165643971632924, "loss": 0.9437, "step": 9255 }, { "epoch": 1.12, "grad_norm": 0.27782970666885376, "learning_rate": 0.00023157010033913252, "loss": 0.9699, "step": 9260 }, { "epoch": 1.12, "grad_norm": 0.2564479410648346, "learning_rate": 0.0002314837225723377, "loss": 0.8747, "step": 9265 }, { "epoch": 1.12, "grad_norm": 0.2404770702123642, "learning_rate": 0.0002313973064565971, "loss": 0.891, "step": 9270 }, { "epoch": 1.12, "grad_norm": 0.22722746431827545, "learning_rate": 0.00023131085203258092, "loss": 0.8386, "step": 9275 }, { "epoch": 1.12, "grad_norm": 0.21286796033382416, "learning_rate": 0.00023122435934097755, "loss": 0.9262, "step": 9280 }, { "epoch": 1.12, "grad_norm": 0.22385799884796143, "learning_rate": 0.00023113782842249328, "loss": 0.857, "step": 9285 }, { "epoch": 1.12, "grad_norm": 0.222075417637825, "learning_rate": 0.00023105125931785245, "loss": 0.894, "step": 9290 }, { "epoch": 1.12, "grad_norm": 0.23088407516479492, "learning_rate": 0.00023096465206779736, "loss": 0.8705, "step": 9295 }, { "epoch": 1.12, "grad_norm": 0.2188183218240738, "learning_rate": 0.00023087800671308826, "loss": 0.8935, "step": 9300 }, { "epoch": 1.12, "grad_norm": 0.2047409862279892, "learning_rate": 0.0002307913232945033, "loss": 0.8515, "step": 9305 }, { "epoch": 1.12, "grad_norm": 0.23035292327404022, "learning_rate": 0.00023070460185283862, "loss": 0.9546, "step": 9310 }, { "epoch": 1.12, "grad_norm": 0.2521415054798126, "learning_rate": 0.00023061784242890817, "loss": 0.8708, "step": 9315 }, { "epoch": 1.12, "grad_norm": 0.2409847378730774, "learning_rate": 0.00023053104506354387, "loss": 0.857, "step": 9320 }, { "epoch": 1.12, "grad_norm": 0.31675606966018677, "learning_rate": 0.0002304442097975954, "loss": 0.9459, "step": 9325 }, { "epoch": 1.12, "grad_norm": 0.30147692561149597, "learning_rate": 0.00023035733667193034, "loss": 0.8456, "step": 9330 }, 
{ "epoch": 1.12, "grad_norm": 0.24489833414554596, "learning_rate": 0.00023027042572743405, "loss": 0.8259, "step": 9335 }, { "epoch": 1.13, "grad_norm": 0.2613428831100464, "learning_rate": 0.00023018347700500973, "loss": 1.0109, "step": 9340 }, { "epoch": 1.13, "grad_norm": 0.24886786937713623, "learning_rate": 0.00023009649054557828, "loss": 0.8927, "step": 9345 }, { "epoch": 1.13, "grad_norm": 0.2450346052646637, "learning_rate": 0.00023000946639007848, "loss": 0.9878, "step": 9350 }, { "epoch": 1.13, "grad_norm": 0.2285212278366089, "learning_rate": 0.00022992240457946674, "loss": 0.8819, "step": 9355 }, { "epoch": 1.13, "grad_norm": 0.22266454994678497, "learning_rate": 0.00022983530515471732, "loss": 0.8673, "step": 9360 }, { "epoch": 1.13, "grad_norm": 0.228920578956604, "learning_rate": 0.000229748168156822, "loss": 0.9104, "step": 9365 }, { "epoch": 1.13, "grad_norm": 0.22747859358787537, "learning_rate": 0.00022966099362679038, "loss": 0.8806, "step": 9370 }, { "epoch": 1.13, "grad_norm": 0.24505122005939484, "learning_rate": 0.00022957378160564973, "loss": 0.9054, "step": 9375 }, { "epoch": 1.13, "grad_norm": 0.22118686139583588, "learning_rate": 0.00022948653213444487, "loss": 0.8939, "step": 9380 }, { "epoch": 1.13, "grad_norm": 0.21411919593811035, "learning_rate": 0.00022939924525423834, "loss": 0.9658, "step": 9385 }, { "epoch": 1.13, "grad_norm": 0.25308582186698914, "learning_rate": 0.00022931192100611022, "loss": 0.9179, "step": 9390 }, { "epoch": 1.13, "grad_norm": 0.24361754953861237, "learning_rate": 0.0002292245594311582, "loss": 0.95, "step": 9395 }, { "epoch": 1.13, "grad_norm": 0.23733997344970703, "learning_rate": 0.00022913716057049757, "loss": 0.8376, "step": 9400 }, { "epoch": 1.13, "grad_norm": 0.23433391749858856, "learning_rate": 0.0002290497244652611, "loss": 0.9291, "step": 9405 }, { "epoch": 1.13, "grad_norm": 0.22158537805080414, "learning_rate": 0.00022896225115659913, "loss": 0.8927, "step": 9410 }, { "epoch": 1.13, 
"grad_norm": 0.21899032592773438, "learning_rate": 0.00022887474068567952, "loss": 0.8168, "step": 9415 }, { "epoch": 1.14, "grad_norm": 0.23201830685138702, "learning_rate": 0.0002287871930936876, "loss": 0.9301, "step": 9420 }, { "epoch": 1.14, "grad_norm": 0.22301717102527618, "learning_rate": 0.00022869960842182614, "loss": 0.8611, "step": 9425 }, { "epoch": 1.14, "grad_norm": 0.2502361238002777, "learning_rate": 0.00022861198671131542, "loss": 0.9485, "step": 9430 }, { "epoch": 1.14, "grad_norm": 0.24774372577667236, "learning_rate": 0.00022852432800339313, "loss": 0.8949, "step": 9435 }, { "epoch": 1.14, "grad_norm": 0.2235736846923828, "learning_rate": 0.00022843663233931442, "loss": 0.8333, "step": 9440 }, { "epoch": 1.14, "grad_norm": 0.23626188933849335, "learning_rate": 0.0002283488997603517, "loss": 0.8102, "step": 9445 }, { "epoch": 1.14, "grad_norm": 0.25719600915908813, "learning_rate": 0.00022826113030779486, "loss": 0.8408, "step": 9450 }, { "epoch": 1.14, "grad_norm": 0.2473081797361374, "learning_rate": 0.00022817332402295113, "loss": 0.81, "step": 9455 }, { "epoch": 1.14, "grad_norm": 0.20688697695732117, "learning_rate": 0.00022808548094714506, "loss": 0.896, "step": 9460 }, { "epoch": 1.14, "grad_norm": 0.26973938941955566, "learning_rate": 0.0002279976011217185, "loss": 0.878, "step": 9465 }, { "epoch": 1.14, "grad_norm": 0.24812696874141693, "learning_rate": 0.00022790968458803065, "loss": 0.7833, "step": 9470 }, { "epoch": 1.14, "grad_norm": 0.22464975714683533, "learning_rate": 0.00022782173138745793, "loss": 0.8938, "step": 9475 }, { "epoch": 1.14, "grad_norm": 0.23503178358078003, "learning_rate": 0.00022773374156139406, "loss": 0.8603, "step": 9480 }, { "epoch": 1.14, "grad_norm": 0.236893430352211, "learning_rate": 0.00022764571515124994, "loss": 0.8637, "step": 9485 }, { "epoch": 1.14, "grad_norm": 0.24112752079963684, "learning_rate": 0.00022755765219845372, "loss": 0.893, "step": 9490 }, { "epoch": 1.14, "grad_norm": 
0.2311786562204361, "learning_rate": 0.00022746955274445077, "loss": 0.8524, "step": 9495 }, { "epoch": 1.14, "grad_norm": 0.22384856641292572, "learning_rate": 0.0002273814168307036, "loss": 0.8552, "step": 9500 }, { "epoch": 1.15, "grad_norm": 0.2306017130613327, "learning_rate": 0.00022729324449869198, "loss": 0.9581, "step": 9505 }, { "epoch": 1.15, "grad_norm": 0.23824800550937653, "learning_rate": 0.0002272050357899126, "loss": 0.8444, "step": 9510 }, { "epoch": 1.15, "grad_norm": 0.22070693969726562, "learning_rate": 0.00022711679074587953, "loss": 0.9218, "step": 9515 }, { "epoch": 1.15, "grad_norm": 0.25660625100135803, "learning_rate": 0.0002270285094081237, "loss": 0.9183, "step": 9520 }, { "epoch": 1.15, "grad_norm": 0.22683514654636383, "learning_rate": 0.00022694019181819337, "loss": 0.8944, "step": 9525 }, { "epoch": 1.15, "grad_norm": 0.2356630563735962, "learning_rate": 0.0002268518380176536, "loss": 0.86, "step": 9530 }, { "epoch": 1.15, "grad_norm": 0.2331274300813675, "learning_rate": 0.00022676344804808675, "loss": 0.9143, "step": 9535 }, { "epoch": 1.15, "grad_norm": 0.22305701673030853, "learning_rate": 0.00022667502195109198, "loss": 0.8527, "step": 9540 }, { "epoch": 1.15, "grad_norm": 0.23889246582984924, "learning_rate": 0.00022658655976828557, "loss": 0.9313, "step": 9545 }, { "epoch": 1.15, "grad_norm": 0.25855275988578796, "learning_rate": 0.00022649806154130078, "loss": 0.9187, "step": 9550 }, { "epoch": 1.15, "grad_norm": 0.21481330692768097, "learning_rate": 0.00022640952731178786, "loss": 0.8408, "step": 9555 }, { "epoch": 1.15, "grad_norm": 0.2593211531639099, "learning_rate": 0.00022632095712141382, "loss": 0.8638, "step": 9560 }, { "epoch": 1.15, "grad_norm": 0.2373592108488083, "learning_rate": 0.0002262323510118629, "loss": 0.8963, "step": 9565 }, { "epoch": 1.15, "grad_norm": 0.2230243980884552, "learning_rate": 0.0002261437090248359, "loss": 0.8791, "step": 9570 }, { "epoch": 1.15, "grad_norm": 0.21514588594436646, 
"learning_rate": 0.00022605503120205086, "loss": 0.8647, "step": 9575 }, { "epoch": 1.15, "grad_norm": 0.22944727540016174, "learning_rate": 0.0002259663175852424, "loss": 0.9185, "step": 9580 }, { "epoch": 1.15, "grad_norm": 0.2357485294342041, "learning_rate": 0.00022587756821616214, "loss": 0.8481, "step": 9585 }, { "epoch": 1.16, "grad_norm": 0.21694447100162506, "learning_rate": 0.00022578878313657844, "loss": 0.9085, "step": 9590 }, { "epoch": 1.16, "grad_norm": 0.23980222642421722, "learning_rate": 0.00022569996238827654, "loss": 0.8727, "step": 9595 }, { "epoch": 1.16, "grad_norm": 0.21908119320869446, "learning_rate": 0.00022561110601305838, "loss": 0.8425, "step": 9600 }, { "epoch": 1.16, "grad_norm": 0.2655925452709198, "learning_rate": 0.0002255222140527428, "loss": 0.9436, "step": 9605 }, { "epoch": 1.16, "grad_norm": 0.25513389706611633, "learning_rate": 0.0002254332865491653, "loss": 0.9691, "step": 9610 }, { "epoch": 1.16, "grad_norm": 0.2476007342338562, "learning_rate": 0.000225344323544178, "loss": 0.9306, "step": 9615 }, { "epoch": 1.16, "grad_norm": 0.2203802466392517, "learning_rate": 0.00022525532507965004, "loss": 0.9796, "step": 9620 }, { "epoch": 1.16, "grad_norm": 0.21193300187587738, "learning_rate": 0.0002251662911974669, "loss": 0.8913, "step": 9625 }, { "epoch": 1.16, "grad_norm": 0.21106500923633575, "learning_rate": 0.0002250772219395309, "loss": 0.8984, "step": 9630 }, { "epoch": 1.16, "grad_norm": 0.24506857991218567, "learning_rate": 0.00022498811734776103, "loss": 0.8861, "step": 9635 }, { "epoch": 1.16, "grad_norm": 0.22095774114131927, "learning_rate": 0.0002248989774640929, "loss": 0.8482, "step": 9640 }, { "epoch": 1.16, "grad_norm": 0.24854597449302673, "learning_rate": 0.0002248098023304786, "loss": 0.8741, "step": 9645 }, { "epoch": 1.16, "grad_norm": 0.22818954288959503, "learning_rate": 0.00022472059198888698, "loss": 0.8486, "step": 9650 }, { "epoch": 1.16, "grad_norm": 0.24147124588489532, "learning_rate": 
0.0002246313464813034, "loss": 0.9675, "step": 9655 }, { "epoch": 1.16, "grad_norm": 0.22634311020374298, "learning_rate": 0.00022454206584972971, "loss": 0.8038, "step": 9660 }, { "epoch": 1.16, "grad_norm": 0.23374037444591522, "learning_rate": 0.00022445275013618444, "loss": 0.9025, "step": 9665 }, { "epoch": 1.17, "grad_norm": 0.22729557752609253, "learning_rate": 0.00022436339938270236, "loss": 0.8884, "step": 9670 }, { "epoch": 1.17, "grad_norm": 0.24079249799251556, "learning_rate": 0.00022427401363133502, "loss": 0.8695, "step": 9675 }, { "epoch": 1.17, "grad_norm": 0.22827574610710144, "learning_rate": 0.0002241845929241503, "loss": 0.8461, "step": 9680 }, { "epoch": 1.17, "grad_norm": 0.22621886432170868, "learning_rate": 0.00022409513730323256, "loss": 1.0162, "step": 9685 }, { "epoch": 1.17, "grad_norm": 0.24320857226848602, "learning_rate": 0.00022400564681068264, "loss": 0.899, "step": 9690 }, { "epoch": 1.17, "grad_norm": 0.24638378620147705, "learning_rate": 0.00022391612148861764, "loss": 0.8847, "step": 9695 }, { "epoch": 1.17, "grad_norm": 0.25771698355674744, "learning_rate": 0.00022382656137917117, "loss": 0.9079, "step": 9700 }, { "epoch": 1.17, "grad_norm": 0.2110559642314911, "learning_rate": 0.0002237369665244932, "loss": 0.9551, "step": 9705 }, { "epoch": 1.17, "grad_norm": 0.220799520611763, "learning_rate": 0.00022364733696675007, "loss": 0.9265, "step": 9710 }, { "epoch": 1.17, "grad_norm": 0.23551765084266663, "learning_rate": 0.00022355767274812442, "loss": 0.9568, "step": 9715 }, { "epoch": 1.17, "grad_norm": 0.22385844588279724, "learning_rate": 0.0002234679739108152, "loss": 0.8029, "step": 9720 }, { "epoch": 1.17, "grad_norm": 0.22753435373306274, "learning_rate": 0.00022337824049703764, "loss": 0.9675, "step": 9725 }, { "epoch": 1.17, "grad_norm": 0.2253807932138443, "learning_rate": 0.00022328847254902333, "loss": 0.8846, "step": 9730 }, { "epoch": 1.17, "grad_norm": 0.20921418070793152, "learning_rate": 0.00022319867010901998, 
"loss": 0.8575, "step": 9735 }, { "epoch": 1.17, "grad_norm": 0.22992300987243652, "learning_rate": 0.0002231088332192916, "loss": 0.8715, "step": 9740 }, { "epoch": 1.17, "grad_norm": 0.23045524954795837, "learning_rate": 0.00022301896192211847, "loss": 0.8612, "step": 9745 }, { "epoch": 1.17, "grad_norm": 0.2747400999069214, "learning_rate": 0.00022292905625979694, "loss": 0.8996, "step": 9750 }, { "epoch": 1.18, "grad_norm": 0.2662562131881714, "learning_rate": 0.0002228391162746397, "loss": 0.8955, "step": 9755 }, { "epoch": 1.18, "grad_norm": 0.2570030689239502, "learning_rate": 0.00022274914200897533, "loss": 0.9389, "step": 9760 }, { "epoch": 1.18, "grad_norm": 0.21170185506343842, "learning_rate": 0.0002226591335051489, "loss": 0.8996, "step": 9765 }, { "epoch": 1.18, "grad_norm": 0.2330474704504013, "learning_rate": 0.00022256909080552127, "loss": 0.855, "step": 9770 }, { "epoch": 1.18, "grad_norm": 0.23178677260875702, "learning_rate": 0.00022247901395246956, "loss": 0.908, "step": 9775 }, { "epoch": 1.18, "grad_norm": 0.2605157494544983, "learning_rate": 0.00022238890298838696, "loss": 0.9045, "step": 9780 }, { "epoch": 1.18, "grad_norm": 0.2496998906135559, "learning_rate": 0.00022229875795568262, "loss": 0.8768, "step": 9785 }, { "epoch": 1.18, "grad_norm": 0.24557536840438843, "learning_rate": 0.00022220857889678177, "loss": 0.8546, "step": 9790 }, { "epoch": 1.18, "grad_norm": 0.22727739810943604, "learning_rate": 0.00022211836585412582, "loss": 0.8804, "step": 9795 }, { "epoch": 1.18, "grad_norm": 0.21401318907737732, "learning_rate": 0.00022202811887017188, "loss": 0.8564, "step": 9800 }, { "epoch": 1.18, "grad_norm": 0.25362133979797363, "learning_rate": 0.00022193783798739325, "loss": 0.925, "step": 9805 }, { "epoch": 1.18, "grad_norm": 0.24248819053173065, "learning_rate": 0.00022184752324827902, "loss": 0.8071, "step": 9810 }, { "epoch": 1.18, "grad_norm": 0.2690879702568054, "learning_rate": 0.0002217571746953344, "loss": 0.926, "step": 9815 
}, { "epoch": 1.18, "grad_norm": 0.24345597624778748, "learning_rate": 0.00022166679237108037, "loss": 0.9282, "step": 9820 }, { "epoch": 1.18, "grad_norm": 0.26057952642440796, "learning_rate": 0.0002215763763180539, "loss": 0.9823, "step": 9825 }, { "epoch": 1.18, "grad_norm": 0.23440931737422943, "learning_rate": 0.00022148592657880768, "loss": 0.8077, "step": 9830 }, { "epoch": 1.19, "grad_norm": 0.258858323097229, "learning_rate": 0.00022139544319591052, "loss": 0.8348, "step": 9835 }, { "epoch": 1.19, "grad_norm": 0.24155326187610626, "learning_rate": 0.00022130492621194681, "loss": 0.854, "step": 9840 }, { "epoch": 1.19, "grad_norm": 0.23018091917037964, "learning_rate": 0.00022121437566951686, "loss": 1.018, "step": 9845 }, { "epoch": 1.19, "grad_norm": 0.21002855896949768, "learning_rate": 0.00022112379161123673, "loss": 0.8137, "step": 9850 }, { "epoch": 1.19, "grad_norm": 0.28612980246543884, "learning_rate": 0.00022103317407973837, "loss": 0.8465, "step": 9855 }, { "epoch": 1.19, "grad_norm": 0.20760983228683472, "learning_rate": 0.00022094252311766929, "loss": 0.9207, "step": 9860 }, { "epoch": 1.19, "grad_norm": 0.23730003833770752, "learning_rate": 0.00022085183876769293, "loss": 0.9247, "step": 9865 }, { "epoch": 1.19, "grad_norm": 0.2215205430984497, "learning_rate": 0.00022076112107248833, "loss": 0.901, "step": 9870 }, { "epoch": 1.19, "grad_norm": 0.2527283728122711, "learning_rate": 0.00022067037007475026, "loss": 0.9053, "step": 9875 }, { "epoch": 1.19, "grad_norm": 0.20407399535179138, "learning_rate": 0.00022057958581718915, "loss": 0.9464, "step": 9880 }, { "epoch": 1.19, "grad_norm": 0.23334871232509613, "learning_rate": 0.00022048876834253103, "loss": 0.8626, "step": 9885 }, { "epoch": 1.19, "grad_norm": 0.22306807339191437, "learning_rate": 0.00022039791769351772, "loss": 0.8706, "step": 9890 }, { "epoch": 1.19, "grad_norm": 0.21715085208415985, "learning_rate": 0.00022030703391290646, "loss": 0.847, "step": 9895 }, { "epoch": 1.19, 
"grad_norm": 0.24487926065921783, "learning_rate": 0.00022021611704347026, "loss": 0.9132, "step": 9900 }, { "epoch": 1.19, "grad_norm": 0.24505330622196198, "learning_rate": 0.00022012516712799756, "loss": 0.9286, "step": 9905 }, { "epoch": 1.19, "grad_norm": 0.2754554748535156, "learning_rate": 0.00022003418420929243, "loss": 0.885, "step": 9910 }, { "epoch": 1.19, "grad_norm": 0.2651885449886322, "learning_rate": 0.00021994316833017443, "loss": 0.9331, "step": 9915 }, { "epoch": 1.2, "grad_norm": 0.25558674335479736, "learning_rate": 0.00021987033192416743, "loss": 1.0044, "step": 9920 }, { "epoch": 1.2, "grad_norm": 0.2470509111881256, "learning_rate": 0.00021977925682426118, "loss": 0.8667, "step": 9925 }, { "epoch": 1.2, "grad_norm": 0.22508101165294647, "learning_rate": 0.00021968814888391935, "loss": 0.9218, "step": 9930 }, { "epoch": 1.2, "grad_norm": 0.22299574315547943, "learning_rate": 0.00021959700814602035, "loss": 0.9041, "step": 9935 }, { "epoch": 1.2, "grad_norm": 0.25101420283317566, "learning_rate": 0.0002195058346534581, "loss": 0.8797, "step": 9940 }, { "epoch": 1.2, "grad_norm": 0.23478543758392334, "learning_rate": 0.00021941462844914182, "loss": 0.9141, "step": 9945 }, { "epoch": 1.2, "grad_norm": 0.25883424282073975, "learning_rate": 0.00021932338957599625, "loss": 0.9015, "step": 9950 }, { "epoch": 1.2, "grad_norm": 0.22861811518669128, "learning_rate": 0.00021923211807696133, "loss": 0.8923, "step": 9955 }, { "epoch": 1.2, "grad_norm": 0.24520093202590942, "learning_rate": 0.00021914081399499258, "loss": 0.8958, "step": 9960 }, { "epoch": 1.2, "grad_norm": 0.23438018560409546, "learning_rate": 0.00021904947737306065, "loss": 1.023, "step": 9965 }, { "epoch": 1.2, "grad_norm": 0.24439287185668945, "learning_rate": 0.0002189581082541516, "loss": 0.9076, "step": 9970 }, { "epoch": 1.2, "grad_norm": 0.23474149405956268, "learning_rate": 0.00021886670668126674, "loss": 0.8026, "step": 9975 }, { "epoch": 1.2, "grad_norm": 0.25041329860687256, 
"learning_rate": 0.00021877527269742277, "loss": 0.9282, "step": 9980 }, { "epoch": 1.2, "grad_norm": 0.2247781604528427, "learning_rate": 0.00021868380634565147, "loss": 0.907, "step": 9985 }, { "epoch": 1.2, "grad_norm": 0.22819451987743378, "learning_rate": 0.00021859230766900004, "loss": 0.8245, "step": 9990 }, { "epoch": 1.2, "grad_norm": 0.2385624200105667, "learning_rate": 0.00021850077671053072, "loss": 0.9177, "step": 9995 }, { "epoch": 1.2, "grad_norm": 0.21760748326778412, "learning_rate": 0.00021840921351332107, "loss": 0.8566, "step": 10000 }, { "epoch": 1.21, "grad_norm": 0.22546535730361938, "learning_rate": 0.00021831761812046363, "loss": 0.8671, "step": 10005 }, { "epoch": 1.21, "grad_norm": 0.2704804539680481, "learning_rate": 0.00021822599057506648, "loss": 0.8348, "step": 10010 }, { "epoch": 1.21, "grad_norm": 0.2751447558403015, "learning_rate": 0.00021813433092025236, "loss": 0.9228, "step": 10015 }, { "epoch": 1.21, "grad_norm": 0.241837278008461, "learning_rate": 0.00021804263919915947, "loss": 0.9471, "step": 10020 }, { "epoch": 1.21, "grad_norm": 0.24710319936275482, "learning_rate": 0.00021795091545494092, "loss": 0.959, "step": 10025 }, { "epoch": 1.21, "grad_norm": 0.2651970684528351, "learning_rate": 0.000217859159730765, "loss": 0.8134, "step": 10030 }, { "epoch": 1.21, "grad_norm": 0.2539055347442627, "learning_rate": 0.00021776737206981498, "loss": 0.9223, "step": 10035 }, { "epoch": 1.21, "grad_norm": 0.31177031993865967, "learning_rate": 0.0002176755525152892, "loss": 0.9064, "step": 10040 }, { "epoch": 1.21, "grad_norm": 0.22030460834503174, "learning_rate": 0.00021758370111040094, "loss": 0.8532, "step": 10045 }, { "epoch": 1.21, "grad_norm": 0.2701607346534729, "learning_rate": 0.00021749181789837858, "loss": 0.8357, "step": 10050 }, { "epoch": 1.21, "grad_norm": 0.24372340738773346, "learning_rate": 0.00021739990292246535, "loss": 0.929, "step": 10055 }, { "epoch": 1.21, "grad_norm": 0.26125940680503845, "learning_rate": 
0.00021730795622591952, "loss": 0.9623, "step": 10060 }, { "epoch": 1.21, "grad_norm": 0.23417995870113373, "learning_rate": 0.00021721597785201427, "loss": 0.7909, "step": 10065 }, { "epoch": 1.21, "grad_norm": 0.22926466166973114, "learning_rate": 0.00021712396784403772, "loss": 0.8785, "step": 10070 }, { "epoch": 1.21, "grad_norm": 0.24475359916687012, "learning_rate": 0.00021703192624529272, "loss": 0.902, "step": 10075 }, { "epoch": 1.21, "grad_norm": 0.2308359593153, "learning_rate": 0.0002169398530990972, "loss": 0.9083, "step": 10080 }, { "epoch": 1.22, "grad_norm": 0.27559104561805725, "learning_rate": 0.00021684774844878376, "loss": 0.8948, "step": 10085 }, { "epoch": 1.22, "grad_norm": 0.22297874093055725, "learning_rate": 0.0002167556123377, "loss": 0.8743, "step": 10090 }, { "epoch": 1.22, "grad_norm": 0.25241953134536743, "learning_rate": 0.00021666344480920818, "loss": 0.8831, "step": 10095 }, { "epoch": 1.22, "grad_norm": 0.2630932033061981, "learning_rate": 0.0002165712459066854, "loss": 0.8839, "step": 10100 }, { "epoch": 1.22, "grad_norm": 0.24986648559570312, "learning_rate": 0.00021647901567352357, "loss": 0.9109, "step": 10105 }, { "epoch": 1.22, "grad_norm": 0.24380378425121307, "learning_rate": 0.00021638675415312924, "loss": 0.9033, "step": 10110 }, { "epoch": 1.22, "grad_norm": 0.22641584277153015, "learning_rate": 0.00021629446138892377, "loss": 0.9176, "step": 10115 }, { "epoch": 1.22, "grad_norm": 0.2435508668422699, "learning_rate": 0.0002162021374243432, "loss": 0.8718, "step": 10120 }, { "epoch": 1.22, "grad_norm": 0.25140294432640076, "learning_rate": 0.00021610978230283823, "loss": 0.9289, "step": 10125 }, { "epoch": 1.22, "grad_norm": 0.23567943274974823, "learning_rate": 0.0002160173960678743, "loss": 0.8465, "step": 10130 }, { "epoch": 1.22, "grad_norm": 0.2589253783226013, "learning_rate": 0.00021592497876293137, "loss": 0.9386, "step": 10135 }, { "epoch": 1.22, "grad_norm": 0.2508923411369324, "learning_rate": 
0.0002158325304315042, "loss": 0.8488, "step": 10140 }, { "epoch": 1.22, "grad_norm": 0.2547130584716797, "learning_rate": 0.00021574005111710192, "loss": 0.8462, "step": 10145 }, { "epoch": 1.22, "grad_norm": 0.22627414762973785, "learning_rate": 0.00021564754086324844, "loss": 0.8633, "step": 10150 }, { "epoch": 1.22, "grad_norm": 0.21264758706092834, "learning_rate": 0.00021555499971348215, "loss": 0.8839, "step": 10155 }, { "epoch": 1.22, "grad_norm": 0.21670518815517426, "learning_rate": 0.00021546242771135597, "loss": 0.8753, "step": 10160 }, { "epoch": 1.22, "grad_norm": 0.21713611483573914, "learning_rate": 0.0002153698249004374, "loss": 0.838, "step": 10165 }, { "epoch": 1.23, "grad_norm": 0.20740459859371185, "learning_rate": 0.00021527719132430833, "loss": 0.9157, "step": 10170 }, { "epoch": 1.23, "grad_norm": 0.23185713589191437, "learning_rate": 0.00021518452702656528, "loss": 0.8678, "step": 10175 }, { "epoch": 1.23, "grad_norm": 0.24864669144153595, "learning_rate": 0.00021509183205081905, "loss": 0.8134, "step": 10180 }, { "epoch": 1.23, "grad_norm": 0.21126706898212433, "learning_rate": 0.00021499910644069502, "loss": 0.8509, "step": 10185 }, { "epoch": 1.23, "grad_norm": 0.21727898716926575, "learning_rate": 0.000214906350239833, "loss": 0.8654, "step": 10190 }, { "epoch": 1.23, "grad_norm": 0.2389363795518875, "learning_rate": 0.00021481356349188705, "loss": 0.8315, "step": 10195 }, { "epoch": 1.23, "grad_norm": 0.22045078873634338, "learning_rate": 0.00021472074624052573, "loss": 0.851, "step": 10200 }, { "epoch": 1.23, "grad_norm": 0.22910571098327637, "learning_rate": 0.0002146278985294319, "loss": 0.9826, "step": 10205 }, { "epoch": 1.23, "grad_norm": 0.2421579509973526, "learning_rate": 0.0002145350204023028, "loss": 0.8577, "step": 10210 }, { "epoch": 1.23, "grad_norm": 0.2479030340909958, "learning_rate": 0.00021444211190285001, "loss": 0.9059, "step": 10215 }, { "epoch": 1.23, "grad_norm": 0.22180233895778656, "learning_rate": 
0.00021434917307479927, "loss": 0.9248, "step": 10220 }, { "epoch": 1.23, "grad_norm": 0.23422615230083466, "learning_rate": 0.00021425620396189071, "loss": 0.8011, "step": 10225 }, { "epoch": 1.23, "grad_norm": 0.23456484079360962, "learning_rate": 0.0002141632046078787, "loss": 0.8772, "step": 10230 }, { "epoch": 1.23, "grad_norm": 0.24434050917625427, "learning_rate": 0.00021407017505653176, "loss": 0.8983, "step": 10235 }, { "epoch": 1.23, "grad_norm": 0.26233410835266113, "learning_rate": 0.00021397711535163275, "loss": 0.7669, "step": 10240 }, { "epoch": 1.23, "grad_norm": 0.23086436092853546, "learning_rate": 0.00021388402553697863, "loss": 0.85, "step": 10245 }, { "epoch": 1.24, "grad_norm": 0.2340136170387268, "learning_rate": 0.00021379090565638064, "loss": 0.8472, "step": 10250 }, { "epoch": 1.24, "grad_norm": 0.2060890644788742, "learning_rate": 0.00021369775575366397, "loss": 0.791, "step": 10255 }, { "epoch": 1.24, "grad_norm": 0.22005440294742584, "learning_rate": 0.00021360457587266812, "loss": 0.7792, "step": 10260 }, { "epoch": 1.24, "grad_norm": 0.220989391207695, "learning_rate": 0.00021351136605724658, "loss": 0.9087, "step": 10265 }, { "epoch": 1.24, "grad_norm": 0.20939388871192932, "learning_rate": 0.00021341812635126706, "loss": 0.8171, "step": 10270 }, { "epoch": 1.24, "grad_norm": 0.22090451419353485, "learning_rate": 0.00021332485679861123, "loss": 0.831, "step": 10275 }, { "epoch": 1.24, "grad_norm": 0.25128301978111267, "learning_rate": 0.0002132315574431748, "loss": 0.8665, "step": 10280 }, { "epoch": 1.24, "grad_norm": 0.23706404864788055, "learning_rate": 0.00021313822832886762, "loss": 0.8653, "step": 10285 }, { "epoch": 1.24, "grad_norm": 0.21622657775878906, "learning_rate": 0.00021304486949961344, "loss": 0.9074, "step": 10290 }, { "epoch": 1.24, "grad_norm": 0.20798739790916443, "learning_rate": 0.00021295148099935, "loss": 0.9282, "step": 10295 }, { "epoch": 1.24, "grad_norm": 0.226746067404747, "learning_rate": 
0.00021285806287202902, "loss": 0.8985, "step": 10300 }, { "epoch": 1.24, "grad_norm": 0.2492750883102417, "learning_rate": 0.00021276461516161622, "loss": 0.8985, "step": 10305 }, { "epoch": 1.24, "grad_norm": 0.252853125333786, "learning_rate": 0.0002126711379120912, "loss": 0.9173, "step": 10310 }, { "epoch": 1.24, "grad_norm": 0.23097646236419678, "learning_rate": 0.00021257763116744744, "loss": 0.9335, "step": 10315 }, { "epoch": 1.24, "grad_norm": 0.25283390283584595, "learning_rate": 0.0002124840949716923, "loss": 0.9115, "step": 10320 }, { "epoch": 1.24, "grad_norm": 0.2866900861263275, "learning_rate": 0.00021239052936884703, "loss": 0.9016, "step": 10325 }, { "epoch": 1.24, "grad_norm": 0.23940923810005188, "learning_rate": 0.0002122969344029467, "loss": 0.8282, "step": 10330 }, { "epoch": 1.25, "grad_norm": 0.26236215233802795, "learning_rate": 0.00021220331011804022, "loss": 0.8244, "step": 10335 }, { "epoch": 1.25, "grad_norm": 0.23621851205825806, "learning_rate": 0.00021210965655819031, "loss": 0.8655, "step": 10340 }, { "epoch": 1.25, "grad_norm": 0.21117086708545685, "learning_rate": 0.00021201597376747344, "loss": 0.9017, "step": 10345 }, { "epoch": 1.25, "grad_norm": 0.2675582468509674, "learning_rate": 0.00021192226178997977, "loss": 0.8432, "step": 10350 }, { "epoch": 1.25, "grad_norm": 0.2619032561779022, "learning_rate": 0.0002118285206698134, "loss": 0.8931, "step": 10355 }, { "epoch": 1.25, "grad_norm": 0.2394731044769287, "learning_rate": 0.0002117347504510919, "loss": 0.8431, "step": 10360 }, { "epoch": 1.25, "grad_norm": 0.22042964398860931, "learning_rate": 0.00021164095117794674, "loss": 0.8538, "step": 10365 }, { "epoch": 1.25, "grad_norm": 0.22833536565303802, "learning_rate": 0.00021154712289452285, "loss": 0.9355, "step": 10370 }, { "epoch": 1.25, "grad_norm": 0.24898435175418854, "learning_rate": 0.00021145326564497903, "loss": 0.9127, "step": 10375 }, { "epoch": 1.25, "grad_norm": 0.2540437579154968, "learning_rate": 
0.00021135937947348757, "loss": 0.9235, "step": 10380 }, { "epoch": 1.25, "grad_norm": 0.26430103182792664, "learning_rate": 0.00021126546442423453, "loss": 0.8293, "step": 10385 }, { "epoch": 1.25, "grad_norm": 0.24272888898849487, "learning_rate": 0.0002111715205414193, "loss": 0.9249, "step": 10390 }, { "epoch": 1.25, "grad_norm": 0.2834715247154236, "learning_rate": 0.00021107754786925512, "loss": 0.8638, "step": 10395 }, { "epoch": 1.25, "grad_norm": 0.23184886574745178, "learning_rate": 0.0002109835464519685, "loss": 0.8379, "step": 10400 }, { "epoch": 1.25, "grad_norm": 0.2805626392364502, "learning_rate": 0.00021088951633379982, "loss": 0.9148, "step": 10405 }, { "epoch": 1.25, "grad_norm": 0.2405596673488617, "learning_rate": 0.0002107954575590026, "loss": 0.9326, "step": 10410 }, { "epoch": 1.25, "grad_norm": 0.21941906213760376, "learning_rate": 0.00021070137017184415, "loss": 0.8786, "step": 10415 }, { "epoch": 1.26, "grad_norm": 0.24544595181941986, "learning_rate": 0.000210607254216605, "loss": 0.8619, "step": 10420 }, { "epoch": 1.26, "grad_norm": 0.22791095077991486, "learning_rate": 0.00021051310973757936, "loss": 0.8595, "step": 10425 }, { "epoch": 1.26, "grad_norm": 0.2604474127292633, "learning_rate": 0.00021041893677907473, "loss": 0.8809, "step": 10430 }, { "epoch": 1.26, "grad_norm": 0.22478339076042175, "learning_rate": 0.00021032473538541195, "loss": 0.8569, "step": 10435 }, { "epoch": 1.26, "grad_norm": 0.25225207209587097, "learning_rate": 0.0002102305056009254, "loss": 0.8429, "step": 10440 }, { "epoch": 1.26, "grad_norm": 0.24379763007164001, "learning_rate": 0.00021013624746996272, "loss": 0.8249, "step": 10445 }, { "epoch": 1.26, "grad_norm": 0.22271931171417236, "learning_rate": 0.00021004196103688487, "loss": 0.9641, "step": 10450 }, { "epoch": 1.26, "grad_norm": 0.2445717453956604, "learning_rate": 0.00020994764634606628, "loss": 0.8815, "step": 10455 }, { "epoch": 1.26, "grad_norm": 0.22217918932437897, "learning_rate": 
0.0002098533034418945, "loss": 0.8908, "step": 10460 }, { "epoch": 1.26, "grad_norm": 0.24231913685798645, "learning_rate": 0.00020975893236877048, "loss": 0.864, "step": 10465 }, { "epoch": 1.26, "grad_norm": 0.23367081582546234, "learning_rate": 0.0002096645331711083, "loss": 0.8971, "step": 10470 }, { "epoch": 1.26, "grad_norm": 0.2660883963108063, "learning_rate": 0.00020957010589333546, "loss": 0.8435, "step": 10475 }, { "epoch": 1.26, "grad_norm": 0.2160717397928238, "learning_rate": 0.00020947565057989249, "loss": 0.8609, "step": 10480 }, { "epoch": 1.26, "grad_norm": 0.26367098093032837, "learning_rate": 0.00020938116727523324, "loss": 0.9108, "step": 10485 }, { "epoch": 1.26, "grad_norm": 0.23195737600326538, "learning_rate": 0.0002092866560238247, "loss": 0.8359, "step": 10490 }, { "epoch": 1.26, "grad_norm": 0.2410704344511032, "learning_rate": 0.00020919211687014697, "loss": 0.8616, "step": 10495 }, { "epoch": 1.27, "grad_norm": 0.23946736752986908, "learning_rate": 0.00020909754985869335, "loss": 0.7911, "step": 10500 }, { "epoch": 1.27, "grad_norm": 0.23445159196853638, "learning_rate": 0.0002090029550339702, "loss": 0.8216, "step": 10505 }, { "epoch": 1.27, "grad_norm": 0.2318773865699768, "learning_rate": 0.00020890833244049695, "loss": 0.7892, "step": 10510 }, { "epoch": 1.27, "grad_norm": 0.22648349404335022, "learning_rate": 0.0002088136821228062, "loss": 0.9097, "step": 10515 }, { "epoch": 1.27, "grad_norm": 0.23851384222507477, "learning_rate": 0.00020871900412544345, "loss": 0.8725, "step": 10520 }, { "epoch": 1.27, "grad_norm": 0.22477130591869354, "learning_rate": 0.00020862429849296743, "loss": 0.8717, "step": 10525 }, { "epoch": 1.27, "grad_norm": 0.22048017382621765, "learning_rate": 0.00020852956526994963, "loss": 0.8755, "step": 10530 }, { "epoch": 1.27, "grad_norm": 0.2534314692020416, "learning_rate": 0.00020843480450097473, "loss": 0.8789, "step": 10535 }, { "epoch": 1.27, "grad_norm": 0.2241785079240799, "learning_rate": 
0.0002083400162306403, "loss": 0.818, "step": 10540 }, { "epoch": 1.27, "grad_norm": 0.25126171112060547, "learning_rate": 0.00020824520050355681, "loss": 0.8867, "step": 10545 }, { "epoch": 1.27, "grad_norm": 0.24423153698444366, "learning_rate": 0.00020815035736434766, "loss": 0.9067, "step": 10550 }, { "epoch": 1.27, "grad_norm": 0.2558138072490692, "learning_rate": 0.00020805548685764923, "loss": 0.8161, "step": 10555 }, { "epoch": 1.27, "grad_norm": 0.23357480764389038, "learning_rate": 0.00020796058902811075, "loss": 0.8864, "step": 10560 }, { "epoch": 1.27, "grad_norm": 0.2539251446723938, "learning_rate": 0.00020786566392039428, "loss": 0.881, "step": 10565 }, { "epoch": 1.27, "grad_norm": 0.2158096432685852, "learning_rate": 0.00020777071157917468, "loss": 0.8784, "step": 10570 }, { "epoch": 1.27, "grad_norm": 0.22970278561115265, "learning_rate": 0.0002076757320491397, "loss": 0.9085, "step": 10575 }, { "epoch": 1.27, "grad_norm": 0.2667364776134491, "learning_rate": 0.00020758072537498985, "loss": 0.896, "step": 10580 }, { "epoch": 1.28, "grad_norm": 0.22878745198249817, "learning_rate": 0.00020748569160143845, "loss": 0.9321, "step": 10585 }, { "epoch": 1.28, "grad_norm": 0.2596031725406647, "learning_rate": 0.00020739063077321155, "loss": 0.9034, "step": 10590 }, { "epoch": 1.28, "grad_norm": 0.23091496527194977, "learning_rate": 0.00020729554293504794, "loss": 0.9254, "step": 10595 }, { "epoch": 1.28, "grad_norm": 0.258047491312027, "learning_rate": 0.00020720042813169906, "loss": 0.7867, "step": 10600 }, { "epoch": 1.28, "grad_norm": 0.23771882057189941, "learning_rate": 0.00020710528640792916, "loss": 0.8768, "step": 10605 }, { "epoch": 1.28, "grad_norm": 0.22498424351215363, "learning_rate": 0.0002070101178085151, "loss": 0.9163, "step": 10610 }, { "epoch": 1.28, "grad_norm": 0.2267744243144989, "learning_rate": 0.0002069149223782463, "loss": 0.872, "step": 10615 }, { "epoch": 1.28, "grad_norm": 0.23287959396839142, "learning_rate": 
0.00020681970016192495, "loss": 0.9396, "step": 10620 }, { "epoch": 1.28, "grad_norm": 0.23463228344917297, "learning_rate": 0.00020672445120436582, "loss": 0.9393, "step": 10625 }, { "epoch": 1.28, "grad_norm": 0.24994787573814392, "learning_rate": 0.00020662917555039616, "loss": 0.827, "step": 10630 }, { "epoch": 1.28, "grad_norm": 0.24779953062534332, "learning_rate": 0.00020653387324485588, "loss": 0.9111, "step": 10635 }, { "epoch": 1.28, "grad_norm": 0.27712857723236084, "learning_rate": 0.00020643854433259742, "loss": 0.9207, "step": 10640 }, { "epoch": 1.28, "grad_norm": 0.25297531485557556, "learning_rate": 0.0002063431888584858, "loss": 0.7711, "step": 10645 }, { "epoch": 1.28, "grad_norm": 0.22088488936424255, "learning_rate": 0.0002062478068673983, "loss": 0.8474, "step": 10650 }, { "epoch": 1.28, "grad_norm": 0.2587786018848419, "learning_rate": 0.00020615239840422506, "loss": 0.9111, "step": 10655 }, { "epoch": 1.28, "grad_norm": 0.2736605107784271, "learning_rate": 0.00020605696351386828, "loss": 0.9088, "step": 10660 }, { "epoch": 1.29, "grad_norm": 0.275921493768692, "learning_rate": 0.00020596150224124293, "loss": 0.8737, "step": 10665 }, { "epoch": 1.29, "grad_norm": 0.24181650578975677, "learning_rate": 0.00020586601463127611, "loss": 0.9555, "step": 10670 }, { "epoch": 1.29, "grad_norm": 0.25569936633110046, "learning_rate": 0.00020577050072890764, "loss": 0.9392, "step": 10675 }, { "epoch": 1.29, "grad_norm": 0.22888949513435364, "learning_rate": 0.00020567496057908948, "loss": 0.8406, "step": 10680 }, { "epoch": 1.29, "grad_norm": 0.22984679043293, "learning_rate": 0.0002055793942267859, "loss": 0.7473, "step": 10685 }, { "epoch": 1.29, "grad_norm": 0.21859197318553925, "learning_rate": 0.00020548380171697366, "loss": 0.9393, "step": 10690 }, { "epoch": 1.29, "grad_norm": 0.23983286321163177, "learning_rate": 0.00020538818309464178, "loss": 0.7875, "step": 10695 }, { "epoch": 1.29, "grad_norm": 0.23376217484474182, "learning_rate": 
0.00020529253840479155, "loss": 0.9696, "step": 10700 }, { "epoch": 1.29, "grad_norm": 0.2256166785955429, "learning_rate": 0.00020519686769243653, "loss": 0.9457, "step": 10705 }, { "epoch": 1.29, "grad_norm": 0.24345454573631287, "learning_rate": 0.00020510117100260255, "loss": 0.77, "step": 10710 }, { "epoch": 1.29, "grad_norm": 0.2544359862804413, "learning_rate": 0.00020500544838032765, "loss": 0.895, "step": 10715 }, { "epoch": 1.29, "grad_norm": 0.2471429854631424, "learning_rate": 0.00020490969987066207, "loss": 0.9008, "step": 10720 }, { "epoch": 1.29, "grad_norm": 0.22353385388851166, "learning_rate": 0.00020481392551866827, "loss": 0.9069, "step": 10725 }, { "epoch": 1.29, "grad_norm": 0.24593974649906158, "learning_rate": 0.00020471812536942074, "loss": 1.057, "step": 10730 }, { "epoch": 1.29, "grad_norm": 0.2564097046852112, "learning_rate": 0.00020462229946800634, "loss": 0.8346, "step": 10735 }, { "epoch": 1.29, "grad_norm": 0.22804971039295197, "learning_rate": 0.0002045264478595238, "loss": 0.8141, "step": 10740 }, { "epoch": 1.29, "grad_norm": 0.25203844904899597, "learning_rate": 0.00020443057058908417, "loss": 0.8955, "step": 10745 }, { "epoch": 1.3, "grad_norm": 0.24008525907993317, "learning_rate": 0.0002043346677018104, "loss": 0.8148, "step": 10750 }, { "epoch": 1.3, "grad_norm": 0.27215099334716797, "learning_rate": 0.00020423873924283763, "loss": 0.8765, "step": 10755 }, { "epoch": 1.3, "grad_norm": 0.2237570732831955, "learning_rate": 0.000204142785257313, "loss": 0.9221, "step": 10760 }, { "epoch": 1.3, "grad_norm": 0.24160721898078918, "learning_rate": 0.00020404680579039558, "loss": 0.8637, "step": 10765 }, { "epoch": 1.3, "grad_norm": 0.23897403478622437, "learning_rate": 0.0002039508008872565, "loss": 0.7704, "step": 10770 }, { "epoch": 1.3, "grad_norm": 0.23746059834957123, "learning_rate": 0.00020385477059307885, "loss": 0.8945, "step": 10775 }, { "epoch": 1.3, "grad_norm": 0.23488637804985046, "learning_rate": 
0.0002037587149530577, "loss": 0.8504, "step": 10780 }, { "epoch": 1.3, "grad_norm": 0.26902300119400024, "learning_rate": 0.00020366263401240005, "loss": 0.8331, "step": 10785 }, { "epoch": 1.3, "grad_norm": 0.25453630089759827, "learning_rate": 0.00020356652781632477, "loss": 0.913, "step": 10790 }, { "epoch": 1.3, "grad_norm": 0.23822472989559174, "learning_rate": 0.00020347039641006257, "loss": 0.8158, "step": 10795 }, { "epoch": 1.3, "grad_norm": 0.25680580735206604, "learning_rate": 0.00020337423983885617, "loss": 0.7857, "step": 10800 }, { "epoch": 1.3, "grad_norm": 0.2617832124233246, "learning_rate": 0.00020327805814795993, "loss": 0.9449, "step": 10805 }, { "epoch": 1.3, "grad_norm": 0.23507827520370483, "learning_rate": 0.00020318185138264026, "loss": 0.9813, "step": 10810 }, { "epoch": 1.3, "grad_norm": 0.2346469610929489, "learning_rate": 0.00020308561958817518, "loss": 0.8555, "step": 10815 }, { "epoch": 1.3, "grad_norm": 0.2674165666103363, "learning_rate": 0.00020298936280985466, "loss": 0.9193, "step": 10820 }, { "epoch": 1.3, "grad_norm": 0.21533791720867157, "learning_rate": 0.00020289308109298028, "loss": 0.8939, "step": 10825 }, { "epoch": 1.3, "grad_norm": 0.23552724719047546, "learning_rate": 0.0002027967744828654, "loss": 0.8654, "step": 10830 }, { "epoch": 1.31, "grad_norm": 0.26331043243408203, "learning_rate": 0.00020270044302483505, "loss": 0.801, "step": 10835 }, { "epoch": 1.31, "grad_norm": 0.240268275141716, "learning_rate": 0.00020260408676422615, "loss": 0.7792, "step": 10840 }, { "epoch": 1.31, "grad_norm": 0.23092767596244812, "learning_rate": 0.00020250770574638708, "loss": 0.9452, "step": 10845 }, { "epoch": 1.31, "grad_norm": 0.2468583583831787, "learning_rate": 0.00020241130001667797, "loss": 0.8753, "step": 10850 }, { "epoch": 1.31, "grad_norm": 0.27682459354400635, "learning_rate": 0.0002023148696204705, "loss": 0.8244, "step": 10855 }, { "epoch": 1.31, "grad_norm": 0.27396732568740845, "learning_rate": 
0.00020221841460314814, "loss": 0.8651, "step": 10860 }, { "epoch": 1.31, "grad_norm": 0.24095529317855835, "learning_rate": 0.00020212193501010573, "loss": 0.8664, "step": 10865 }, { "epoch": 1.31, "grad_norm": 0.24401700496673584, "learning_rate": 0.00020202543088674977, "loss": 0.8406, "step": 10870 }, { "epoch": 1.31, "grad_norm": 0.22764812409877777, "learning_rate": 0.0002019289022784983, "loss": 0.9253, "step": 10875 }, { "epoch": 1.31, "grad_norm": 0.2579391598701477, "learning_rate": 0.00020183234923078092, "loss": 0.9783, "step": 10880 }, { "epoch": 1.31, "grad_norm": 0.23062071204185486, "learning_rate": 0.0002017357717890387, "loss": 0.8766, "step": 10885 }, { "epoch": 1.31, "grad_norm": 0.23495079576969147, "learning_rate": 0.00020163916999872418, "loss": 0.8313, "step": 10890 }, { "epoch": 1.31, "grad_norm": 0.2631068527698517, "learning_rate": 0.00020154254390530142, "loss": 0.9304, "step": 10895 }, { "epoch": 1.31, "grad_norm": 0.2535402178764343, "learning_rate": 0.00020144589355424578, "loss": 0.8889, "step": 10900 }, { "epoch": 1.31, "grad_norm": 0.2067861258983612, "learning_rate": 0.00020134921899104416, "loss": 0.8527, "step": 10905 }, { "epoch": 1.31, "grad_norm": 0.2404996007680893, "learning_rate": 0.00020125252026119487, "loss": 0.8413, "step": 10910 }, { "epoch": 1.32, "grad_norm": 0.27104899287223816, "learning_rate": 0.00020115579741020745, "loss": 0.8226, "step": 10915 }, { "epoch": 1.32, "grad_norm": 0.21938621997833252, "learning_rate": 0.000201059050483603, "loss": 0.85, "step": 10920 }, { "epoch": 1.32, "grad_norm": 0.2280419021844864, "learning_rate": 0.00020096227952691366, "loss": 0.8882, "step": 10925 }, { "epoch": 1.32, "grad_norm": 0.23701699078083038, "learning_rate": 0.00020086548458568326, "loss": 0.8571, "step": 10930 }, { "epoch": 1.32, "grad_norm": 0.22839820384979248, "learning_rate": 0.00020076866570546662, "loss": 0.797, "step": 10935 }, { "epoch": 1.32, "grad_norm": 0.23596982657909393, "learning_rate": 
0.00020067182293182994, "loss": 0.9124, "step": 10940 }, { "epoch": 1.32, "grad_norm": 0.22004413604736328, "learning_rate": 0.00020057495631035056, "loss": 0.9069, "step": 10945 }, { "epoch": 1.32, "grad_norm": 0.23677165806293488, "learning_rate": 0.00020047806588661726, "loss": 0.8417, "step": 10950 }, { "epoch": 1.32, "grad_norm": 0.2435624748468399, "learning_rate": 0.00020038115170622982, "loss": 0.7833, "step": 10955 }, { "epoch": 1.32, "grad_norm": 0.2496495395898819, "learning_rate": 0.00020028421381479926, "loss": 0.8015, "step": 10960 }, { "epoch": 1.32, "grad_norm": 0.23711077868938446, "learning_rate": 0.0002001872522579478, "loss": 0.8105, "step": 10965 }, { "epoch": 1.32, "grad_norm": 0.23245902359485626, "learning_rate": 0.0002000902670813088, "loss": 0.973, "step": 10970 }, { "epoch": 1.32, "grad_norm": 0.23252607882022858, "learning_rate": 0.0001999932583305266, "loss": 0.8141, "step": 10975 }, { "epoch": 1.32, "grad_norm": 0.22429288923740387, "learning_rate": 0.00019989622605125684, "loss": 0.9489, "step": 10980 }, { "epoch": 1.32, "grad_norm": 0.2248774915933609, "learning_rate": 0.00019979917028916606, "loss": 0.9385, "step": 10985 }, { "epoch": 1.32, "grad_norm": 0.2719941735267639, "learning_rate": 0.00019970209108993202, "loss": 0.8649, "step": 10990 }, { "epoch": 1.32, "grad_norm": 0.2678004503250122, "learning_rate": 0.0001996049884992433, "loss": 0.8, "step": 10995 }, { "epoch": 1.33, "grad_norm": 0.2112882286310196, "learning_rate": 0.0001995078625627997, "loss": 0.8478, "step": 11000 }, { "epoch": 1.33, "grad_norm": 0.2869125306606293, "learning_rate": 0.00019941071332631188, "loss": 0.8296, "step": 11005 }, { "epoch": 1.33, "grad_norm": 0.21323542296886444, "learning_rate": 0.00019931354083550147, "loss": 0.8881, "step": 11010 }, { "epoch": 1.33, "grad_norm": 0.23022828996181488, "learning_rate": 0.0001992163451361011, "loss": 0.8604, "step": 11015 }, { "epoch": 1.33, "grad_norm": 0.23515096306800842, "learning_rate": 
0.00019911912627385426, "loss": 0.9111, "step": 11020 }, { "epoch": 1.33, "grad_norm": 0.2547999918460846, "learning_rate": 0.00019902188429451542, "loss": 0.881, "step": 11025 }, { "epoch": 1.33, "grad_norm": 0.28279218077659607, "learning_rate": 0.0001989246192438499, "loss": 0.908, "step": 11030 }, { "epoch": 1.33, "grad_norm": 0.24193325638771057, "learning_rate": 0.00019882733116763376, "loss": 0.8769, "step": 11035 }, { "epoch": 1.33, "grad_norm": 0.2332753986120224, "learning_rate": 0.00019873002011165409, "loss": 0.803, "step": 11040 }, { "epoch": 1.33, "grad_norm": 0.24493838846683502, "learning_rate": 0.00019863268612170873, "loss": 0.825, "step": 11045 }, { "epoch": 1.33, "grad_norm": 0.22939081490039825, "learning_rate": 0.00019853532924360618, "loss": 0.9395, "step": 11050 }, { "epoch": 1.33, "grad_norm": 0.24580281972885132, "learning_rate": 0.0001984379495231659, "loss": 0.8964, "step": 11055 }, { "epoch": 1.33, "grad_norm": 0.22950120270252228, "learning_rate": 0.00019834054700621802, "loss": 0.8883, "step": 11060 }, { "epoch": 1.33, "grad_norm": 0.2450743019580841, "learning_rate": 0.00019824312173860332, "loss": 0.8721, "step": 11065 }, { "epoch": 1.33, "grad_norm": 0.2725335955619812, "learning_rate": 0.00019814567376617346, "loss": 0.8063, "step": 11070 }, { "epoch": 1.33, "grad_norm": 0.27576860785484314, "learning_rate": 0.00019804820313479066, "loss": 0.9648, "step": 11075 }, { "epoch": 1.34, "grad_norm": 0.23889948427677155, "learning_rate": 0.00019795070989032788, "loss": 0.8958, "step": 11080 }, { "epoch": 1.34, "grad_norm": 0.25442391633987427, "learning_rate": 0.00019785319407866853, "loss": 0.838, "step": 11085 }, { "epoch": 1.34, "grad_norm": 0.26583176851272583, "learning_rate": 0.00019775565574570698, "loss": 0.8578, "step": 11090 }, { "epoch": 1.34, "grad_norm": 0.25874045491218567, "learning_rate": 0.00019765809493734786, "loss": 0.8606, "step": 11095 }, { "epoch": 1.34, "grad_norm": 0.24192367494106293, "learning_rate": 
0.00019756051169950663, "loss": 0.7974, "step": 11100 }, { "epoch": 1.34, "grad_norm": 0.23584434390068054, "learning_rate": 0.0001974629060781091, "loss": 0.8465, "step": 11105 }, { "epoch": 1.34, "grad_norm": 0.28704845905303955, "learning_rate": 0.00019736527811909185, "loss": 0.9114, "step": 11110 }, { "epoch": 1.34, "grad_norm": 0.21719807386398315, "learning_rate": 0.00019726762786840177, "loss": 0.9526, "step": 11115 }, { "epoch": 1.34, "grad_norm": 0.23290511965751648, "learning_rate": 0.00019716995537199624, "loss": 0.8695, "step": 11120 }, { "epoch": 1.34, "grad_norm": 0.26695218682289124, "learning_rate": 0.00019707226067584326, "loss": 0.8904, "step": 11125 }, { "epoch": 1.34, "grad_norm": 0.225587397813797, "learning_rate": 0.0001969745438259212, "loss": 0.7754, "step": 11130 }, { "epoch": 1.34, "grad_norm": 0.2279675006866455, "learning_rate": 0.00019687680486821883, "loss": 0.9336, "step": 11135 }, { "epoch": 1.34, "grad_norm": 0.2307017594575882, "learning_rate": 0.00019677904384873536, "loss": 0.8192, "step": 11140 }, { "epoch": 1.34, "grad_norm": 0.2410012036561966, "learning_rate": 0.00019668126081348036, "loss": 0.8694, "step": 11145 }, { "epoch": 1.34, "grad_norm": 0.26555535197257996, "learning_rate": 0.00019658345580847382, "loss": 0.8598, "step": 11150 }, { "epoch": 1.34, "grad_norm": 0.225142240524292, "learning_rate": 0.00019648562887974598, "loss": 0.7988, "step": 11155 }, { "epoch": 1.34, "grad_norm": 0.2711564600467682, "learning_rate": 0.00019638778007333743, "loss": 0.8018, "step": 11160 }, { "epoch": 1.35, "grad_norm": 0.2621377408504486, "learning_rate": 0.00019628990943529909, "loss": 0.8682, "step": 11165 }, { "epoch": 1.35, "grad_norm": 0.23893457651138306, "learning_rate": 0.00019619201701169217, "loss": 0.8165, "step": 11170 }, { "epoch": 1.35, "grad_norm": 0.2528732419013977, "learning_rate": 0.00019609410284858797, "loss": 0.8636, "step": 11175 }, { "epoch": 1.35, "grad_norm": 0.24171330034732819, "learning_rate": 
0.0001959961669920683, "loss": 0.9089, "step": 11180 }, { "epoch": 1.35, "grad_norm": 0.24632695317268372, "learning_rate": 0.00019589820948822493, "loss": 0.92, "step": 11185 }, { "epoch": 1.35, "grad_norm": 0.2687559723854065, "learning_rate": 0.00019580023038316, "loss": 0.8794, "step": 11190 }, { "epoch": 1.35, "grad_norm": 0.2685549259185791, "learning_rate": 0.0001957022297229856, "loss": 0.8629, "step": 11195 }, { "epoch": 1.35, "grad_norm": 0.23067381978034973, "learning_rate": 0.00019560420755382416, "loss": 0.836, "step": 11200 }, { "epoch": 1.35, "grad_norm": 0.2574182450771332, "learning_rate": 0.0001955061639218082, "loss": 0.8133, "step": 11205 }, { "epoch": 1.35, "grad_norm": 0.2490427941083908, "learning_rate": 0.00019540809887308032, "loss": 0.7957, "step": 11210 }, { "epoch": 1.35, "grad_norm": 0.24531954526901245, "learning_rate": 0.00019531001245379312, "loss": 0.9497, "step": 11215 }, { "epoch": 1.35, "grad_norm": 0.2284311205148697, "learning_rate": 0.00019521190471010936, "loss": 0.8048, "step": 11220 }, { "epoch": 1.35, "grad_norm": 0.23735016584396362, "learning_rate": 0.00019511377568820184, "loss": 0.7849, "step": 11225 }, { "epoch": 1.35, "grad_norm": 0.24056896567344666, "learning_rate": 0.00019501562543425329, "loss": 0.8545, "step": 11230 }, { "epoch": 1.35, "grad_norm": 0.2434719055891037, "learning_rate": 0.00019491745399445644, "loss": 0.8467, "step": 11235 }, { "epoch": 1.35, "grad_norm": 0.24002546072006226, "learning_rate": 0.0001948192614150141, "loss": 0.8546, "step": 11240 }, { "epoch": 1.35, "grad_norm": 0.2705593705177307, "learning_rate": 0.00019472104774213893, "loss": 0.9254, "step": 11245 }, { "epoch": 1.36, "grad_norm": 0.2447061389684677, "learning_rate": 0.00019462281302205355, "loss": 0.946, "step": 11250 }, { "epoch": 1.36, "grad_norm": 0.24118636548519135, "learning_rate": 0.0001945245573009905, "loss": 0.8431, "step": 11255 }, { "epoch": 1.36, "grad_norm": 0.22377237677574158, "learning_rate": 
0.0001944262806251922, "loss": 0.9679, "step": 11260 }, { "epoch": 1.36, "grad_norm": 0.24068385362625122, "learning_rate": 0.00019432798304091085, "loss": 0.928, "step": 11265 }, { "epoch": 1.36, "grad_norm": 0.2337529957294464, "learning_rate": 0.0001942296645944086, "loss": 0.9368, "step": 11270 }, { "epoch": 1.36, "grad_norm": 0.23204030096530914, "learning_rate": 0.00019413132533195737, "loss": 0.9575, "step": 11275 }, { "epoch": 1.36, "grad_norm": 0.23348142206668854, "learning_rate": 0.00019403296529983888, "loss": 0.8327, "step": 11280 }, { "epoch": 1.36, "grad_norm": 0.22873573005199432, "learning_rate": 0.00019393458454434464, "loss": 0.7914, "step": 11285 }, { "epoch": 1.36, "grad_norm": 0.2327519953250885, "learning_rate": 0.00019383618311177587, "loss": 0.8401, "step": 11290 }, { "epoch": 1.36, "grad_norm": 0.2520465850830078, "learning_rate": 0.00019373776104844362, "loss": 0.8922, "step": 11295 }, { "epoch": 1.36, "grad_norm": 0.2645134925842285, "learning_rate": 0.00019363931840066847, "loss": 0.8806, "step": 11300 }, { "epoch": 1.36, "grad_norm": 0.2500244677066803, "learning_rate": 0.00019354085521478088, "loss": 0.9306, "step": 11305 }, { "epoch": 1.36, "grad_norm": 0.2344416230916977, "learning_rate": 0.0001934423715371209, "loss": 0.9232, "step": 11310 }, { "epoch": 1.36, "grad_norm": 0.2218599170446396, "learning_rate": 0.0001933438674140382, "loss": 0.8308, "step": 11315 }, { "epoch": 1.36, "grad_norm": 0.25029993057250977, "learning_rate": 0.00019324534289189203, "loss": 0.861, "step": 11320 }, { "epoch": 1.36, "grad_norm": 0.25338882207870483, "learning_rate": 0.00019314679801705144, "loss": 0.8986, "step": 11325 }, { "epoch": 1.37, "grad_norm": 0.22876478731632233, "learning_rate": 0.00019304823283589482, "loss": 1.0461, "step": 11330 }, { "epoch": 1.37, "grad_norm": 0.2699691355228424, "learning_rate": 0.00019294964739481024, "loss": 0.8773, "step": 11335 }, { "epoch": 1.37, "grad_norm": 0.23028436303138733, "learning_rate": 
0.00019285104174019527, "loss": 0.9431, "step": 11340 }, { "epoch": 1.37, "grad_norm": 0.21594227850437164, "learning_rate": 0.00019275241591845704, "loss": 0.904, "step": 11345 }, { "epoch": 1.37, "grad_norm": 0.25375184416770935, "learning_rate": 0.00019265376997601205, "loss": 0.9063, "step": 11350 }, { "epoch": 1.37, "grad_norm": 0.24144120514392853, "learning_rate": 0.0001925551039592865, "loss": 0.8952, "step": 11355 }, { "epoch": 1.37, "grad_norm": 0.23259896039962769, "learning_rate": 0.00019245641791471577, "loss": 0.9056, "step": 11360 }, { "epoch": 1.37, "grad_norm": 0.26159223914146423, "learning_rate": 0.00019235771188874485, "loss": 0.8856, "step": 11365 }, { "epoch": 1.37, "grad_norm": 0.22818419337272644, "learning_rate": 0.0001922589859278281, "loss": 0.8179, "step": 11370 }, { "epoch": 1.37, "grad_norm": 0.24243588745594025, "learning_rate": 0.00019216024007842915, "loss": 0.8159, "step": 11375 }, { "epoch": 1.37, "grad_norm": 0.22796067595481873, "learning_rate": 0.00019206147438702108, "loss": 0.9193, "step": 11380 }, { "epoch": 1.37, "grad_norm": 0.25152865052223206, "learning_rate": 0.00019196268890008642, "loss": 0.938, "step": 11385 }, { "epoch": 1.37, "grad_norm": 0.22121083736419678, "learning_rate": 0.0001918638836641168, "loss": 0.82, "step": 11390 }, { "epoch": 1.37, "grad_norm": 0.22968332469463348, "learning_rate": 0.00019176505872561326, "loss": 0.9606, "step": 11395 }, { "epoch": 1.37, "grad_norm": 0.2671717405319214, "learning_rate": 0.00019166621413108613, "loss": 0.9147, "step": 11400 }, { "epoch": 1.37, "grad_norm": 0.24766571819782257, "learning_rate": 0.00019156734992705496, "loss": 0.8639, "step": 11405 }, { "epoch": 1.37, "grad_norm": 0.244247704744339, "learning_rate": 0.00019146846616004842, "loss": 0.7505, "step": 11410 }, { "epoch": 1.38, "grad_norm": 0.23957106471061707, "learning_rate": 0.00019136956287660464, "loss": 0.8708, "step": 11415 }, { "epoch": 1.38, "grad_norm": 0.2419036477804184, "learning_rate": 
0.00019127064012327072, "loss": 0.9644, "step": 11420 }, { "epoch": 1.38, "grad_norm": 0.2512461245059967, "learning_rate": 0.000191171697946603, "loss": 0.8277, "step": 11425 }, { "epoch": 1.38, "grad_norm": 0.24636131525039673, "learning_rate": 0.00019107273639316696, "loss": 0.877, "step": 11430 }, { "epoch": 1.38, "grad_norm": 0.21014370024204254, "learning_rate": 0.0001909737555095372, "loss": 0.8611, "step": 11435 }, { "epoch": 1.38, "grad_norm": 0.2295524775981903, "learning_rate": 0.0001908747553422974, "loss": 0.9243, "step": 11440 }, { "epoch": 1.38, "grad_norm": 0.24215233325958252, "learning_rate": 0.0001907757359380403, "loss": 0.9948, "step": 11445 }, { "epoch": 1.38, "grad_norm": 0.2857617735862732, "learning_rate": 0.00019067669734336776, "loss": 0.8859, "step": 11450 }, { "epoch": 1.38, "grad_norm": 0.25199002027511597, "learning_rate": 0.00019057763960489063, "loss": 0.9734, "step": 11455 }, { "epoch": 1.38, "grad_norm": 0.2548152506351471, "learning_rate": 0.00019047856276922873, "loss": 0.8183, "step": 11460 }, { "epoch": 1.38, "grad_norm": 0.23145854473114014, "learning_rate": 0.00019037946688301097, "loss": 0.9237, "step": 11465 }, { "epoch": 1.38, "grad_norm": 0.24691297113895416, "learning_rate": 0.00019028035199287512, "loss": 0.8724, "step": 11470 }, { "epoch": 1.38, "grad_norm": 0.2465231567621231, "learning_rate": 0.00019018121814546799, "loss": 0.8638, "step": 11475 }, { "epoch": 1.38, "grad_norm": 0.2617138922214508, "learning_rate": 0.00019008206538744516, "loss": 0.9152, "step": 11480 }, { "epoch": 1.38, "grad_norm": 0.23858489096164703, "learning_rate": 0.00018998289376547135, "loss": 0.7177, "step": 11485 }, { "epoch": 1.38, "grad_norm": 0.2825676202774048, "learning_rate": 0.00018988370332621987, "loss": 0.9155, "step": 11490 }, { "epoch": 1.39, "grad_norm": 0.21846085786819458, "learning_rate": 0.00018978449411637317, "loss": 0.8946, "step": 11495 }, { "epoch": 1.39, "grad_norm": 0.23341843485832214, "learning_rate": 
0.00018968526618262226, "loss": 0.833, "step": 11500 }, { "epoch": 1.39, "grad_norm": 0.24220559000968933, "learning_rate": 0.00018958601957166725, "loss": 0.8047, "step": 11505 }, { "epoch": 1.39, "grad_norm": 0.2510910630226135, "learning_rate": 0.0001894867543302168, "loss": 0.8861, "step": 11510 }, { "epoch": 1.39, "grad_norm": 0.24948173761367798, "learning_rate": 0.00018938747050498847, "loss": 0.8644, "step": 11515 }, { "epoch": 1.39, "grad_norm": 0.25031885504722595, "learning_rate": 0.00018928816814270844, "loss": 0.8713, "step": 11520 }, { "epoch": 1.39, "grad_norm": 0.2391744703054428, "learning_rate": 0.0001891888472901118, "loss": 0.8059, "step": 11525 }, { "epoch": 1.39, "grad_norm": 0.26208725571632385, "learning_rate": 0.0001890895079939422, "loss": 0.8532, "step": 11530 }, { "epoch": 1.39, "grad_norm": 0.2613973021507263, "learning_rate": 0.00018899015030095201, "loss": 0.8387, "step": 11535 }, { "epoch": 1.39, "grad_norm": 0.2578382194042206, "learning_rate": 0.00018889077425790225, "loss": 0.9407, "step": 11540 }, { "epoch": 1.39, "grad_norm": 0.22165289521217346, "learning_rate": 0.00018879137991156263, "loss": 0.817, "step": 11545 }, { "epoch": 1.39, "grad_norm": 0.2351749688386917, "learning_rate": 0.0001886919673087114, "loss": 0.8507, "step": 11550 }, { "epoch": 1.39, "grad_norm": 0.2208326905965805, "learning_rate": 0.00018859253649613545, "loss": 0.7727, "step": 11555 }, { "epoch": 1.39, "grad_norm": 0.21380893886089325, "learning_rate": 0.00018849308752063017, "loss": 0.829, "step": 11560 }, { "epoch": 1.39, "grad_norm": 0.25803694128990173, "learning_rate": 0.00018839362042899962, "loss": 0.8363, "step": 11565 }, { "epoch": 1.39, "grad_norm": 0.28940629959106445, "learning_rate": 0.0001882941352680562, "loss": 0.9299, "step": 11570 }, { "epoch": 1.39, "grad_norm": 0.2648991346359253, "learning_rate": 0.00018819463208462112, "loss": 0.8416, "step": 11575 }, { "epoch": 1.4, "grad_norm": 0.22129850089550018, "learning_rate": 
0.00018809511092552372, "loss": 0.8448, "step": 11580 }, { "epoch": 1.4, "grad_norm": 0.26116523146629333, "learning_rate": 0.00018799557183760206, "loss": 0.9321, "step": 11585 }, { "epoch": 1.4, "grad_norm": 0.2547585964202881, "learning_rate": 0.00018789601486770245, "loss": 0.8947, "step": 11590 }, { "epoch": 1.4, "grad_norm": 0.28379014134407043, "learning_rate": 0.0001877964400626798, "loss": 0.8113, "step": 11595 }, { "epoch": 1.4, "grad_norm": 0.21314746141433716, "learning_rate": 0.00018769684746939723, "loss": 0.89, "step": 11600 }, { "epoch": 1.4, "grad_norm": 0.2194174975156784, "learning_rate": 0.00018759723713472642, "loss": 0.8691, "step": 11605 }, { "epoch": 1.4, "grad_norm": 0.2298017144203186, "learning_rate": 0.00018749760910554715, "loss": 0.8259, "step": 11610 }, { "epoch": 1.4, "grad_norm": 0.2847272455692291, "learning_rate": 0.0001873979634287479, "loss": 0.8773, "step": 11615 }, { "epoch": 1.4, "grad_norm": 0.23975825309753418, "learning_rate": 0.00018729830015122508, "loss": 0.9002, "step": 11620 }, { "epoch": 1.4, "grad_norm": 0.22761796414852142, "learning_rate": 0.00018719861931988354, "loss": 0.8456, "step": 11625 }, { "epoch": 1.4, "grad_norm": 0.24914436042308807, "learning_rate": 0.0001870989209816364, "loss": 0.8834, "step": 11630 }, { "epoch": 1.4, "grad_norm": 0.2555813491344452, "learning_rate": 0.0001869992051834051, "loss": 0.7416, "step": 11635 }, { "epoch": 1.4, "grad_norm": 0.2689943015575409, "learning_rate": 0.00018689947197211908, "loss": 0.85, "step": 11640 }, { "epoch": 1.4, "grad_norm": 0.21290338039398193, "learning_rate": 0.00018679972139471615, "loss": 0.9248, "step": 11645 }, { "epoch": 1.4, "grad_norm": 0.21777038276195526, "learning_rate": 0.00018669995349814228, "loss": 0.9654, "step": 11650 }, { "epoch": 1.4, "grad_norm": 0.2189740687608719, "learning_rate": 0.0001866001683293515, "loss": 0.8877, "step": 11655 }, { "epoch": 1.4, "grad_norm": 0.2227373570203781, "learning_rate": 0.00018650036593530607, "loss": 
0.9332, "step": 11660 }, { "epoch": 1.41, "grad_norm": 0.2457219511270523, "learning_rate": 0.00018640054636297632, "loss": 0.7944, "step": 11665 }, { "epoch": 1.41, "grad_norm": 0.2539142072200775, "learning_rate": 0.00018630070965934057, "loss": 0.7875, "step": 11670 }, { "epoch": 1.41, "grad_norm": 0.23336252570152283, "learning_rate": 0.0001862008558713854, "loss": 0.7474, "step": 11675 }, { "epoch": 1.41, "grad_norm": 0.33268702030181885, "learning_rate": 0.00018610098504610523, "loss": 0.944, "step": 11680 }, { "epoch": 1.41, "grad_norm": 0.23532938957214355, "learning_rate": 0.0001860010972305026, "loss": 0.998, "step": 11685 }, { "epoch": 1.41, "grad_norm": 0.24364040791988373, "learning_rate": 0.0001859011924715881, "loss": 0.8007, "step": 11690 }, { "epoch": 1.41, "grad_norm": 0.26851868629455566, "learning_rate": 0.00018580127081638018, "loss": 0.8973, "step": 11695 }, { "epoch": 1.41, "grad_norm": 0.27191677689552307, "learning_rate": 0.00018570133231190524, "loss": 0.9319, "step": 11700 }, { "epoch": 1.41, "grad_norm": 0.24935273826122284, "learning_rate": 0.00018560137700519776, "loss": 0.8381, "step": 11705 }, { "epoch": 1.41, "grad_norm": 0.19971820712089539, "learning_rate": 0.00018550140494329995, "loss": 0.9152, "step": 11710 }, { "epoch": 1.41, "grad_norm": 0.2567872703075409, "learning_rate": 0.000185401416173262, "loss": 0.8468, "step": 11715 }, { "epoch": 1.41, "grad_norm": 0.2599542438983917, "learning_rate": 0.00018530141074214195, "loss": 0.8679, "step": 11720 }, { "epoch": 1.41, "grad_norm": 0.2495017796754837, "learning_rate": 0.00018520138869700573, "loss": 0.8317, "step": 11725 }, { "epoch": 1.41, "grad_norm": 0.25889188051223755, "learning_rate": 0.00018510135008492696, "loss": 0.9075, "step": 11730 }, { "epoch": 1.41, "grad_norm": 0.24831044673919678, "learning_rate": 0.00018500129495298718, "loss": 0.8721, "step": 11735 }, { "epoch": 1.41, "grad_norm": 0.23168979585170746, "learning_rate": 0.0001849012233482756, "loss": 0.9592, 
"step": 11740 }, { "epoch": 1.42, "grad_norm": 0.27312368154525757, "learning_rate": 0.0001848011353178893, "loss": 0.8826, "step": 11745 }, { "epoch": 1.42, "grad_norm": 0.23228977620601654, "learning_rate": 0.00018470103090893297, "loss": 0.8053, "step": 11750 }, { "epoch": 1.42, "grad_norm": 0.23857393860816956, "learning_rate": 0.00018460091016851915, "loss": 0.8638, "step": 11755 }, { "epoch": 1.42, "grad_norm": 0.24902932345867157, "learning_rate": 0.00018450077314376793, "loss": 0.8719, "step": 11760 }, { "epoch": 1.42, "grad_norm": 0.29293930530548096, "learning_rate": 0.0001844006198818072, "loss": 0.9337, "step": 11765 }, { "epoch": 1.42, "grad_norm": 0.2380332350730896, "learning_rate": 0.00018430045042977224, "loss": 0.8136, "step": 11770 }, { "epoch": 1.42, "grad_norm": 0.2724967896938324, "learning_rate": 0.0001842002648348063, "loss": 0.9473, "step": 11775 }, { "epoch": 1.42, "grad_norm": 0.24302633106708527, "learning_rate": 0.00018410006314405992, "loss": 0.7744, "step": 11780 }, { "epoch": 1.42, "grad_norm": 0.242751345038414, "learning_rate": 0.0001839998454046914, "loss": 0.8238, "step": 11785 }, { "epoch": 1.42, "grad_norm": 0.23499782383441925, "learning_rate": 0.00018389961166386657, "loss": 0.8673, "step": 11790 }, { "epoch": 1.42, "grad_norm": 0.22136856615543365, "learning_rate": 0.00018379936196875868, "loss": 0.9232, "step": 11795 }, { "epoch": 1.42, "grad_norm": 0.22664988040924072, "learning_rate": 0.00018369909636654867, "loss": 0.8296, "step": 11800 }, { "epoch": 1.42, "grad_norm": 0.2627767026424408, "learning_rate": 0.00018359881490442471, "loss": 0.8778, "step": 11805 }, { "epoch": 1.42, "grad_norm": 0.22755451500415802, "learning_rate": 0.00018349851762958274, "loss": 0.949, "step": 11810 }, { "epoch": 1.42, "grad_norm": 0.2317892462015152, "learning_rate": 0.00018339820458922589, "loss": 0.8828, "step": 11815 }, { "epoch": 1.42, "grad_norm": 0.23136401176452637, "learning_rate": 0.00018329787583056486, "loss": 0.7622, "step": 
11820 }, { "epoch": 1.42, "grad_norm": 0.24994751811027527, "learning_rate": 0.00018319753140081765, "loss": 0.8811, "step": 11825 }, { "epoch": 1.43, "grad_norm": 0.2566693425178528, "learning_rate": 0.00018309717134720974, "loss": 0.9607, "step": 11830 }, { "epoch": 1.43, "grad_norm": 0.2577987313270569, "learning_rate": 0.00018299679571697383, "loss": 0.8902, "step": 11835 }, { "epoch": 1.43, "grad_norm": 0.2891867160797119, "learning_rate": 0.00018289640455735012, "loss": 0.8107, "step": 11840 }, { "epoch": 1.43, "grad_norm": 0.24823874235153198, "learning_rate": 0.0001827959979155859, "loss": 0.8493, "step": 11845 }, { "epoch": 1.43, "grad_norm": 0.23691007494926453, "learning_rate": 0.00018269557583893602, "loss": 0.8704, "step": 11850 }, { "epoch": 1.43, "grad_norm": 0.2653874158859253, "learning_rate": 0.00018259513837466228, "loss": 0.8665, "step": 11855 }, { "epoch": 1.43, "grad_norm": 0.3187112510204315, "learning_rate": 0.00018249468557003404, "loss": 0.7894, "step": 11860 }, { "epoch": 1.43, "grad_norm": 0.2333473116159439, "learning_rate": 0.00018239421747232758, "loss": 0.7926, "step": 11865 }, { "epoch": 1.43, "grad_norm": 0.2679731547832489, "learning_rate": 0.0001822937341288267, "loss": 0.848, "step": 11870 }, { "epoch": 1.43, "grad_norm": 0.2512493431568146, "learning_rate": 0.00018219323558682203, "loss": 0.9605, "step": 11875 }, { "epoch": 1.43, "grad_norm": 0.241338312625885, "learning_rate": 0.0001820927218936116, "loss": 0.8209, "step": 11880 }, { "epoch": 1.43, "grad_norm": 0.2403215616941452, "learning_rate": 0.0001819921930965005, "loss": 0.808, "step": 11885 }, { "epoch": 1.43, "grad_norm": 0.24776634573936462, "learning_rate": 0.00018189164924280088, "loss": 0.8508, "step": 11890 }, { "epoch": 1.43, "grad_norm": 0.23758898675441742, "learning_rate": 0.00018179109037983203, "loss": 0.9911, "step": 11895 }, { "epoch": 1.43, "grad_norm": 0.24971432983875275, "learning_rate": 0.00018169051655492034, "loss": 0.8894, "step": 11900 }, { 
"epoch": 1.43, "grad_norm": 0.22912085056304932, "learning_rate": 0.0001815899278153991, "loss": 0.8912, "step": 11905 }, { "epoch": 1.44, "grad_norm": 0.271433562040329, "learning_rate": 0.0001814893242086088, "loss": 0.8364, "step": 11910 }, { "epoch": 1.44, "grad_norm": 0.2268996387720108, "learning_rate": 0.00018138870578189676, "loss": 0.8419, "step": 11915 }, { "epoch": 1.44, "grad_norm": 0.2744608521461487, "learning_rate": 0.0001812880725826174, "loss": 0.8018, "step": 11920 }, { "epoch": 1.44, "grad_norm": 0.2408338189125061, "learning_rate": 0.00018118742465813206, "loss": 0.84, "step": 11925 }, { "epoch": 1.44, "grad_norm": 0.27505186200141907, "learning_rate": 0.00018108676205580895, "loss": 0.8555, "step": 11930 }, { "epoch": 1.44, "grad_norm": 0.23788927495479584, "learning_rate": 0.00018098608482302328, "loss": 0.8613, "step": 11935 }, { "epoch": 1.44, "grad_norm": 0.2505749762058258, "learning_rate": 0.00018088539300715705, "loss": 0.7991, "step": 11940 }, { "epoch": 1.44, "grad_norm": 0.22999998927116394, "learning_rate": 0.00018078468665559924, "loss": 0.7935, "step": 11945 }, { "epoch": 1.44, "grad_norm": 0.2383863478899002, "learning_rate": 0.00018068396581574553, "loss": 0.878, "step": 11950 }, { "epoch": 1.44, "grad_norm": 0.22946636378765106, "learning_rate": 0.00018058323053499854, "loss": 0.8047, "step": 11955 }, { "epoch": 1.44, "grad_norm": 0.23430262506008148, "learning_rate": 0.0001804824808607676, "loss": 0.8075, "step": 11960 }, { "epoch": 1.44, "grad_norm": 0.2360125333070755, "learning_rate": 0.0001803817168404689, "loss": 0.8581, "step": 11965 }, { "epoch": 1.44, "grad_norm": 0.24007384479045868, "learning_rate": 0.00018028093852152528, "loss": 0.8953, "step": 11970 }, { "epoch": 1.44, "grad_norm": 0.2502374053001404, "learning_rate": 0.00018018014595136644, "loss": 0.8884, "step": 11975 }, { "epoch": 1.44, "grad_norm": 0.24452511966228485, "learning_rate": 0.00018007933917742864, "loss": 1.0623, "step": 11980 }, { "epoch": 1.44, 
"grad_norm": 0.23154529929161072, "learning_rate": 0.0001799785182471549, "loss": 0.8695, "step": 11985 }, { "epoch": 1.44, "grad_norm": 0.2401302307844162, "learning_rate": 0.00017987768320799495, "loss": 0.9001, "step": 11990 }, { "epoch": 1.45, "grad_norm": 0.37644556164741516, "learning_rate": 0.00017977683410740503, "loss": 0.8526, "step": 11995 }, { "epoch": 1.45, "grad_norm": 0.2637287974357605, "learning_rate": 0.00017967597099284813, "loss": 0.9424, "step": 12000 }, { "epoch": 1.45, "grad_norm": 0.23815138638019562, "learning_rate": 0.0001795750939117938, "loss": 0.8184, "step": 12005 }, { "epoch": 1.45, "grad_norm": 0.2669789493083954, "learning_rate": 0.00017947420291171813, "loss": 0.9174, "step": 12010 }, { "epoch": 1.45, "grad_norm": 0.3020137548446655, "learning_rate": 0.00017937329804010372, "loss": 0.9216, "step": 12015 }, { "epoch": 1.45, "grad_norm": 0.22852839529514313, "learning_rate": 0.00017927237934443983, "loss": 0.8869, "step": 12020 }, { "epoch": 1.45, "grad_norm": 0.27569183707237244, "learning_rate": 0.00017917144687222206, "loss": 0.8327, "step": 12025 }, { "epoch": 1.45, "grad_norm": 0.2452722042798996, "learning_rate": 0.0001790705006709527, "loss": 0.9104, "step": 12030 }, { "epoch": 1.45, "grad_norm": 0.24271848797798157, "learning_rate": 0.00017896954078814028, "loss": 0.9341, "step": 12035 }, { "epoch": 1.45, "grad_norm": 0.21451996266841888, "learning_rate": 0.00017886856727129994, "loss": 0.8143, "step": 12040 }, { "epoch": 1.45, "grad_norm": 0.23725618422031403, "learning_rate": 0.00017876758016795313, "loss": 0.8801, "step": 12045 }, { "epoch": 1.45, "grad_norm": 0.24408486485481262, "learning_rate": 0.00017866657952562778, "loss": 0.8149, "step": 12050 }, { "epoch": 1.45, "grad_norm": 0.22583889961242676, "learning_rate": 0.0001785655653918581, "loss": 0.8196, "step": 12055 }, { "epoch": 1.45, "grad_norm": 0.23950889706611633, "learning_rate": 0.00017846453781418474, "loss": 0.7369, "step": 12060 }, { "epoch": 1.45, 
"grad_norm": 0.24430805444717407, "learning_rate": 0.00017836349684015456, "loss": 0.9414, "step": 12065 }, { "epoch": 1.45, "grad_norm": 0.22092491388320923, "learning_rate": 0.00017826244251732088, "loss": 0.9288, "step": 12070 }, { "epoch": 1.45, "grad_norm": 0.23350967466831207, "learning_rate": 0.00017816137489324314, "loss": 0.8764, "step": 12075 }, { "epoch": 1.46, "grad_norm": 0.25949931144714355, "learning_rate": 0.0001780602940154872, "loss": 0.7207, "step": 12080 }, { "epoch": 1.46, "grad_norm": 0.23288941383361816, "learning_rate": 0.00017795919993162504, "loss": 0.8304, "step": 12085 }, { "epoch": 1.46, "grad_norm": 0.261445015668869, "learning_rate": 0.00017785809268923493, "loss": 0.8202, "step": 12090 }, { "epoch": 1.46, "grad_norm": 0.26035887002944946, "learning_rate": 0.0001777569723359012, "loss": 0.8119, "step": 12095 }, { "epoch": 1.46, "grad_norm": 0.24832282960414886, "learning_rate": 0.00017765583891921454, "loss": 0.8939, "step": 12100 }, { "epoch": 1.46, "grad_norm": 0.24832552671432495, "learning_rate": 0.00017755469248677163, "loss": 0.8043, "step": 12105 }, { "epoch": 1.46, "grad_norm": 0.24635636806488037, "learning_rate": 0.0001774535330861754, "loss": 0.9287, "step": 12110 }, { "epoch": 1.46, "grad_norm": 0.26285749673843384, "learning_rate": 0.0001773523607650348, "loss": 0.903, "step": 12115 }, { "epoch": 1.46, "grad_norm": 0.23266449570655823, "learning_rate": 0.0001772511755709649, "loss": 0.8647, "step": 12120 }, { "epoch": 1.46, "grad_norm": 0.23919335007667542, "learning_rate": 0.00017714997755158675, "loss": 0.8978, "step": 12125 }, { "epoch": 1.46, "grad_norm": 0.247425839304924, "learning_rate": 0.0001770487667545276, "loss": 0.8336, "step": 12130 }, { "epoch": 1.46, "grad_norm": 0.28573018312454224, "learning_rate": 0.00017694754322742048, "loss": 0.9016, "step": 12135 }, { "epoch": 1.46, "grad_norm": 0.23751410841941833, "learning_rate": 0.00017684630701790468, "loss": 0.8116, "step": 12140 }, { "epoch": 1.46, 
"grad_norm": 0.2392253428697586, "learning_rate": 0.0001767450581736252, "loss": 0.9136, "step": 12145 }, { "epoch": 1.46, "grad_norm": 0.23572461307048798, "learning_rate": 0.00017664379674223318, "loss": 0.8182, "step": 12150 }, { "epoch": 1.46, "grad_norm": 0.2536904513835907, "learning_rate": 0.0001765425227713856, "loss": 0.8735, "step": 12155 }, { "epoch": 1.47, "grad_norm": 0.2494330257177353, "learning_rate": 0.00017644123630874539, "loss": 0.888, "step": 12160 }, { "epoch": 1.47, "grad_norm": 0.2419360876083374, "learning_rate": 0.00017633993740198128, "loss": 0.8367, "step": 12165 }, { "epoch": 1.47, "grad_norm": 0.22948452830314636, "learning_rate": 0.0001762386260987679, "loss": 0.903, "step": 12170 }, { "epoch": 1.47, "grad_norm": 0.2524193227291107, "learning_rate": 0.0001761373024467857, "loss": 0.8555, "step": 12175 }, { "epoch": 1.47, "grad_norm": 0.2861385941505432, "learning_rate": 0.000176035966493721, "loss": 0.9068, "step": 12180 }, { "epoch": 1.47, "grad_norm": 0.2484591007232666, "learning_rate": 0.0001759346182872658, "loss": 0.9578, "step": 12185 }, { "epoch": 1.47, "grad_norm": 0.24529612064361572, "learning_rate": 0.000175833257875118, "loss": 0.8648, "step": 12190 }, { "epoch": 1.47, "grad_norm": 0.23602719604969025, "learning_rate": 0.00017573188530498117, "loss": 0.9002, "step": 12195 }, { "epoch": 1.47, "grad_norm": 0.24331901967525482, "learning_rate": 0.0001756305006245646, "loss": 0.8624, "step": 12200 }, { "epoch": 1.47, "grad_norm": 0.2478303611278534, "learning_rate": 0.0001755291038815832, "loss": 0.8435, "step": 12205 }, { "epoch": 1.47, "grad_norm": 0.26991724967956543, "learning_rate": 0.0001754276951237578, "loss": 0.8139, "step": 12210 }, { "epoch": 1.47, "grad_norm": 0.2541549503803253, "learning_rate": 0.00017532627439881458, "loss": 0.85, "step": 12215 }, { "epoch": 1.47, "grad_norm": 0.24864061176776886, "learning_rate": 0.0001752248417544856, "loss": 0.8312, "step": 12220 }, { "epoch": 1.47, "grad_norm": 
0.26080724596977234, "learning_rate": 0.00017512339723850835, "loss": 0.8774, "step": 12225 }, { "epoch": 1.47, "grad_norm": 0.27303215861320496, "learning_rate": 0.00017502194089862608, "loss": 0.8132, "step": 12230 }, { "epoch": 1.47, "grad_norm": 0.24449607729911804, "learning_rate": 0.00017492047278258748, "loss": 0.9219, "step": 12235 }, { "epoch": 1.47, "grad_norm": 0.24350832402706146, "learning_rate": 0.0001748189929381468, "loss": 0.814, "step": 12240 }, { "epoch": 1.48, "grad_norm": 0.22966676950454712, "learning_rate": 0.0001747175014130638, "loss": 0.8465, "step": 12245 }, { "epoch": 1.48, "grad_norm": 0.2714262008666992, "learning_rate": 0.00017461599825510386, "loss": 0.885, "step": 12250 }, { "epoch": 1.48, "grad_norm": 0.2457001507282257, "learning_rate": 0.00017451448351203758, "loss": 0.8376, "step": 12255 }, { "epoch": 1.48, "grad_norm": 0.22185452282428741, "learning_rate": 0.00017441295723164132, "loss": 0.8311, "step": 12260 }, { "epoch": 1.48, "grad_norm": 0.2851621210575104, "learning_rate": 0.00017431141946169662, "loss": 0.7891, "step": 12265 }, { "epoch": 1.48, "grad_norm": 0.2442905604839325, "learning_rate": 0.00017420987024999065, "loss": 0.9465, "step": 12270 }, { "epoch": 1.48, "grad_norm": 0.26960885524749756, "learning_rate": 0.00017410830964431566, "loss": 0.8436, "step": 12275 }, { "epoch": 1.48, "grad_norm": 0.23894372582435608, "learning_rate": 0.0001740067376924696, "loss": 0.8469, "step": 12280 }, { "epoch": 1.48, "grad_norm": 0.295200914144516, "learning_rate": 0.00017390515444225548, "loss": 0.907, "step": 12285 }, { "epoch": 1.48, "grad_norm": 0.24998031556606293, "learning_rate": 0.00017380355994148187, "loss": 0.8642, "step": 12290 }, { "epoch": 1.48, "grad_norm": 0.25072336196899414, "learning_rate": 0.0001737019542379624, "loss": 0.8299, "step": 12295 }, { "epoch": 1.48, "grad_norm": 0.23040717840194702, "learning_rate": 0.00017360033737951622, "loss": 0.8464, "step": 12300 }, { "epoch": 1.48, "grad_norm": 
0.21226970851421356, "learning_rate": 0.0001734987094139675, "loss": 0.8759, "step": 12305 }, { "epoch": 1.48, "grad_norm": 0.26856541633605957, "learning_rate": 0.0001733970703891457, "loss": 0.8438, "step": 12310 }, { "epoch": 1.48, "grad_norm": 0.2336094230413437, "learning_rate": 0.00017329542035288565, "loss": 0.8019, "step": 12315 }, { "epoch": 1.48, "grad_norm": 0.2525746524333954, "learning_rate": 0.00017319375935302713, "loss": 0.9302, "step": 12320 }, { "epoch": 1.49, "grad_norm": 0.21425200998783112, "learning_rate": 0.00017309208743741526, "loss": 0.835, "step": 12325 }, { "epoch": 1.49, "grad_norm": 0.2605019509792328, "learning_rate": 0.00017299040465390013, "loss": 0.9248, "step": 12330 }, { "epoch": 1.49, "grad_norm": 0.22529718279838562, "learning_rate": 0.00017288871105033713, "loss": 0.8703, "step": 12335 }, { "epoch": 1.49, "grad_norm": 0.24195663630962372, "learning_rate": 0.00017278700667458657, "loss": 0.836, "step": 12340 }, { "epoch": 1.49, "grad_norm": 0.25822916626930237, "learning_rate": 0.00017268529157451394, "loss": 0.8901, "step": 12345 }, { "epoch": 1.49, "grad_norm": 0.2364194244146347, "learning_rate": 0.00017258356579798973, "loss": 0.8438, "step": 12350 }, { "epoch": 1.49, "grad_norm": 0.26082637906074524, "learning_rate": 0.0001724818293928895, "loss": 0.9269, "step": 12355 }, { "epoch": 1.49, "grad_norm": 0.2297302931547165, "learning_rate": 0.00017238008240709374, "loss": 0.9076, "step": 12360 }, { "epoch": 1.49, "grad_norm": 0.2835592031478882, "learning_rate": 0.00017227832488848799, "loss": 0.8485, "step": 12365 }, { "epoch": 1.49, "grad_norm": 0.2650296986103058, "learning_rate": 0.0001721765568849627, "loss": 0.9593, "step": 12370 }, { "epoch": 1.49, "grad_norm": 0.22768686711788177, "learning_rate": 0.00017207477844441335, "loss": 0.8921, "step": 12375 }, { "epoch": 1.49, "grad_norm": 0.22558462619781494, "learning_rate": 0.00017197298961474006, "loss": 0.8421, "step": 12380 }, { "epoch": 1.49, "grad_norm": 
0.224419966340065, "learning_rate": 0.00017187119044384823, "loss": 0.8392, "step": 12385 }, { "epoch": 1.49, "grad_norm": 0.2719701826572418, "learning_rate": 0.00017176938097964784, "loss": 0.9365, "step": 12390 }, { "epoch": 1.49, "grad_norm": 0.24385976791381836, "learning_rate": 0.00017166756127005384, "loss": 0.8264, "step": 12395 }, { "epoch": 1.49, "grad_norm": 0.23817993700504303, "learning_rate": 0.00017156573136298592, "loss": 0.9154, "step": 12400 }, { "epoch": 1.49, "grad_norm": 0.25745269656181335, "learning_rate": 0.00017146389130636864, "loss": 0.8911, "step": 12405 }, { "epoch": 1.5, "grad_norm": 0.29501572251319885, "learning_rate": 0.0001713620411481314, "loss": 0.8841, "step": 12410 }, { "epoch": 1.5, "grad_norm": 0.263081431388855, "learning_rate": 0.00017126018093620808, "loss": 0.8334, "step": 12415 }, { "epoch": 1.5, "grad_norm": 0.22736036777496338, "learning_rate": 0.0001711583107185376, "loss": 0.88, "step": 12420 }, { "epoch": 1.5, "grad_norm": 0.23410391807556152, "learning_rate": 0.00017105643054306352, "loss": 0.8257, "step": 12425 }, { "epoch": 1.5, "grad_norm": 0.26745110750198364, "learning_rate": 0.00017095454045773387, "loss": 0.9252, "step": 12430 }, { "epoch": 1.5, "grad_norm": 0.24385030567646027, "learning_rate": 0.00017085264051050166, "loss": 0.8395, "step": 12435 }, { "epoch": 1.5, "grad_norm": 0.2342156320810318, "learning_rate": 0.0001707507307493243, "loss": 0.8266, "step": 12440 }, { "epoch": 1.5, "grad_norm": 0.2477523386478424, "learning_rate": 0.00017064881122216398, "loss": 0.8427, "step": 12445 }, { "epoch": 1.5, "grad_norm": 0.252871572971344, "learning_rate": 0.00017054688197698736, "loss": 0.8368, "step": 12450 }, { "epoch": 1.5, "grad_norm": 0.22697529196739197, "learning_rate": 0.00017044494306176576, "loss": 0.7964, "step": 12455 }, { "epoch": 1.5, "grad_norm": 0.27553001046180725, "learning_rate": 0.00017034299452447493, "loss": 0.8546, "step": 12460 }, { "epoch": 1.5, "grad_norm": 0.26646605134010315, 
"learning_rate": 0.00017024103641309537, "loss": 0.8383, "step": 12465 }, { "epoch": 1.5, "grad_norm": 0.255669504404068, "learning_rate": 0.00017013906877561187, "loss": 0.7589, "step": 12470 }, { "epoch": 1.5, "grad_norm": 0.25287455320358276, "learning_rate": 0.0001700370916600138, "loss": 0.8203, "step": 12475 }, { "epoch": 1.5, "grad_norm": 0.2946148216724396, "learning_rate": 0.000169935105114295, "loss": 0.9016, "step": 12480 }, { "epoch": 1.5, "grad_norm": 0.2425968050956726, "learning_rate": 0.0001698331091864537, "loss": 0.786, "step": 12485 }, { "epoch": 1.5, "grad_norm": 0.24818405508995056, "learning_rate": 0.00016973110392449255, "loss": 0.8466, "step": 12490 }, { "epoch": 1.51, "grad_norm": 0.2572382688522339, "learning_rate": 0.00016962908937641873, "loss": 0.8854, "step": 12495 }, { "epoch": 1.51, "grad_norm": 0.22933310270309448, "learning_rate": 0.0001695270655902435, "loss": 0.8843, "step": 12500 }, { "epoch": 1.51, "grad_norm": 0.2477482110261917, "learning_rate": 0.00016942503261398276, "loss": 0.8412, "step": 12505 }, { "epoch": 1.51, "grad_norm": 0.2406783103942871, "learning_rate": 0.00016932299049565657, "loss": 0.7979, "step": 12510 }, { "epoch": 1.51, "grad_norm": 0.2689076364040375, "learning_rate": 0.00016922093928328937, "loss": 0.8078, "step": 12515 }, { "epoch": 1.51, "grad_norm": 0.2702324092388153, "learning_rate": 0.00016911887902490986, "loss": 0.8463, "step": 12520 }, { "epoch": 1.51, "grad_norm": 0.27444717288017273, "learning_rate": 0.00016901680976855096, "loss": 0.8428, "step": 12525 }, { "epoch": 1.51, "grad_norm": 0.24861744046211243, "learning_rate": 0.00016891473156224976, "loss": 0.9324, "step": 12530 }, { "epoch": 1.51, "grad_norm": 0.2355334758758545, "learning_rate": 0.00016881264445404786, "loss": 0.8497, "step": 12535 }, { "epoch": 1.51, "grad_norm": 0.24266460537910461, "learning_rate": 0.00016871054849199068, "loss": 0.8034, "step": 12540 }, { "epoch": 1.51, "grad_norm": 0.24508504569530487, "learning_rate": 
0.00016860844372412802, "loss": 0.8216, "step": 12545 }, { "epoch": 1.51, "grad_norm": 0.23630234599113464, "learning_rate": 0.00016850633019851378, "loss": 0.8422, "step": 12550 }, { "epoch": 1.51, "grad_norm": 0.2652592658996582, "learning_rate": 0.00016840420796320602, "loss": 0.9161, "step": 12555 }, { "epoch": 1.51, "grad_norm": 0.21832112967967987, "learning_rate": 0.00016830207706626675, "loss": 0.7847, "step": 12560 }, { "epoch": 1.51, "grad_norm": 0.2525995671749115, "learning_rate": 0.00016819993755576225, "loss": 0.8049, "step": 12565 }, { "epoch": 1.51, "grad_norm": 0.2583009898662567, "learning_rate": 0.00016809778947976273, "loss": 0.8428, "step": 12570 }, { "epoch": 1.52, "grad_norm": 0.2520254850387573, "learning_rate": 0.00016799563288634247, "loss": 0.8095, "step": 12575 }, { "epoch": 1.52, "grad_norm": 0.2578343152999878, "learning_rate": 0.00016789346782357975, "loss": 0.7919, "step": 12580 }, { "epoch": 1.52, "grad_norm": 0.26357656717300415, "learning_rate": 0.00016779129433955686, "loss": 0.8591, "step": 12585 }, { "epoch": 1.52, "grad_norm": 0.256157249212265, "learning_rate": 0.00016768911248236001, "loss": 0.8812, "step": 12590 }, { "epoch": 1.52, "grad_norm": 0.25076690316200256, "learning_rate": 0.00016758692230007939, "loss": 0.8109, "step": 12595 }, { "epoch": 1.52, "grad_norm": 0.26250481605529785, "learning_rate": 0.00016748472384080912, "loss": 0.944, "step": 12600 }, { "epoch": 1.52, "grad_norm": 0.25288984179496765, "learning_rate": 0.0001673825171526471, "loss": 0.9143, "step": 12605 }, { "epoch": 1.52, "grad_norm": 0.26439496874809265, "learning_rate": 0.0001672803022836953, "loss": 0.9372, "step": 12610 }, { "epoch": 1.52, "grad_norm": 0.23811744153499603, "learning_rate": 0.00016717807928205936, "loss": 0.816, "step": 12615 }, { "epoch": 1.52, "grad_norm": 0.2801905870437622, "learning_rate": 0.00016707584819584885, "loss": 0.7269, "step": 12620 }, { "epoch": 1.52, "grad_norm": 0.2476351410150528, "learning_rate": 
0.00016697360907317712, "loss": 0.7855, "step": 12625 }, { "epoch": 1.52, "grad_norm": 0.2882719933986664, "learning_rate": 0.0001668713619621613, "loss": 0.9298, "step": 12630 }, { "epoch": 1.52, "grad_norm": 0.27787694334983826, "learning_rate": 0.00016676910691092224, "loss": 0.8385, "step": 12635 }, { "epoch": 1.52, "grad_norm": 0.28809016942977905, "learning_rate": 0.00016666684396758459, "loss": 0.7968, "step": 12640 }, { "epoch": 1.52, "grad_norm": 0.24893060326576233, "learning_rate": 0.00016656457318027667, "loss": 0.8045, "step": 12645 }, { "epoch": 1.52, "grad_norm": 0.22097425162792206, "learning_rate": 0.0001664622945971305, "loss": 0.8332, "step": 12650 }, { "epoch": 1.52, "grad_norm": 0.241326704621315, "learning_rate": 0.00016636000826628184, "loss": 0.8432, "step": 12655 }, { "epoch": 1.53, "grad_norm": 0.2555682063102722, "learning_rate": 0.00016625771423586991, "loss": 0.998, "step": 12660 }, { "epoch": 1.53, "grad_norm": 0.2659325897693634, "learning_rate": 0.00016615541255403786, "loss": 0.796, "step": 12665 }, { "epoch": 1.53, "grad_norm": 0.24846947193145752, "learning_rate": 0.0001660531032689321, "loss": 0.8544, "step": 12670 }, { "epoch": 1.53, "grad_norm": 0.2357734739780426, "learning_rate": 0.0001659507864287029, "loss": 0.8947, "step": 12675 }, { "epoch": 1.53, "grad_norm": 0.22256316244602203, "learning_rate": 0.00016584846208150383, "loss": 0.7654, "step": 12680 }, { "epoch": 1.53, "grad_norm": 0.2255926877260208, "learning_rate": 0.00016574613027549217, "loss": 0.9368, "step": 12685 }, { "epoch": 1.53, "grad_norm": 0.2199205905199051, "learning_rate": 0.00016564379105882873, "loss": 0.8522, "step": 12690 }, { "epoch": 1.53, "grad_norm": 0.28501272201538086, "learning_rate": 0.0001655414444796777, "loss": 0.9038, "step": 12695 }, { "epoch": 1.53, "grad_norm": 0.24840682744979858, "learning_rate": 0.0001654390905862068, "loss": 0.8622, "step": 12700 }, { "epoch": 1.53, "grad_norm": 0.25936436653137207, "learning_rate": 
0.00016533672942658717, "loss": 0.7946, "step": 12705 }, { "epoch": 1.53, "grad_norm": 0.2659285068511963, "learning_rate": 0.0001652343610489933, "loss": 0.8187, "step": 12710 }, { "epoch": 1.53, "grad_norm": 0.24160555005073547, "learning_rate": 0.00016513198550160326, "loss": 0.7751, "step": 12715 }, { "epoch": 1.53, "grad_norm": 0.26907095313072205, "learning_rate": 0.00016502960283259823, "loss": 0.807, "step": 12720 }, { "epoch": 1.53, "grad_norm": 0.2532917559146881, "learning_rate": 0.00016492721309016307, "loss": 0.8671, "step": 12725 }, { "epoch": 1.53, "grad_norm": 0.22019241750240326, "learning_rate": 0.00016482481632248568, "loss": 0.8272, "step": 12730 }, { "epoch": 1.53, "grad_norm": 0.22375644743442535, "learning_rate": 0.00016472241257775743, "loss": 0.7704, "step": 12735 }, { "epoch": 1.54, "grad_norm": 0.2382332682609558, "learning_rate": 0.00016462000190417292, "loss": 0.9716, "step": 12740 }, { "epoch": 1.54, "grad_norm": 0.279604971408844, "learning_rate": 0.00016451758434992997, "loss": 0.8371, "step": 12745 }, { "epoch": 1.54, "grad_norm": 0.2584107220172882, "learning_rate": 0.00016441515996322973, "loss": 0.8307, "step": 12750 }, { "epoch": 1.54, "grad_norm": 0.24332301318645477, "learning_rate": 0.00016431272879227648, "loss": 0.8213, "step": 12755 }, { "epoch": 1.54, "grad_norm": 0.2555186450481415, "learning_rate": 0.00016421029088527775, "loss": 0.8662, "step": 12760 }, { "epoch": 1.54, "grad_norm": 0.21609874069690704, "learning_rate": 0.00016410784629044422, "loss": 0.9062, "step": 12765 }, { "epoch": 1.54, "grad_norm": 0.23835386335849762, "learning_rate": 0.00016400539505598974, "loss": 0.9274, "step": 12770 }, { "epoch": 1.54, "grad_norm": 0.24308866262435913, "learning_rate": 0.00016390293723013124, "loss": 0.9382, "step": 12775 }, { "epoch": 1.54, "grad_norm": 0.2690812647342682, "learning_rate": 0.00016380047286108874, "loss": 0.8704, "step": 12780 }, { "epoch": 1.54, "grad_norm": 0.24690578877925873, "learning_rate": 
0.00016369800199708546, "loss": 0.9505, "step": 12785 }, { "epoch": 1.54, "grad_norm": 0.27407118678092957, "learning_rate": 0.00016359552468634748, "loss": 0.806, "step": 12790 }, { "epoch": 1.54, "grad_norm": 0.23908410966396332, "learning_rate": 0.00016349304097710416, "loss": 0.8412, "step": 12795 }, { "epoch": 1.54, "grad_norm": 0.21888162195682526, "learning_rate": 0.00016339055091758764, "loss": 0.91, "step": 12800 }, { "epoch": 1.54, "grad_norm": 0.2692570984363556, "learning_rate": 0.00016328805455603315, "loss": 0.8517, "step": 12805 }, { "epoch": 1.54, "grad_norm": 0.21654078364372253, "learning_rate": 0.00016318555194067892, "loss": 0.9092, "step": 12810 }, { "epoch": 1.54, "grad_norm": 0.22260205447673798, "learning_rate": 0.00016308304311976604, "loss": 0.7538, "step": 12815 }, { "epoch": 1.54, "grad_norm": 0.2802688479423523, "learning_rate": 0.00016298052814153866, "loss": 0.9275, "step": 12820 }, { "epoch": 1.55, "grad_norm": 0.24824702739715576, "learning_rate": 0.00016287800705424362, "loss": 0.8563, "step": 12825 }, { "epoch": 1.55, "grad_norm": 0.2404843270778656, "learning_rate": 0.00016277547990613083, "loss": 0.9009, "step": 12830 }, { "epoch": 1.55, "grad_norm": 0.24702101945877075, "learning_rate": 0.00016267294674545286, "loss": 0.9061, "step": 12835 }, { "epoch": 1.55, "grad_norm": 0.24450421333312988, "learning_rate": 0.0001625704076204654, "loss": 0.9004, "step": 12840 }, { "epoch": 1.55, "grad_norm": 0.2563953399658203, "learning_rate": 0.00016246786257942658, "loss": 0.9063, "step": 12845 }, { "epoch": 1.55, "grad_norm": 0.2544556260108948, "learning_rate": 0.00016236531167059762, "loss": 0.8174, "step": 12850 }, { "epoch": 1.55, "grad_norm": 0.23205184936523438, "learning_rate": 0.00016226275494224233, "loss": 0.8916, "step": 12855 }, { "epoch": 1.55, "grad_norm": 0.2657812237739563, "learning_rate": 0.00016216019244262735, "loss": 0.8149, "step": 12860 }, { "epoch": 1.55, "grad_norm": 0.21930214762687683, "learning_rate": 
0.00016205762422002198, "loss": 0.8223, "step": 12865 }, { "epoch": 1.55, "grad_norm": 0.2352408766746521, "learning_rate": 0.00016195505032269821, "loss": 0.8929, "step": 12870 }, { "epoch": 1.55, "grad_norm": 0.24339045584201813, "learning_rate": 0.00016185247079893075, "loss": 0.919, "step": 12875 }, { "epoch": 1.55, "grad_norm": 0.27827414870262146, "learning_rate": 0.00016174988569699696, "loss": 0.8859, "step": 12880 }, { "epoch": 1.55, "grad_norm": 0.2651593089103699, "learning_rate": 0.0001616472950651767, "loss": 0.7699, "step": 12885 }, { "epoch": 1.55, "grad_norm": 0.24380792677402496, "learning_rate": 0.00016154469895175266, "loss": 0.9605, "step": 12890 }, { "epoch": 1.55, "grad_norm": 0.2639029920101166, "learning_rate": 0.00016144209740500982, "loss": 0.8723, "step": 12895 }, { "epoch": 1.55, "grad_norm": 0.2522783577442169, "learning_rate": 0.000161339490473236, "loss": 0.8958, "step": 12900 }, { "epoch": 1.55, "grad_norm": 0.2770918309688568, "learning_rate": 0.00016123687820472139, "loss": 0.797, "step": 12905 }, { "epoch": 1.56, "grad_norm": 0.22492796182632446, "learning_rate": 0.00016113426064775875, "loss": 0.8496, "step": 12910 }, { "epoch": 1.56, "grad_norm": 0.2644830644130707, "learning_rate": 0.00016103163785064327, "loss": 0.8458, "step": 12915 }, { "epoch": 1.56, "grad_norm": 0.2638452649116516, "learning_rate": 0.0001609290098616727, "loss": 0.8408, "step": 12920 }, { "epoch": 1.56, "grad_norm": 0.23867619037628174, "learning_rate": 0.0001608263767291471, "loss": 0.8522, "step": 12925 }, { "epoch": 1.56, "grad_norm": 0.2165214717388153, "learning_rate": 0.00016072373850136912, "loss": 0.9328, "step": 12930 }, { "epoch": 1.56, "grad_norm": 0.2513158619403839, "learning_rate": 0.00016062109522664366, "loss": 0.8523, "step": 12935 }, { "epoch": 1.56, "grad_norm": 0.22370438277721405, "learning_rate": 0.00016051844695327806, "loss": 0.8542, "step": 12940 }, { "epoch": 1.56, "grad_norm": 0.2550356090068817, "learning_rate": 
0.000160415793729582, "loss": 0.9122, "step": 12945 }, { "epoch": 1.56, "grad_norm": 0.23933614790439606, "learning_rate": 0.00016031313560386758, "loss": 0.8236, "step": 12950 }, { "epoch": 1.56, "grad_norm": 0.2359176129102707, "learning_rate": 0.000160210472624449, "loss": 0.8815, "step": 12955 }, { "epoch": 1.56, "grad_norm": 0.2673303186893463, "learning_rate": 0.00016010780483964295, "loss": 0.8111, "step": 12960 }, { "epoch": 1.56, "grad_norm": 0.2548558712005615, "learning_rate": 0.00016000513229776826, "loss": 0.919, "step": 12965 }, { "epoch": 1.56, "grad_norm": 0.27557799220085144, "learning_rate": 0.00015990245504714608, "loss": 0.7909, "step": 12970 }, { "epoch": 1.56, "grad_norm": 0.285546213388443, "learning_rate": 0.00015979977313609965, "loss": 0.8933, "step": 12975 }, { "epoch": 1.56, "grad_norm": 0.2725813090801239, "learning_rate": 0.00015969708661295456, "loss": 0.8644, "step": 12980 }, { "epoch": 1.56, "grad_norm": 0.23822170495986938, "learning_rate": 0.0001595943955260385, "loss": 0.8707, "step": 12985 }, { "epoch": 1.57, "grad_norm": 0.2242768406867981, "learning_rate": 0.0001594916999236813, "loss": 0.7392, "step": 12990 }, { "epoch": 1.57, "grad_norm": 0.21174441277980804, "learning_rate": 0.00015938899985421486, "loss": 0.794, "step": 12995 }, { "epoch": 1.57, "grad_norm": 0.24703176319599152, "learning_rate": 0.00015928629536597332, "loss": 0.8355, "step": 13000 }, { "epoch": 1.57, "grad_norm": 0.22385214269161224, "learning_rate": 0.00015918358650729276, "loss": 0.763, "step": 13005 }, { "epoch": 1.57, "grad_norm": 0.24205328524112701, "learning_rate": 0.00015908087332651142, "loss": 0.86, "step": 13010 }, { "epoch": 1.57, "grad_norm": 0.256166011095047, "learning_rate": 0.00015897815587196954, "loss": 0.8247, "step": 13015 }, { "epoch": 1.57, "grad_norm": 0.2543538212776184, "learning_rate": 0.00015887543419200936, "loss": 0.9216, "step": 13020 }, { "epoch": 1.57, "grad_norm": 0.2570268511772156, "learning_rate": 
0.0001587727083349751, "loss": 0.8575, "step": 13025 }, { "epoch": 1.57, "grad_norm": 0.29525551199913025, "learning_rate": 0.000158669978349213, "loss": 0.9212, "step": 13030 }, { "epoch": 1.57, "grad_norm": 0.23510275781154633, "learning_rate": 0.0001585672442830711, "loss": 0.8684, "step": 13035 }, { "epoch": 1.57, "grad_norm": 0.23046576976776123, "learning_rate": 0.00015846450618489958, "loss": 0.7974, "step": 13040 }, { "epoch": 1.57, "grad_norm": 0.2453136444091797, "learning_rate": 0.0001583617641030503, "loss": 0.832, "step": 13045 }, { "epoch": 1.57, "grad_norm": 0.27528902888298035, "learning_rate": 0.0001582590180858772, "loss": 0.7578, "step": 13050 }, { "epoch": 1.57, "grad_norm": 0.2460784763097763, "learning_rate": 0.0001581562681817359, "loss": 0.8231, "step": 13055 }, { "epoch": 1.57, "grad_norm": 0.29714435338974, "learning_rate": 0.00015805351443898388, "loss": 0.7481, "step": 13060 }, { "epoch": 1.57, "grad_norm": 0.22743813693523407, "learning_rate": 0.0001579507569059806, "loss": 0.7635, "step": 13065 }, { "epoch": 1.57, "grad_norm": 0.24308447539806366, "learning_rate": 0.00015784799563108706, "loss": 0.8822, "step": 13070 }, { "epoch": 1.58, "grad_norm": 0.2288735806941986, "learning_rate": 0.00015774523066266612, "loss": 0.8427, "step": 13075 }, { "epoch": 1.58, "grad_norm": 0.23703986406326294, "learning_rate": 0.00015764246204908245, "loss": 0.9294, "step": 13080 }, { "epoch": 1.58, "grad_norm": 0.2489616721868515, "learning_rate": 0.0001575396898387023, "loss": 0.9377, "step": 13085 }, { "epoch": 1.58, "grad_norm": 0.25076568126678467, "learning_rate": 0.00015743691407989378, "loss": 0.9284, "step": 13090 }, { "epoch": 1.58, "grad_norm": 0.247811421751976, "learning_rate": 0.00015733413482102652, "loss": 0.867, "step": 13095 }, { "epoch": 1.58, "grad_norm": 0.2610926330089569, "learning_rate": 0.00015723135211047186, "loss": 0.8203, "step": 13100 }, { "epoch": 1.58, "grad_norm": 0.2661215662956238, "learning_rate": 
0.00015712856599660267, "loss": 0.9256, "step": 13105 }, { "epoch": 1.58, "grad_norm": 0.22630742192268372, "learning_rate": 0.00015702577652779368, "loss": 0.8556, "step": 13110 }, { "epoch": 1.58, "grad_norm": 0.3097597360610962, "learning_rate": 0.00015692298375242087, "loss": 0.8452, "step": 13115 }, { "epoch": 1.58, "grad_norm": 0.2603763937950134, "learning_rate": 0.00015682018771886203, "loss": 0.8142, "step": 13120 }, { "epoch": 1.58, "grad_norm": 0.2540077865123749, "learning_rate": 0.00015671738847549633, "loss": 0.9639, "step": 13125 }, { "epoch": 1.58, "grad_norm": 0.24496349692344666, "learning_rate": 0.0001566145860707046, "loss": 1.0137, "step": 13130 }, { "epoch": 1.58, "grad_norm": 0.2169584482908249, "learning_rate": 0.00015651178055286897, "loss": 0.9417, "step": 13135 }, { "epoch": 1.58, "grad_norm": 0.23268476128578186, "learning_rate": 0.0001564089719703732, "loss": 0.91, "step": 13140 }, { "epoch": 1.58, "grad_norm": 0.21907760202884674, "learning_rate": 0.0001563061603716023, "loss": 0.8338, "step": 13145 }, { "epoch": 1.58, "grad_norm": 0.24328762292861938, "learning_rate": 0.00015620334580494297, "loss": 0.872, "step": 13150 }, { "epoch": 1.59, "grad_norm": 0.26593923568725586, "learning_rate": 0.00015610052831878304, "loss": 0.9121, "step": 13155 }, { "epoch": 1.59, "grad_norm": 0.2493075728416443, "learning_rate": 0.00015599770796151196, "loss": 0.8829, "step": 13160 }, { "epoch": 1.59, "grad_norm": 0.2412213832139969, "learning_rate": 0.00015589488478152027, "loss": 0.8247, "step": 13165 }, { "epoch": 1.59, "grad_norm": 0.2626934349536896, "learning_rate": 0.00015579205882720014, "loss": 0.8603, "step": 13170 }, { "epoch": 1.59, "grad_norm": 0.2839190363883972, "learning_rate": 0.0001556892301469447, "loss": 0.8257, "step": 13175 }, { "epoch": 1.59, "grad_norm": 0.28261008858680725, "learning_rate": 0.0001555863987891486, "loss": 0.7699, "step": 13180 }, { "epoch": 1.59, "grad_norm": 0.2586536109447479, "learning_rate": 
0.00015548356480220773, "loss": 0.9528, "step": 13185 }, { "epoch": 1.59, "grad_norm": 0.25561654567718506, "learning_rate": 0.0001553807282345192, "loss": 0.809, "step": 13190 }, { "epoch": 1.59, "grad_norm": 0.24702797830104828, "learning_rate": 0.00015527788913448124, "loss": 0.9084, "step": 13195 }, { "epoch": 1.59, "grad_norm": 0.22921974956989288, "learning_rate": 0.0001551750475504934, "loss": 0.8321, "step": 13200 }, { "epoch": 1.59, "grad_norm": 0.2360827922821045, "learning_rate": 0.0001550722035309563, "loss": 0.8252, "step": 13205 }, { "epoch": 1.59, "grad_norm": 0.24088476598262787, "learning_rate": 0.00015496935712427183, "loss": 0.7994, "step": 13210 }, { "epoch": 1.59, "grad_norm": 0.22839505970478058, "learning_rate": 0.00015486650837884277, "loss": 0.891, "step": 13215 }, { "epoch": 1.59, "grad_norm": 0.257959246635437, "learning_rate": 0.00015476365734307335, "loss": 0.853, "step": 13220 }, { "epoch": 1.59, "grad_norm": 0.23323047161102295, "learning_rate": 0.00015466080406536853, "loss": 0.8453, "step": 13225 }, { "epoch": 1.59, "grad_norm": 0.24002352356910706, "learning_rate": 0.00015455794859413458, "loss": 0.7921, "step": 13230 }, { "epoch": 1.59, "grad_norm": 0.270093709230423, "learning_rate": 0.0001544550909777786, "loss": 0.8082, "step": 13235 }, { "epoch": 1.6, "grad_norm": 0.25525951385498047, "learning_rate": 0.0001543522312647089, "loss": 0.9131, "step": 13240 }, { "epoch": 1.6, "grad_norm": 0.26229140162467957, "learning_rate": 0.00015424936950333463, "loss": 0.9213, "step": 13245 }, { "epoch": 1.6, "grad_norm": 0.25442442297935486, "learning_rate": 0.00015414650574206595, "loss": 0.8611, "step": 13250 }, { "epoch": 1.6, "grad_norm": 0.2250799685716629, "learning_rate": 0.00015404364002931397, "loss": 0.8195, "step": 13255 }, { "epoch": 1.6, "grad_norm": 0.2337421178817749, "learning_rate": 0.00015394077241349073, "loss": 0.8752, "step": 13260 }, { "epoch": 1.6, "grad_norm": 0.2822083830833435, "learning_rate": 
0.00015383790294300908, "loss": 0.8054, "step": 13265 }, { "epoch": 1.6, "grad_norm": 0.25542697310447693, "learning_rate": 0.00015373503166628288, "loss": 0.8523, "step": 13270 }, { "epoch": 1.6, "grad_norm": 0.23330925405025482, "learning_rate": 0.00015363215863172671, "loss": 0.9308, "step": 13275 }, { "epoch": 1.6, "grad_norm": 0.3322324752807617, "learning_rate": 0.00015352928388775612, "loss": 0.8854, "step": 13280 }, { "epoch": 1.6, "grad_norm": 0.2551574110984802, "learning_rate": 0.00015342640748278725, "loss": 0.8225, "step": 13285 }, { "epoch": 1.6, "grad_norm": 0.2288314402103424, "learning_rate": 0.00015332352946523733, "loss": 0.8141, "step": 13290 }, { "epoch": 1.6, "grad_norm": 0.28480198979377747, "learning_rate": 0.000153220649883524, "loss": 0.7063, "step": 13295 }, { "epoch": 1.6, "grad_norm": 0.26448380947113037, "learning_rate": 0.0001531177687860659, "loss": 0.8755, "step": 13300 }, { "epoch": 1.6, "grad_norm": 0.29811567068099976, "learning_rate": 0.00015301488622128224, "loss": 0.867, "step": 13305 }, { "epoch": 1.6, "grad_norm": 0.2261502593755722, "learning_rate": 0.00015291200223759306, "loss": 0.881, "step": 13310 }, { "epoch": 1.6, "grad_norm": 0.26991018652915955, "learning_rate": 0.0001528091168834189, "loss": 0.835, "step": 13315 }, { "epoch": 1.6, "grad_norm": 0.2542611062526703, "learning_rate": 0.00015270623020718102, "loss": 0.8976, "step": 13320 }, { "epoch": 1.61, "grad_norm": 0.2485206127166748, "learning_rate": 0.00015260334225730137, "loss": 0.8939, "step": 13325 }, { "epoch": 1.61, "grad_norm": 0.26317083835601807, "learning_rate": 0.00015250045308220236, "loss": 0.8537, "step": 13330 }, { "epoch": 1.61, "grad_norm": 0.23447948694229126, "learning_rate": 0.00015239756273030715, "loss": 0.8983, "step": 13335 }, { "epoch": 1.61, "grad_norm": 0.25760382413864136, "learning_rate": 0.00015229467125003925, "loss": 0.7641, "step": 13340 }, { "epoch": 1.61, "grad_norm": 0.24973775446414948, "learning_rate": 0.00015219177868982286, 
"loss": 0.8753, "step": 13345 }, { "epoch": 1.61, "grad_norm": 0.26316988468170166, "learning_rate": 0.00015208888509808267, "loss": 0.9377, "step": 13350 }, { "epoch": 1.61, "grad_norm": 0.23706527054309845, "learning_rate": 0.00015198599052324377, "loss": 0.8637, "step": 13355 }, { "epoch": 1.61, "grad_norm": 0.2416650950908661, "learning_rate": 0.00015188309501373175, "loss": 0.8773, "step": 13360 }, { "epoch": 1.61, "grad_norm": 0.24447450041770935, "learning_rate": 0.0001517801986179727, "loss": 0.8913, "step": 13365 }, { "epoch": 1.61, "grad_norm": 0.24984611570835114, "learning_rate": 0.00015167730138439305, "loss": 0.8512, "step": 13370 }, { "epoch": 1.61, "grad_norm": 0.2462444007396698, "learning_rate": 0.00015157440336141967, "loss": 0.934, "step": 13375 }, { "epoch": 1.61, "grad_norm": 0.23950566351413727, "learning_rate": 0.0001514715045974798, "loss": 0.8259, "step": 13380 }, { "epoch": 1.61, "grad_norm": 0.23663043975830078, "learning_rate": 0.000151368605141001, "loss": 0.7744, "step": 13385 }, { "epoch": 1.61, "grad_norm": 0.2686599791049957, "learning_rate": 0.00015126570504041115, "loss": 0.8617, "step": 13390 }, { "epoch": 1.61, "grad_norm": 0.22998297214508057, "learning_rate": 0.0001511628043441385, "loss": 0.8539, "step": 13395 }, { "epoch": 1.61, "grad_norm": 0.2632577121257782, "learning_rate": 0.00015105990310061146, "loss": 0.8452, "step": 13400 }, { "epoch": 1.62, "grad_norm": 0.2295655459165573, "learning_rate": 0.00015095700135825887, "loss": 0.9384, "step": 13405 }, { "epoch": 1.62, "grad_norm": 0.24616779386997223, "learning_rate": 0.00015085409916550961, "loss": 0.8459, "step": 13410 }, { "epoch": 1.62, "grad_norm": 0.25910162925720215, "learning_rate": 0.00015075119657079298, "loss": 0.8525, "step": 13415 }, { "epoch": 1.62, "grad_norm": 0.22823968529701233, "learning_rate": 0.00015064829362253828, "loss": 0.8893, "step": 13420 }, { "epoch": 1.62, "grad_norm": 0.2842884063720703, "learning_rate": 0.0001505453903691751, "loss": 
0.806, "step": 13425 }, { "epoch": 1.62, "grad_norm": 0.24456340074539185, "learning_rate": 0.00015044248685913304, "loss": 0.871, "step": 13430 }, { "epoch": 1.62, "grad_norm": 0.23443590104579926, "learning_rate": 0.00015033958314084202, "loss": 0.8712, "step": 13435 }, { "epoch": 1.62, "grad_norm": 0.28807491064071655, "learning_rate": 0.00015023667926273183, "loss": 0.8278, "step": 13440 }, { "epoch": 1.62, "grad_norm": 0.28618142008781433, "learning_rate": 0.00015013377527323257, "loss": 0.9076, "step": 13445 }, { "epoch": 1.62, "grad_norm": 0.2491266131401062, "learning_rate": 0.0001500308712207742, "loss": 0.8047, "step": 13450 }, { "epoch": 1.62, "grad_norm": 0.25134310126304626, "learning_rate": 0.00014992796715378686, "loss": 0.8135, "step": 13455 }, { "epoch": 1.62, "grad_norm": 0.2570821940898895, "learning_rate": 0.00014982506312070053, "loss": 0.8727, "step": 13460 }, { "epoch": 1.62, "grad_norm": 0.23932205140590668, "learning_rate": 0.0001497221591699453, "loss": 0.8599, "step": 13465 }, { "epoch": 1.62, "grad_norm": 0.274517685174942, "learning_rate": 0.00014961925534995118, "loss": 0.8436, "step": 13470 }, { "epoch": 1.62, "grad_norm": 0.24874992668628693, "learning_rate": 0.0001495163517091482, "loss": 0.8423, "step": 13475 }, { "epoch": 1.62, "grad_norm": 0.23968589305877686, "learning_rate": 0.00014941344829596612, "loss": 0.9005, "step": 13480 }, { "epoch": 1.62, "grad_norm": 0.25740721821784973, "learning_rate": 0.00014931054515883473, "loss": 0.8737, "step": 13485 }, { "epoch": 1.63, "grad_norm": 0.22568397223949432, "learning_rate": 0.00014920764234618373, "loss": 0.9345, "step": 13490 }, { "epoch": 1.63, "grad_norm": 0.2677795886993408, "learning_rate": 0.00014910473990644254, "loss": 0.8573, "step": 13495 }, { "epoch": 1.63, "grad_norm": 0.2688808739185333, "learning_rate": 0.00014900183788804048, "loss": 0.8759, "step": 13500 }, { "epoch": 1.63, "grad_norm": 0.25429514050483704, "learning_rate": 0.0001488989363394066, "loss": 0.7835, 
"step": 13505 }, { "epoch": 1.63, "grad_norm": 0.3000656068325043, "learning_rate": 0.00014879603530896992, "loss": 0.8517, "step": 13510 }, { "epoch": 1.63, "grad_norm": 0.2686077356338501, "learning_rate": 0.00014869313484515897, "loss": 0.9386, "step": 13515 }, { "epoch": 1.63, "grad_norm": 0.2108505368232727, "learning_rate": 0.0001485902349964022, "loss": 0.8535, "step": 13520 }, { "epoch": 1.63, "grad_norm": 0.2740882635116577, "learning_rate": 0.0001484873358111276, "loss": 0.9123, "step": 13525 }, { "epoch": 1.63, "grad_norm": 0.234098881483078, "learning_rate": 0.00014838443733776306, "loss": 0.8044, "step": 13530 }, { "epoch": 1.63, "grad_norm": 0.2779216468334198, "learning_rate": 0.00014828153962473593, "loss": 0.8323, "step": 13535 }, { "epoch": 1.63, "grad_norm": 0.24527031183242798, "learning_rate": 0.00014817864272047334, "loss": 0.8261, "step": 13540 }, { "epoch": 1.63, "grad_norm": 0.2469957321882248, "learning_rate": 0.00014807574667340188, "loss": 0.836, "step": 13545 }, { "epoch": 1.63, "grad_norm": 0.24512287974357605, "learning_rate": 0.00014797285153194805, "loss": 0.799, "step": 13550 }, { "epoch": 1.63, "grad_norm": 0.26496610045433044, "learning_rate": 0.00014786995734453756, "loss": 0.8458, "step": 13555 }, { "epoch": 1.63, "grad_norm": 0.2510302662849426, "learning_rate": 0.0001477670641595959, "loss": 0.8904, "step": 13560 }, { "epoch": 1.63, "grad_norm": 0.22821520268917084, "learning_rate": 0.00014766417202554798, "loss": 0.8772, "step": 13565 }, { "epoch": 1.64, "grad_norm": 0.27513188123703003, "learning_rate": 0.0001475612809908183, "loss": 0.81, "step": 13570 }, { "epoch": 1.64, "grad_norm": 0.24558843672275543, "learning_rate": 0.00014745839110383077, "loss": 0.7985, "step": 13575 }, { "epoch": 1.64, "grad_norm": 0.26508501172065735, "learning_rate": 0.0001473555024130088, "loss": 0.8178, "step": 13580 }, { "epoch": 1.64, "grad_norm": 0.23892903327941895, "learning_rate": 0.00014725261496677513, "loss": 0.8101, "step": 13585 }, 
{ "epoch": 1.64, "grad_norm": 0.24258942902088165, "learning_rate": 0.00014714972881355216, "loss": 0.875, "step": 13590 }, { "epoch": 1.64, "grad_norm": 0.26198121905326843, "learning_rate": 0.0001470468440017615, "loss": 0.8671, "step": 13595 }, { "epoch": 1.64, "grad_norm": 0.22335876524448395, "learning_rate": 0.0001469439605798241, "loss": 0.7895, "step": 13600 }, { "epoch": 1.64, "grad_norm": 0.21281731128692627, "learning_rate": 0.0001468410785961603, "loss": 0.8186, "step": 13605 }, { "epoch": 1.64, "grad_norm": 0.26256877183914185, "learning_rate": 0.00014673819809918985, "loss": 0.8658, "step": 13610 }, { "epoch": 1.64, "grad_norm": 0.2902090549468994, "learning_rate": 0.0001466353191373317, "loss": 0.8785, "step": 13615 }, { "epoch": 1.64, "grad_norm": 0.2701566815376282, "learning_rate": 0.0001465324417590041, "loss": 0.9397, "step": 13620 }, { "epoch": 1.64, "grad_norm": 0.2777699828147888, "learning_rate": 0.00014642956601262452, "loss": 0.8809, "step": 13625 }, { "epoch": 1.64, "grad_norm": 0.24040934443473816, "learning_rate": 0.0001463266919466098, "loss": 0.9082, "step": 13630 }, { "epoch": 1.64, "grad_norm": 0.2828356623649597, "learning_rate": 0.0001462238196093758, "loss": 0.8458, "step": 13635 }, { "epoch": 1.64, "grad_norm": 0.24517838656902313, "learning_rate": 0.00014612094904933772, "loss": 0.9284, "step": 13640 }, { "epoch": 1.64, "grad_norm": 0.23123225569725037, "learning_rate": 0.00014601808031490982, "loss": 0.7244, "step": 13645 }, { "epoch": 1.64, "grad_norm": 0.23790529370307922, "learning_rate": 0.00014591521345450558, "loss": 0.7898, "step": 13650 }, { "epoch": 1.65, "grad_norm": 0.2609568238258362, "learning_rate": 0.00014581234851653753, "loss": 0.8663, "step": 13655 }, { "epoch": 1.65, "grad_norm": 0.36788272857666016, "learning_rate": 0.00014570948554941735, "loss": 0.8454, "step": 13660 }, { "epoch": 1.65, "grad_norm": 0.23802486062049866, "learning_rate": 0.0001456066246015557, "loss": 0.9379, "step": 13665 }, { "epoch": 
1.65, "grad_norm": 0.26445284485816956, "learning_rate": 0.00014550376572136246, "loss": 0.9054, "step": 13670 }, { "epoch": 1.65, "grad_norm": 0.23677241802215576, "learning_rate": 0.0001454009089572464, "loss": 0.7262, "step": 13675 }, { "epoch": 1.65, "grad_norm": 0.2473301738500595, "learning_rate": 0.0001452980543576153, "loss": 0.7921, "step": 13680 }, { "epoch": 1.65, "grad_norm": 0.25561195611953735, "learning_rate": 0.0001451952019708759, "loss": 0.8822, "step": 13685 }, { "epoch": 1.65, "grad_norm": 0.27183815836906433, "learning_rate": 0.0001450923518454341, "loss": 0.9218, "step": 13690 }, { "epoch": 1.65, "grad_norm": 0.22998422384262085, "learning_rate": 0.0001449895040296945, "loss": 0.909, "step": 13695 }, { "epoch": 1.65, "grad_norm": 0.2734430730342865, "learning_rate": 0.00014488665857206065, "loss": 0.8962, "step": 13700 }, { "epoch": 1.65, "grad_norm": 0.23932689428329468, "learning_rate": 0.0001447838155209351, "loss": 0.8714, "step": 13705 }, { "epoch": 1.65, "grad_norm": 0.25987210869789124, "learning_rate": 0.0001446809749247192, "loss": 0.7435, "step": 13710 }, { "epoch": 1.65, "grad_norm": 0.2543168365955353, "learning_rate": 0.00014457813683181316, "loss": 0.8738, "step": 13715 }, { "epoch": 1.65, "grad_norm": 0.3010767698287964, "learning_rate": 0.00014447530129061597, "loss": 0.7546, "step": 13720 }, { "epoch": 1.65, "grad_norm": 0.24551789462566376, "learning_rate": 0.00014437246834952537, "loss": 0.8564, "step": 13725 }, { "epoch": 1.65, "grad_norm": 0.22315555810928345, "learning_rate": 0.00014426963805693816, "loss": 0.8453, "step": 13730 }, { "epoch": 1.65, "grad_norm": 0.2511034905910492, "learning_rate": 0.00014416681046124953, "loss": 0.7888, "step": 13735 }, { "epoch": 1.66, "grad_norm": 0.28927719593048096, "learning_rate": 0.00014406398561085364, "loss": 0.8495, "step": 13740 }, { "epoch": 1.66, "grad_norm": 0.23084275424480438, "learning_rate": 0.00014396116355414322, "loss": 0.8212, "step": 13745 }, { "epoch": 1.66, 
"grad_norm": 0.29675766825675964, "learning_rate": 0.0001438583443395098, "loss": 0.97, "step": 13750 }, { "epoch": 1.66, "grad_norm": 0.27987009286880493, "learning_rate": 0.00014375552801534352, "loss": 0.8982, "step": 13755 }, { "epoch": 1.66, "grad_norm": 0.21575410664081573, "learning_rate": 0.00014365271463003307, "loss": 0.9205, "step": 13760 }, { "epoch": 1.66, "grad_norm": 0.27890732884407043, "learning_rate": 0.0001435499042319659, "loss": 0.8773, "step": 13765 }, { "epoch": 1.66, "grad_norm": 0.23023445904254913, "learning_rate": 0.00014344709686952802, "loss": 0.9438, "step": 13770 }, { "epoch": 1.66, "grad_norm": 0.2396959662437439, "learning_rate": 0.000143344292591104, "loss": 0.8498, "step": 13775 }, { "epoch": 1.66, "grad_norm": 0.2347070574760437, "learning_rate": 0.0001432414914450769, "loss": 0.8571, "step": 13780 }, { "epoch": 1.66, "grad_norm": 0.2968361973762512, "learning_rate": 0.00014313869347982831, "loss": 0.7974, "step": 13785 }, { "epoch": 1.66, "grad_norm": 0.26392439007759094, "learning_rate": 0.0001430358987437385, "loss": 0.8104, "step": 13790 }, { "epoch": 1.66, "grad_norm": 0.25702106952667236, "learning_rate": 0.000142933107285186, "loss": 0.8631, "step": 13795 }, { "epoch": 1.66, "grad_norm": 0.25434088706970215, "learning_rate": 0.0001428303191525479, "loss": 0.8464, "step": 13800 }, { "epoch": 1.66, "grad_norm": 0.23187103867530823, "learning_rate": 0.00014272753439419962, "loss": 0.8209, "step": 13805 }, { "epoch": 1.66, "grad_norm": 0.2695891261100769, "learning_rate": 0.00014262475305851523, "loss": 0.8192, "step": 13810 }, { "epoch": 1.66, "grad_norm": 0.24711039662361145, "learning_rate": 0.000142521975193867, "loss": 0.9271, "step": 13815 }, { "epoch": 1.67, "grad_norm": 0.25541651248931885, "learning_rate": 0.00014241920084862554, "loss": 0.8063, "step": 13820 }, { "epoch": 1.67, "grad_norm": 0.22565728425979614, "learning_rate": 0.00014231643007115994, "loss": 0.8552, "step": 13825 }, { "epoch": 1.67, "grad_norm": 
0.24936147034168243, "learning_rate": 0.0001422136629098375, "loss": 0.872, "step": 13830 }, { "epoch": 1.67, "grad_norm": 0.29638171195983887, "learning_rate": 0.0001421108994130239, "loss": 0.9048, "step": 13835 }, { "epoch": 1.67, "grad_norm": 0.23596297204494476, "learning_rate": 0.00014200813962908293, "loss": 0.7751, "step": 13840 }, { "epoch": 1.67, "grad_norm": 0.23561915755271912, "learning_rate": 0.00014190538360637695, "loss": 0.8732, "step": 13845 }, { "epoch": 1.67, "grad_norm": 0.24926547706127167, "learning_rate": 0.00014180263139326624, "loss": 0.8895, "step": 13850 }, { "epoch": 1.67, "grad_norm": 0.30811989307403564, "learning_rate": 0.00014169988303810942, "loss": 0.8066, "step": 13855 }, { "epoch": 1.67, "grad_norm": 0.243475079536438, "learning_rate": 0.00014159713858926323, "loss": 0.8425, "step": 13860 }, { "epoch": 1.67, "grad_norm": 0.23428189754486084, "learning_rate": 0.00014149439809508273, "loss": 0.8981, "step": 13865 }, { "epoch": 1.67, "grad_norm": 0.26283276081085205, "learning_rate": 0.00014139166160392094, "loss": 0.8915, "step": 13870 }, { "epoch": 1.67, "grad_norm": 0.2469039112329483, "learning_rate": 0.00014128892916412907, "loss": 0.7645, "step": 13875 }, { "epoch": 1.67, "grad_norm": 0.2410871535539627, "learning_rate": 0.00014118620082405637, "loss": 0.811, "step": 13880 }, { "epoch": 1.67, "grad_norm": 0.23918049037456512, "learning_rate": 0.00014108347663205033, "loss": 0.7641, "step": 13885 }, { "epoch": 1.67, "grad_norm": 0.2121327817440033, "learning_rate": 0.00014098075663645628, "loss": 0.9609, "step": 13890 }, { "epoch": 1.67, "grad_norm": 0.2228885442018509, "learning_rate": 0.0001408780408856177, "loss": 0.9062, "step": 13895 }, { "epoch": 1.67, "grad_norm": 0.23560738563537598, "learning_rate": 0.0001407753294278759, "loss": 0.7888, "step": 13900 }, { "epoch": 1.68, "grad_norm": 0.25033679604530334, "learning_rate": 0.0001406726223115705, "loss": 0.8781, "step": 13905 }, { "epoch": 1.68, "grad_norm": 
0.26739415526390076, "learning_rate": 0.00014056991958503882, "loss": 0.8365, "step": 13910 }, { "epoch": 1.68, "grad_norm": 0.24519668519496918, "learning_rate": 0.0001404672212966161, "loss": 0.9437, "step": 13915 }, { "epoch": 1.68, "grad_norm": 0.232150599360466, "learning_rate": 0.0001403645274946356, "loss": 0.7994, "step": 13920 }, { "epoch": 1.68, "grad_norm": 0.25735652446746826, "learning_rate": 0.00014026183822742847, "loss": 0.8427, "step": 13925 }, { "epoch": 1.68, "grad_norm": 0.2597762644290924, "learning_rate": 0.00014015915354332367, "loss": 0.8662, "step": 13930 }, { "epoch": 1.68, "grad_norm": 0.2447414994239807, "learning_rate": 0.000140056473490648, "loss": 0.8525, "step": 13935 }, { "epoch": 1.68, "grad_norm": 0.24776887893676758, "learning_rate": 0.0001399537981177261, "loss": 0.7445, "step": 13940 }, { "epoch": 1.68, "grad_norm": 0.2430184930562973, "learning_rate": 0.00013985112747288048, "loss": 0.8798, "step": 13945 }, { "epoch": 1.68, "grad_norm": 0.26686951518058777, "learning_rate": 0.00013974846160443128, "loss": 0.8167, "step": 13950 }, { "epoch": 1.68, "grad_norm": 0.2600416839122772, "learning_rate": 0.0001396458005606965, "loss": 0.7542, "step": 13955 }, { "epoch": 1.68, "grad_norm": 0.262658953666687, "learning_rate": 0.0001395431443899918, "loss": 0.8568, "step": 13960 }, { "epoch": 1.68, "grad_norm": 0.24480225145816803, "learning_rate": 0.00013944049314063063, "loss": 0.8143, "step": 13965 }, { "epoch": 1.68, "grad_norm": 0.25501397252082825, "learning_rate": 0.0001393378468609241, "loss": 0.7804, "step": 13970 }, { "epoch": 1.68, "grad_norm": 0.24708352982997894, "learning_rate": 0.00013923520559918086, "loss": 0.8356, "step": 13975 }, { "epoch": 1.68, "grad_norm": 0.2388540506362915, "learning_rate": 0.00013913256940370733, "loss": 0.876, "step": 13980 }, { "epoch": 1.69, "grad_norm": 0.2332463413476944, "learning_rate": 0.00013902993832280757, "loss": 0.7916, "step": 13985 }, { "epoch": 1.69, "grad_norm": 
0.2795470058917999, "learning_rate": 0.00013892731240478317, "loss": 0.8881, "step": 13990 }, { "epoch": 1.69, "grad_norm": 0.2751898169517517, "learning_rate": 0.00013882469169793324, "loss": 0.7779, "step": 13995 }, { "epoch": 1.69, "grad_norm": 0.27980801463127136, "learning_rate": 0.00013872207625055449, "loss": 0.8326, "step": 14000 }, { "epoch": 1.69, "grad_norm": 0.2469402402639389, "learning_rate": 0.00013861946611094125, "loss": 0.85, "step": 14005 }, { "epoch": 1.69, "grad_norm": 0.23618659377098083, "learning_rate": 0.00013851686132738516, "loss": 0.7895, "step": 14010 }, { "epoch": 1.69, "grad_norm": 0.25577905774116516, "learning_rate": 0.00013841426194817548, "loss": 0.8099, "step": 14015 }, { "epoch": 1.69, "grad_norm": 0.23130281269550323, "learning_rate": 0.0001383116680215988, "loss": 0.9164, "step": 14020 }, { "epoch": 1.69, "grad_norm": 0.2609565258026123, "learning_rate": 0.00013820907959593938, "loss": 0.8081, "step": 14025 }, { "epoch": 1.69, "grad_norm": 0.24102197587490082, "learning_rate": 0.00013810649671947868, "loss": 0.7962, "step": 14030 }, { "epoch": 1.69, "grad_norm": 0.23725546896457672, "learning_rate": 0.0001380039194404956, "loss": 0.813, "step": 14035 }, { "epoch": 1.69, "grad_norm": 0.24073052406311035, "learning_rate": 0.00013790134780726634, "loss": 0.7779, "step": 14040 }, { "epoch": 1.69, "grad_norm": 0.2828931510448456, "learning_rate": 0.00013779878186806463, "loss": 0.8943, "step": 14045 }, { "epoch": 1.69, "grad_norm": 0.28520870208740234, "learning_rate": 0.00013769622167116138, "loss": 0.8855, "step": 14050 }, { "epoch": 1.69, "grad_norm": 0.2628406882286072, "learning_rate": 0.0001375936672648248, "loss": 0.796, "step": 14055 }, { "epoch": 1.69, "grad_norm": 0.23413865268230438, "learning_rate": 0.00013749111869732034, "loss": 0.9203, "step": 14060 }, { "epoch": 1.69, "grad_norm": 0.24607300758361816, "learning_rate": 0.0001373885760169109, "loss": 0.8389, "step": 14065 }, { "epoch": 1.7, "grad_norm": 
0.23336441814899445, "learning_rate": 0.00013728603927185644, "loss": 0.8227, "step": 14070 }, { "epoch": 1.7, "grad_norm": 0.284675270318985, "learning_rate": 0.00013718350851041407, "loss": 0.8254, "step": 14075 }, { "epoch": 1.7, "grad_norm": 0.2564062178134918, "learning_rate": 0.00013708098378083813, "loss": 0.7736, "step": 14080 }, { "epoch": 1.7, "grad_norm": 0.2292274683713913, "learning_rate": 0.00013697846513138035, "loss": 0.8294, "step": 14085 }, { "epoch": 1.7, "grad_norm": 0.2765873074531555, "learning_rate": 0.0001368759526102893, "loss": 0.8284, "step": 14090 }, { "epoch": 1.7, "grad_norm": 0.2501067519187927, "learning_rate": 0.0001367734462658108, "loss": 0.9865, "step": 14095 }, { "epoch": 1.7, "grad_norm": 0.26736894249916077, "learning_rate": 0.00013667094614618766, "loss": 0.7624, "step": 14100 }, { "epoch": 1.7, "grad_norm": 0.24033088982105255, "learning_rate": 0.00013656845229965996, "loss": 0.8987, "step": 14105 }, { "epoch": 1.7, "grad_norm": 0.2716580927371979, "learning_rate": 0.00013646596477446467, "loss": 0.9425, "step": 14110 }, { "epoch": 1.7, "grad_norm": 0.2522694766521454, "learning_rate": 0.00013636348361883578, "loss": 0.9069, "step": 14115 }, { "epoch": 1.7, "grad_norm": 0.25670236349105835, "learning_rate": 0.00013626100888100432, "loss": 0.9238, "step": 14120 }, { "epoch": 1.7, "grad_norm": 0.26054003834724426, "learning_rate": 0.00013615854060919838, "loss": 0.8625, "step": 14125 }, { "epoch": 1.7, "grad_norm": 0.23610499501228333, "learning_rate": 0.0001360560788516429, "loss": 0.7802, "step": 14130 }, { "epoch": 1.7, "grad_norm": 0.2758851647377014, "learning_rate": 0.0001359536236565598, "loss": 0.8364, "step": 14135 }, { "epoch": 1.7, "grad_norm": 0.26497259736061096, "learning_rate": 0.0001358511750721678, "loss": 0.8287, "step": 14140 }, { "epoch": 1.7, "grad_norm": 0.24857574701309204, "learning_rate": 0.0001357487331466827, "loss": 0.862, "step": 14145 }, { "epoch": 1.7, "grad_norm": 0.28877443075180054, 
"learning_rate": 0.0001356462979283171, "loss": 0.8711, "step": 14150 }, { "epoch": 1.71, "grad_norm": 0.2738126218318939, "learning_rate": 0.00013554386946528033, "loss": 0.9011, "step": 14155 }, { "epoch": 1.71, "grad_norm": 0.26807695627212524, "learning_rate": 0.0001354414478057786, "loss": 0.7915, "step": 14160 }, { "epoch": 1.71, "grad_norm": 0.269781231880188, "learning_rate": 0.0001353390329980151, "loss": 0.8746, "step": 14165 }, { "epoch": 1.71, "grad_norm": 0.2851022183895111, "learning_rate": 0.00013523662509018952, "loss": 0.8851, "step": 14170 }, { "epoch": 1.71, "grad_norm": 0.26294824481010437, "learning_rate": 0.00013513422413049847, "loss": 0.7998, "step": 14175 }, { "epoch": 1.71, "grad_norm": 0.23697425425052643, "learning_rate": 0.00013503183016713518, "loss": 0.8183, "step": 14180 }, { "epoch": 1.71, "grad_norm": 0.22184935212135315, "learning_rate": 0.0001349294432482897, "loss": 0.7641, "step": 14185 }, { "epoch": 1.71, "grad_norm": 0.23600788414478302, "learning_rate": 0.00013482706342214873, "loss": 0.8828, "step": 14190 }, { "epoch": 1.71, "grad_norm": 0.26765936613082886, "learning_rate": 0.0001347246907368956, "loss": 0.8785, "step": 14195 }, { "epoch": 1.71, "grad_norm": 0.2510226368904114, "learning_rate": 0.00013462232524071022, "loss": 0.9082, "step": 14200 }, { "epoch": 1.71, "grad_norm": 0.2888829708099365, "learning_rate": 0.0001345199669817693, "loss": 0.8412, "step": 14205 }, { "epoch": 1.71, "grad_norm": 0.22700832784175873, "learning_rate": 0.00013441761600824602, "loss": 0.8552, "step": 14210 }, { "epoch": 1.71, "grad_norm": 0.2521451711654663, "learning_rate": 0.0001343152723683101, "loss": 0.7209, "step": 14215 }, { "epoch": 1.71, "grad_norm": 0.24360665678977966, "learning_rate": 0.00013421293611012784, "loss": 0.8414, "step": 14220 }, { "epoch": 1.71, "grad_norm": 0.2193845957517624, "learning_rate": 0.00013411060728186217, "loss": 0.7894, "step": 14225 }, { "epoch": 1.71, "grad_norm": 0.2685592472553253, 
"learning_rate": 0.00013400828593167238, "loss": 0.8794, "step": 14230 }, { "epoch": 1.72, "grad_norm": 0.23406338691711426, "learning_rate": 0.0001339059721077143, "loss": 0.8955, "step": 14235 }, { "epoch": 1.72, "grad_norm": 0.2340015023946762, "learning_rate": 0.00013380366585814016, "loss": 0.7859, "step": 14240 }, { "epoch": 1.72, "grad_norm": 0.2922716438770294, "learning_rate": 0.00013370136723109876, "loss": 0.7737, "step": 14245 }, { "epoch": 1.72, "grad_norm": 0.26283466815948486, "learning_rate": 0.0001335990762747352, "loss": 0.8702, "step": 14250 }, { "epoch": 1.72, "grad_norm": 0.2611430287361145, "learning_rate": 0.00013349679303719105, "loss": 0.7799, "step": 14255 }, { "epoch": 1.72, "grad_norm": 0.2265879213809967, "learning_rate": 0.00013339451756660408, "loss": 0.8453, "step": 14260 }, { "epoch": 1.72, "grad_norm": 0.2740434408187866, "learning_rate": 0.00013329224991110865, "loss": 0.854, "step": 14265 }, { "epoch": 1.72, "grad_norm": 0.23583091795444489, "learning_rate": 0.00013318999011883526, "loss": 0.8633, "step": 14270 }, { "epoch": 1.72, "grad_norm": 0.21470926702022552, "learning_rate": 0.00013308773823791074, "loss": 0.8227, "step": 14275 }, { "epoch": 1.72, "grad_norm": 0.2593861520290375, "learning_rate": 0.0001329854943164582, "loss": 0.7781, "step": 14280 }, { "epoch": 1.72, "grad_norm": 0.27231550216674805, "learning_rate": 0.00013288325840259715, "loss": 0.8061, "step": 14285 }, { "epoch": 1.72, "grad_norm": 0.2605496644973755, "learning_rate": 0.0001327810305444431, "loss": 0.9153, "step": 14290 }, { "epoch": 1.72, "grad_norm": 0.280644953250885, "learning_rate": 0.0001326788107901079, "loss": 0.8612, "step": 14295 }, { "epoch": 1.72, "grad_norm": 0.2497917264699936, "learning_rate": 0.0001325765991876995, "loss": 0.7707, "step": 14300 }, { "epoch": 1.72, "grad_norm": 0.26758888363838196, "learning_rate": 0.0001324743957853222, "loss": 0.8533, "step": 14305 }, { "epoch": 1.72, "grad_norm": 0.2774585485458374, "learning_rate": 
0.00013237220063107625, "loss": 0.9308, "step": 14310 }, { "epoch": 1.72, "grad_norm": 0.21268567442893982, "learning_rate": 0.00013227001377305806, "loss": 0.8137, "step": 14315 }, { "epoch": 1.73, "grad_norm": 0.2505970001220703, "learning_rate": 0.0001321678352593602, "loss": 0.7499, "step": 14320 }, { "epoch": 1.73, "grad_norm": 0.2653083801269531, "learning_rate": 0.00013206566513807125, "loss": 0.8927, "step": 14325 }, { "epoch": 1.73, "grad_norm": 0.2813301086425781, "learning_rate": 0.0001319635034572759, "loss": 0.8058, "step": 14330 }, { "epoch": 1.73, "grad_norm": 0.23401866853237152, "learning_rate": 0.0001318613502650547, "loss": 0.8581, "step": 14335 }, { "epoch": 1.73, "grad_norm": 0.24355721473693848, "learning_rate": 0.0001317592056094845, "loss": 0.9558, "step": 14340 }, { "epoch": 1.73, "grad_norm": 0.23587583005428314, "learning_rate": 0.0001316570695386379, "loss": 0.8461, "step": 14345 }, { "epoch": 1.73, "grad_norm": 0.2816319763660431, "learning_rate": 0.00013155494210058353, "loss": 0.8245, "step": 14350 }, { "epoch": 1.73, "grad_norm": 0.2452281266450882, "learning_rate": 0.0001314528233433859, "loss": 0.8102, "step": 14355 }, { "epoch": 1.73, "grad_norm": 0.2361225038766861, "learning_rate": 0.0001313507133151056, "loss": 0.7548, "step": 14360 }, { "epoch": 1.73, "grad_norm": 0.2706858217716217, "learning_rate": 0.0001312486120637989, "loss": 0.8989, "step": 14365 }, { "epoch": 1.73, "grad_norm": 0.25683704018592834, "learning_rate": 0.0001311465196375181, "loss": 0.87, "step": 14370 }, { "epoch": 1.73, "grad_norm": 0.2547888457775116, "learning_rate": 0.0001310444360843112, "loss": 0.8649, "step": 14375 }, { "epoch": 1.73, "grad_norm": 0.28074026107788086, "learning_rate": 0.00013094236145222223, "loss": 0.8258, "step": 14380 }, { "epoch": 1.73, "grad_norm": 0.2704260051250458, "learning_rate": 0.00013084029578929086, "loss": 0.8283, "step": 14385 }, { "epoch": 1.73, "grad_norm": 0.22389455139636993, "learning_rate": 
0.00013073823914355257, "loss": 0.7883, "step": 14390 }, { "epoch": 1.73, "grad_norm": 0.29739248752593994, "learning_rate": 0.00013063619156303854, "loss": 0.9081, "step": 14395 }, { "epoch": 1.74, "grad_norm": 0.25847524404525757, "learning_rate": 0.00013053415309577588, "loss": 0.8948, "step": 14400 }, { "epoch": 1.74, "grad_norm": 0.2561867833137512, "learning_rate": 0.0001304321237897872, "loss": 0.9301, "step": 14405 }, { "epoch": 1.74, "grad_norm": 0.23929399251937866, "learning_rate": 0.00013033010369309088, "loss": 0.853, "step": 14410 }, { "epoch": 1.74, "grad_norm": 0.2782881557941437, "learning_rate": 0.0001302280928537009, "loss": 0.8784, "step": 14415 }, { "epoch": 1.74, "grad_norm": 0.2864670753479004, "learning_rate": 0.00013012609131962712, "loss": 0.888, "step": 14420 }, { "epoch": 1.74, "grad_norm": 0.25334519147872925, "learning_rate": 0.00013002409913887475, "loss": 0.8426, "step": 14425 }, { "epoch": 1.74, "grad_norm": 0.2505394518375397, "learning_rate": 0.00012992211635944474, "loss": 0.7904, "step": 14430 }, { "epoch": 1.74, "grad_norm": 0.2567066252231598, "learning_rate": 0.00012982014302933347, "loss": 0.838, "step": 14435 }, { "epoch": 1.74, "grad_norm": 0.2711067199707031, "learning_rate": 0.00012971817919653307, "loss": 0.8792, "step": 14440 }, { "epoch": 1.74, "grad_norm": 0.25377967953681946, "learning_rate": 0.00012961622490903108, "loss": 0.7812, "step": 14445 }, { "epoch": 1.74, "grad_norm": 0.2506578862667084, "learning_rate": 0.00012951428021481056, "loss": 0.9277, "step": 14450 }, { "epoch": 1.74, "grad_norm": 0.22411175072193146, "learning_rate": 0.00012941234516185003, "loss": 0.9082, "step": 14455 }, { "epoch": 1.74, "grad_norm": 0.252632200717926, "learning_rate": 0.00012931041979812364, "loss": 0.7627, "step": 14460 }, { "epoch": 1.74, "grad_norm": 0.2694997489452362, "learning_rate": 0.00012920850417160078, "loss": 0.9178, "step": 14465 }, { "epoch": 1.74, "grad_norm": 0.2513391077518463, "learning_rate": 
0.0001291065983302463, "loss": 0.8679, "step": 14470 }, { "epoch": 1.74, "grad_norm": 0.2643163800239563, "learning_rate": 0.00012900470232202045, "loss": 0.8087, "step": 14475 }, { "epoch": 1.74, "grad_norm": 0.2633654773235321, "learning_rate": 0.00012890281619487898, "loss": 0.8016, "step": 14480 }, { "epoch": 1.75, "grad_norm": 0.24543441832065582, "learning_rate": 0.00012880093999677282, "loss": 0.8115, "step": 14485 }, { "epoch": 1.75, "grad_norm": 0.2534711956977844, "learning_rate": 0.00012869907377564827, "loss": 0.7686, "step": 14490 }, { "epoch": 1.75, "grad_norm": 0.24713997542858124, "learning_rate": 0.00012859721757944696, "loss": 0.8995, "step": 14495 }, { "epoch": 1.75, "grad_norm": 0.2603427469730377, "learning_rate": 0.00012849537145610587, "loss": 0.9065, "step": 14500 }, { "epoch": 1.75, "grad_norm": 0.26300033926963806, "learning_rate": 0.00012839353545355712, "loss": 0.8299, "step": 14505 }, { "epoch": 1.75, "grad_norm": 0.2567319869995117, "learning_rate": 0.0001282917096197281, "loss": 0.8677, "step": 14510 }, { "epoch": 1.75, "grad_norm": 0.2563308775424957, "learning_rate": 0.0001281898940025414, "loss": 0.7826, "step": 14515 }, { "epoch": 1.75, "grad_norm": 0.2692480683326721, "learning_rate": 0.0001280880886499149, "loss": 0.8732, "step": 14520 }, { "epoch": 1.75, "grad_norm": 0.26520323753356934, "learning_rate": 0.0001279862936097616, "loss": 0.8239, "step": 14525 }, { "epoch": 1.75, "grad_norm": 0.2513013780117035, "learning_rate": 0.00012788450892998952, "loss": 0.7887, "step": 14530 }, { "epoch": 1.75, "grad_norm": 0.2527408003807068, "learning_rate": 0.000127782734658502, "loss": 0.9332, "step": 14535 }, { "epoch": 1.75, "grad_norm": 0.2598322629928589, "learning_rate": 0.00012768097084319736, "loss": 0.8357, "step": 14540 }, { "epoch": 1.75, "grad_norm": 0.257907509803772, "learning_rate": 0.00012757921753196906, "loss": 0.7879, "step": 14545 }, { "epoch": 1.75, "grad_norm": 0.2565610110759735, "learning_rate": 
0.00012747747477270552, "loss": 0.9305, "step": 14550 }, { "epoch": 1.75, "grad_norm": 0.25154614448547363, "learning_rate": 0.00012737574261329027, "loss": 0.734, "step": 14555 }, { "epoch": 1.75, "grad_norm": 0.24832646548748016, "learning_rate": 0.00012727402110160194, "loss": 0.853, "step": 14560 }, { "epoch": 1.75, "grad_norm": 0.2888548672199249, "learning_rate": 0.00012717231028551397, "loss": 0.8503, "step": 14565 }, { "epoch": 1.76, "grad_norm": 0.2529507279396057, "learning_rate": 0.00012707061021289485, "loss": 0.8343, "step": 14570 }, { "epoch": 1.76, "grad_norm": 0.27451494336128235, "learning_rate": 0.00012696892093160803, "loss": 0.7783, "step": 14575 }, { "epoch": 1.76, "grad_norm": 0.2568528652191162, "learning_rate": 0.00012686724248951189, "loss": 0.9118, "step": 14580 }, { "epoch": 1.76, "grad_norm": 0.2533772587776184, "learning_rate": 0.00012676557493445962, "loss": 0.7751, "step": 14585 }, { "epoch": 1.76, "grad_norm": 0.272686630487442, "learning_rate": 0.0001266639183142994, "loss": 0.8448, "step": 14590 }, { "epoch": 1.76, "grad_norm": 0.25127992033958435, "learning_rate": 0.0001265622726768741, "loss": 0.8768, "step": 14595 }, { "epoch": 1.76, "grad_norm": 0.2625642418861389, "learning_rate": 0.00012646063807002168, "loss": 0.9275, "step": 14600 }, { "epoch": 1.76, "grad_norm": 0.26558226346969604, "learning_rate": 0.00012635901454157472, "loss": 0.7573, "step": 14605 }, { "epoch": 1.76, "grad_norm": 0.2596949338912964, "learning_rate": 0.00012625740213936064, "loss": 0.8072, "step": 14610 }, { "epoch": 1.76, "grad_norm": 0.25480490922927856, "learning_rate": 0.0001261558009112015, "loss": 0.9022, "step": 14615 }, { "epoch": 1.76, "grad_norm": 0.2669465243816376, "learning_rate": 0.00012605421090491434, "loss": 0.978, "step": 14620 }, { "epoch": 1.76, "grad_norm": 0.22800852358341217, "learning_rate": 0.00012595263216831076, "loss": 0.6918, "step": 14625 }, { "epoch": 1.76, "grad_norm": 0.2479008287191391, "learning_rate": 
0.00012585106474919704, "loss": 0.8508, "step": 14630 }, { "epoch": 1.76, "grad_norm": 0.24926097691059113, "learning_rate": 0.00012574950869537419, "loss": 0.7538, "step": 14635 }, { "epoch": 1.76, "grad_norm": 0.23529042303562164, "learning_rate": 0.0001256479640546379, "loss": 0.8676, "step": 14640 }, { "epoch": 1.76, "grad_norm": 0.24101610481739044, "learning_rate": 0.00012554643087477844, "loss": 0.9266, "step": 14645 }, { "epoch": 1.77, "grad_norm": 0.28825363516807556, "learning_rate": 0.00012544490920358072, "loss": 0.9265, "step": 14650 }, { "epoch": 1.77, "grad_norm": 0.2545327842235565, "learning_rate": 0.00012534339908882412, "loss": 0.8619, "step": 14655 }, { "epoch": 1.77, "grad_norm": 0.3079511523246765, "learning_rate": 0.00012524190057828277, "loss": 0.853, "step": 14660 }, { "epoch": 1.77, "grad_norm": 0.2799226939678192, "learning_rate": 0.00012514041371972518, "loss": 0.9301, "step": 14665 }, { "epoch": 1.77, "grad_norm": 0.25301575660705566, "learning_rate": 0.00012503893856091448, "loss": 0.8581, "step": 14670 }, { "epoch": 1.77, "grad_norm": 0.26482102274894714, "learning_rate": 0.00012493747514960815, "loss": 0.9563, "step": 14675 }, { "epoch": 1.77, "grad_norm": 0.23120246827602386, "learning_rate": 0.0001248360235335584, "loss": 0.8965, "step": 14680 }, { "epoch": 1.77, "grad_norm": 0.2709689736366272, "learning_rate": 0.00012473458376051163, "loss": 0.7622, "step": 14685 }, { "epoch": 1.77, "grad_norm": 0.254367470741272, "learning_rate": 0.00012463315587820878, "loss": 0.9004, "step": 14690 }, { "epoch": 1.77, "grad_norm": 0.25970759987831116, "learning_rate": 0.0001245317399343851, "loss": 0.7974, "step": 14695 }, { "epoch": 1.77, "grad_norm": 0.23521548509597778, "learning_rate": 0.00012443033597677047, "loss": 0.7991, "step": 14700 }, { "epoch": 1.77, "grad_norm": 0.2456759810447693, "learning_rate": 0.00012432894405308887, "loss": 0.9068, "step": 14705 }, { "epoch": 1.77, "grad_norm": 0.3220599889755249, "learning_rate": 
0.00012422756421105868, "loss": 0.8967, "step": 14710 }, { "epoch": 1.77, "grad_norm": 0.25704309344291687, "learning_rate": 0.00012412619649839263, "loss": 0.9185, "step": 14715 }, { "epoch": 1.77, "grad_norm": 0.2799650728702545, "learning_rate": 0.0001240248409627978, "loss": 0.8382, "step": 14720 }, { "epoch": 1.77, "grad_norm": 0.22531536221504211, "learning_rate": 0.00012392349765197541, "loss": 0.7933, "step": 14725 }, { "epoch": 1.77, "grad_norm": 0.2780819535255432, "learning_rate": 0.00012382216661362098, "loss": 0.8583, "step": 14730 }, { "epoch": 1.78, "grad_norm": 0.2472156137228012, "learning_rate": 0.00012372084789542424, "loss": 0.7335, "step": 14735 }, { "epoch": 1.78, "grad_norm": 0.22671552002429962, "learning_rate": 0.00012361954154506926, "loss": 0.8889, "step": 14740 }, { "epoch": 1.78, "grad_norm": 0.23616662621498108, "learning_rate": 0.00012351824761023405, "loss": 0.9454, "step": 14745 }, { "epoch": 1.78, "grad_norm": 0.24710243940353394, "learning_rate": 0.00012341696613859098, "loss": 0.7842, "step": 14750 }, { "epoch": 1.78, "grad_norm": 0.27012568712234497, "learning_rate": 0.0001233156971778064, "loss": 0.7432, "step": 14755 }, { "epoch": 1.78, "grad_norm": 0.30002421140670776, "learning_rate": 0.00012321444077554095, "loss": 0.9517, "step": 14760 }, { "epoch": 1.78, "grad_norm": 0.2851850688457489, "learning_rate": 0.00012311319697944913, "loss": 0.8708, "step": 14765 }, { "epoch": 1.78, "grad_norm": 0.2681834101676941, "learning_rate": 0.00012301196583717973, "loss": 0.9541, "step": 14770 }, { "epoch": 1.78, "grad_norm": 0.2770148515701294, "learning_rate": 0.0001229107473963754, "loss": 0.7985, "step": 14775 }, { "epoch": 1.78, "grad_norm": 0.23922573029994965, "learning_rate": 0.000122809541704673, "loss": 0.8304, "step": 14780 }, { "epoch": 1.78, "grad_norm": 0.38129690289497375, "learning_rate": 0.00012270834880970323, "loss": 0.8315, "step": 14785 }, { "epoch": 1.78, "grad_norm": 0.2657111585140228, "learning_rate": 
0.00012260716875909085, "loss": 0.792, "step": 14790 }, { "epoch": 1.78, "grad_norm": 0.2710582911968231, "learning_rate": 0.0001225060016004545, "loss": 0.7308, "step": 14795 }, { "epoch": 1.78, "grad_norm": 0.2484636902809143, "learning_rate": 0.0001224048473814069, "loss": 0.852, "step": 14800 }, { "epoch": 1.78, "grad_norm": 0.24950478971004486, "learning_rate": 0.0001223037061495545, "loss": 0.8909, "step": 14805 }, { "epoch": 1.78, "grad_norm": 0.2482597976922989, "learning_rate": 0.00012220257795249778, "loss": 0.8531, "step": 14810 }, { "epoch": 1.79, "grad_norm": 0.2781403362751007, "learning_rate": 0.00012210146283783092, "loss": 0.855, "step": 14815 }, { "epoch": 1.79, "grad_norm": 0.26472207903862, "learning_rate": 0.00012200036085314218, "loss": 0.8358, "step": 14820 }, { "epoch": 1.79, "grad_norm": 0.25163668394088745, "learning_rate": 0.00012189927204601348, "loss": 0.8767, "step": 14825 }, { "epoch": 1.79, "grad_norm": 0.23350991308689117, "learning_rate": 0.00012179819646402052, "loss": 0.8239, "step": 14830 }, { "epoch": 1.79, "grad_norm": 0.275762677192688, "learning_rate": 0.00012169713415473288, "loss": 0.7539, "step": 14835 }, { "epoch": 1.79, "grad_norm": 0.2697180509567261, "learning_rate": 0.00012159608516571383, "loss": 0.8765, "step": 14840 }, { "epoch": 1.79, "grad_norm": 0.25511398911476135, "learning_rate": 0.00012149504954452036, "loss": 0.9266, "step": 14845 }, { "epoch": 1.79, "grad_norm": 0.26254042983055115, "learning_rate": 0.0001213940273387031, "loss": 0.8991, "step": 14850 }, { "epoch": 1.79, "grad_norm": 0.27790650725364685, "learning_rate": 0.00012129301859580665, "loss": 0.8492, "step": 14855 }, { "epoch": 1.79, "grad_norm": 0.2523384690284729, "learning_rate": 0.00012119202336336897, "loss": 0.8993, "step": 14860 }, { "epoch": 1.79, "grad_norm": 0.23431278765201569, "learning_rate": 0.00012109104168892177, "loss": 0.9201, "step": 14865 }, { "epoch": 1.79, "grad_norm": 0.2700154185295105, "learning_rate": 
0.00012099007361999037, "loss": 0.8558, "step": 14870 }, { "epoch": 1.79, "grad_norm": 0.2271653562784195, "learning_rate": 0.00012088911920409374, "loss": 0.8236, "step": 14875 }, { "epoch": 1.79, "grad_norm": 0.24218647181987762, "learning_rate": 0.00012078817848874434, "loss": 0.7495, "step": 14880 }, { "epoch": 1.79, "grad_norm": 0.25467637181282043, "learning_rate": 0.00012068725152144827, "loss": 0.8275, "step": 14885 }, { "epoch": 1.79, "grad_norm": 0.277286559343338, "learning_rate": 0.00012058633834970502, "loss": 0.8756, "step": 14890 }, { "epoch": 1.79, "grad_norm": 0.26092687249183655, "learning_rate": 0.00012048543902100779, "loss": 0.8228, "step": 14895 }, { "epoch": 1.8, "grad_norm": 0.24140042066574097, "learning_rate": 0.00012038455358284309, "loss": 0.8216, "step": 14900 }, { "epoch": 1.8, "grad_norm": 0.2441730499267578, "learning_rate": 0.00012028368208269097, "loss": 0.873, "step": 14905 }, { "epoch": 1.8, "grad_norm": 0.242695614695549, "learning_rate": 0.00012018282456802487, "loss": 0.734, "step": 14910 }, { "epoch": 1.8, "grad_norm": 0.27291175723075867, "learning_rate": 0.00012008198108631176, "loss": 1.0009, "step": 14915 }, { "epoch": 1.8, "grad_norm": 0.25119420886039734, "learning_rate": 0.00011998115168501192, "loss": 1.0065, "step": 14920 }, { "epoch": 1.8, "grad_norm": 0.2412206530570984, "learning_rate": 0.00011988033641157898, "loss": 0.8785, "step": 14925 }, { "epoch": 1.8, "grad_norm": 0.24067923426628113, "learning_rate": 0.00011977953531345996, "loss": 0.9148, "step": 14930 }, { "epoch": 1.8, "grad_norm": 0.2587399184703827, "learning_rate": 0.00011967874843809522, "loss": 0.8226, "step": 14935 }, { "epoch": 1.8, "grad_norm": 0.26349955797195435, "learning_rate": 0.00011957797583291841, "loss": 0.7778, "step": 14940 }, { "epoch": 1.8, "grad_norm": 0.2319829910993576, "learning_rate": 0.00011947721754535645, "loss": 0.7828, "step": 14945 }, { "epoch": 1.8, "grad_norm": 0.27520623803138733, "learning_rate": 
0.00011937647362282948, "loss": 0.8186, "step": 14950 }, { "epoch": 1.8, "grad_norm": 0.23423904180526733, "learning_rate": 0.00011927574411275107, "loss": 0.773, "step": 14955 }, { "epoch": 1.8, "grad_norm": 0.2540885806083679, "learning_rate": 0.0001191750290625278, "loss": 0.7941, "step": 14960 }, { "epoch": 1.8, "grad_norm": 0.26657721400260925, "learning_rate": 0.00011907432851955952, "loss": 0.848, "step": 14965 }, { "epoch": 1.8, "grad_norm": 0.22718775272369385, "learning_rate": 0.00011897364253123921, "loss": 0.7955, "step": 14970 }, { "epoch": 1.8, "grad_norm": 0.252336323261261, "learning_rate": 0.00011887297114495312, "loss": 0.894, "step": 14975 }, { "epoch": 1.8, "grad_norm": 0.2825542390346527, "learning_rate": 0.0001187723144080805, "loss": 0.9111, "step": 14980 }, { "epoch": 1.81, "grad_norm": 0.2752092182636261, "learning_rate": 0.00011867167236799376, "loss": 0.839, "step": 14985 }, { "epoch": 1.81, "grad_norm": 0.2676568925380707, "learning_rate": 0.00011857104507205831, "loss": 0.9122, "step": 14990 }, { "epoch": 1.81, "grad_norm": 0.24729391932487488, "learning_rate": 0.00011847043256763285, "loss": 0.7432, "step": 14995 }, { "epoch": 1.81, "grad_norm": 0.24606819450855255, "learning_rate": 0.00011836983490206889, "loss": 0.7616, "step": 15000 }, { "epoch": 1.81, "grad_norm": 0.27244672179222107, "learning_rate": 0.00011826925212271102, "loss": 0.8588, "step": 15005 }, { "epoch": 1.81, "grad_norm": 0.26637476682662964, "learning_rate": 0.00011816868427689683, "loss": 0.9134, "step": 15010 }, { "epoch": 1.81, "grad_norm": 0.27990347146987915, "learning_rate": 0.00011806813141195691, "loss": 0.8513, "step": 15015 }, { "epoch": 1.81, "grad_norm": 0.33465734124183655, "learning_rate": 0.0001179675935752148, "loss": 0.8542, "step": 15020 }, { "epoch": 1.81, "grad_norm": 0.22314803302288055, "learning_rate": 0.0001178670708139869, "loss": 0.8547, "step": 15025 }, { "epoch": 1.81, "grad_norm": 0.2743472754955292, "learning_rate": 
0.00011776656317558251, "loss": 0.8579, "step": 15030 }, { "epoch": 1.81, "grad_norm": 0.23580728471279144, "learning_rate": 0.000117666070707304, "loss": 0.9977, "step": 15035 }, { "epoch": 1.81, "grad_norm": 0.2850353419780731, "learning_rate": 0.0001175655934564464, "loss": 0.7897, "step": 15040 }, { "epoch": 1.81, "grad_norm": 0.2732098698616028, "learning_rate": 0.00011746513147029762, "loss": 0.6902, "step": 15045 }, { "epoch": 1.81, "grad_norm": 0.26247236132621765, "learning_rate": 0.00011736468479613841, "loss": 0.74, "step": 15050 }, { "epoch": 1.81, "grad_norm": 0.23917360603809357, "learning_rate": 0.00011726425348124232, "loss": 0.8132, "step": 15055 }, { "epoch": 1.81, "grad_norm": 0.2416449338197708, "learning_rate": 0.00011716383757287568, "loss": 0.94, "step": 15060 }, { "epoch": 1.82, "grad_norm": 0.26987558603286743, "learning_rate": 0.00011706343711829753, "loss": 0.8469, "step": 15065 }, { "epoch": 1.82, "grad_norm": 0.2562698721885681, "learning_rate": 0.0001169630521647596, "loss": 0.7888, "step": 15070 }, { "epoch": 1.82, "grad_norm": 0.2376549392938614, "learning_rate": 0.0001168626827595065, "loss": 0.8179, "step": 15075 }, { "epoch": 1.82, "grad_norm": 0.23913566768169403, "learning_rate": 0.0001167623289497754, "loss": 0.9149, "step": 15080 }, { "epoch": 1.82, "grad_norm": 0.2533036172389984, "learning_rate": 0.00011666199078279604, "loss": 0.8483, "step": 15085 }, { "epoch": 1.82, "grad_norm": 0.23935401439666748, "learning_rate": 0.00011656166830579087, "loss": 0.8704, "step": 15090 }, { "epoch": 1.82, "grad_norm": 0.243647962808609, "learning_rate": 0.00011646136156597513, "loss": 0.8779, "step": 15095 }, { "epoch": 1.82, "grad_norm": 0.27903953194618225, "learning_rate": 0.0001163610706105564, "loss": 0.8851, "step": 15100 }, { "epoch": 1.82, "grad_norm": 0.25441214442253113, "learning_rate": 0.00011626079548673496, "loss": 0.8484, "step": 15105 }, { "epoch": 1.82, "grad_norm": 0.23863151669502258, "learning_rate": 
0.00011616053624170359, "loss": 0.8271, "step": 15110 }, { "epoch": 1.82, "grad_norm": 0.271961510181427, "learning_rate": 0.00011606029292264766, "loss": 0.855, "step": 15115 }, { "epoch": 1.82, "grad_norm": 0.25115951895713806, "learning_rate": 0.00011596006557674497, "loss": 0.8599, "step": 15120 }, { "epoch": 1.82, "grad_norm": 0.21527209877967834, "learning_rate": 0.00011585985425116589, "loss": 0.8878, "step": 15125 }, { "epoch": 1.82, "grad_norm": 0.2371513992547989, "learning_rate": 0.00011575965899307306, "loss": 0.8347, "step": 15130 }, { "epoch": 1.82, "grad_norm": 0.26192623376846313, "learning_rate": 0.00011565947984962187, "loss": 0.814, "step": 15135 }, { "epoch": 1.82, "grad_norm": 0.2839601933956146, "learning_rate": 0.00011555931686795987, "loss": 0.8186, "step": 15140 }, { "epoch": 1.82, "grad_norm": 0.2582460045814514, "learning_rate": 0.0001154591700952271, "loss": 0.7673, "step": 15145 }, { "epoch": 1.83, "grad_norm": 0.2260724902153015, "learning_rate": 0.0001153590395785559, "loss": 0.8031, "step": 15150 }, { "epoch": 1.83, "grad_norm": 0.2650599777698517, "learning_rate": 0.00011525892536507111, "loss": 0.8469, "step": 15155 }, { "epoch": 1.83, "grad_norm": 0.280758798122406, "learning_rate": 0.00011515882750188976, "loss": 0.8937, "step": 15160 }, { "epoch": 1.83, "grad_norm": 0.23013721406459808, "learning_rate": 0.00011505874603612122, "loss": 0.8758, "step": 15165 }, { "epoch": 1.83, "grad_norm": 0.23622775077819824, "learning_rate": 0.0001149586810148671, "loss": 0.9219, "step": 15170 }, { "epoch": 1.83, "grad_norm": 0.23313015699386597, "learning_rate": 0.00011485863248522144, "loss": 0.8072, "step": 15175 }, { "epoch": 1.83, "grad_norm": 0.27392974495887756, "learning_rate": 0.00011475860049427036, "loss": 0.7727, "step": 15180 }, { "epoch": 1.83, "grad_norm": 0.24279291927814484, "learning_rate": 0.00011465858508909219, "loss": 0.8076, "step": 15185 }, { "epoch": 1.83, "grad_norm": 0.22247040271759033, "learning_rate": 
0.00011455858631675752, "loss": 0.8597, "step": 15190 }, { "epoch": 1.83, "grad_norm": 0.23884879052639008, "learning_rate": 0.00011445860422432913, "loss": 0.9097, "step": 15195 }, { "epoch": 1.83, "grad_norm": 0.22139222919940948, "learning_rate": 0.00011435863885886188, "loss": 0.8262, "step": 15200 }, { "epoch": 1.83, "grad_norm": 0.20830343663692474, "learning_rate": 0.00011425869026740278, "loss": 0.8878, "step": 15205 }, { "epoch": 1.83, "grad_norm": 0.23654043674468994, "learning_rate": 0.00011415875849699094, "loss": 0.8093, "step": 15210 }, { "epoch": 1.83, "grad_norm": 0.2731797993183136, "learning_rate": 0.00011405884359465766, "loss": 0.8693, "step": 15215 }, { "epoch": 1.83, "grad_norm": 0.2721179723739624, "learning_rate": 0.00011395894560742612, "loss": 0.7557, "step": 15220 }, { "epoch": 1.83, "grad_norm": 0.2314218133687973, "learning_rate": 0.0001138590645823117, "loss": 0.911, "step": 15225 }, { "epoch": 1.84, "grad_norm": 0.2549746036529541, "learning_rate": 0.00011375920056632164, "loss": 0.8295, "step": 15230 }, { "epoch": 1.84, "grad_norm": 0.2613559365272522, "learning_rate": 0.00011365935360645536, "loss": 0.8176, "step": 15235 }, { "epoch": 1.84, "grad_norm": 0.24376095831394196, "learning_rate": 0.0001135595237497041, "loss": 0.8712, "step": 15240 }, { "epoch": 1.84, "grad_norm": 0.2682994604110718, "learning_rate": 0.00011345971104305111, "loss": 0.8861, "step": 15245 }, { "epoch": 1.84, "grad_norm": 0.2671447992324829, "learning_rate": 0.0001133599155334715, "loss": 0.9091, "step": 15250 }, { "epoch": 1.84, "grad_norm": 0.2641143500804901, "learning_rate": 0.00011326013726793249, "loss": 0.8653, "step": 15255 }, { "epoch": 1.84, "grad_norm": 0.2844027280807495, "learning_rate": 0.00011316037629339299, "loss": 0.8379, "step": 15260 }, { "epoch": 1.84, "grad_norm": 0.25467634201049805, "learning_rate": 0.00011306063265680384, "loss": 0.7981, "step": 15265 }, { "epoch": 1.84, "grad_norm": 0.27292025089263916, "learning_rate": 
0.00011296090640510758, "loss": 0.9155, "step": 15270 }, { "epoch": 1.84, "grad_norm": 0.27698391675949097, "learning_rate": 0.00011286119758523885, "loss": 0.8058, "step": 15275 }, { "epoch": 1.84, "grad_norm": 0.2688562870025635, "learning_rate": 0.00011276150624412388, "loss": 0.8207, "step": 15280 }, { "epoch": 1.84, "grad_norm": 0.2935139238834381, "learning_rate": 0.00011266183242868073, "loss": 0.8824, "step": 15285 }, { "epoch": 1.84, "grad_norm": 0.26238155364990234, "learning_rate": 0.00011256217618581916, "loss": 0.8231, "step": 15290 }, { "epoch": 1.84, "grad_norm": 0.23228612542152405, "learning_rate": 0.00011246253756244079, "loss": 0.8126, "step": 15295 }, { "epoch": 1.84, "grad_norm": 0.2749589681625366, "learning_rate": 0.00011236291660543881, "loss": 0.8331, "step": 15300 }, { "epoch": 1.84, "grad_norm": 0.26084092259407043, "learning_rate": 0.0001122633133616982, "loss": 0.9848, "step": 15305 }, { "epoch": 1.84, "grad_norm": 0.2661038041114807, "learning_rate": 0.0001121637278780954, "loss": 0.873, "step": 15310 }, { "epoch": 1.85, "grad_norm": 0.27569863200187683, "learning_rate": 0.00011206416020149887, "loss": 0.8332, "step": 15315 }, { "epoch": 1.85, "grad_norm": 0.23671133816242218, "learning_rate": 0.00011196461037876834, "loss": 0.8793, "step": 15320 }, { "epoch": 1.85, "grad_norm": 0.26371756196022034, "learning_rate": 0.00011186507845675527, "loss": 0.8082, "step": 15325 }, { "epoch": 1.85, "grad_norm": 0.2549302875995636, "learning_rate": 0.00011176556448230271, "loss": 0.7892, "step": 15330 }, { "epoch": 1.85, "grad_norm": 0.2468237727880478, "learning_rate": 0.00011166606850224524, "loss": 0.7797, "step": 15335 }, { "epoch": 1.85, "grad_norm": 0.2780306041240692, "learning_rate": 0.00011156659056340898, "loss": 0.7513, "step": 15340 }, { "epoch": 1.85, "grad_norm": 0.264596551656723, "learning_rate": 0.00011146713071261145, "loss": 0.8805, "step": 15345 }, { "epoch": 1.85, "grad_norm": 0.2658274173736572, "learning_rate": 
0.00011136768899666191, "loss": 0.8718, "step": 15350 }, { "epoch": 1.85, "grad_norm": 0.24554196000099182, "learning_rate": 0.00011126826546236087, "loss": 0.943, "step": 15355 }, { "epoch": 1.85, "grad_norm": 0.24591578543186188, "learning_rate": 0.00011116886015650035, "loss": 0.7976, "step": 15360 }, { "epoch": 1.85, "grad_norm": 0.2368362545967102, "learning_rate": 0.00011106947312586373, "loss": 0.867, "step": 15365 }, { "epoch": 1.85, "grad_norm": 0.2802934944629669, "learning_rate": 0.00011097010441722595, "loss": 0.8927, "step": 15370 }, { "epoch": 1.85, "grad_norm": 0.25117719173431396, "learning_rate": 0.00011087075407735316, "loss": 0.8307, "step": 15375 }, { "epoch": 1.85, "grad_norm": 0.23962007462978363, "learning_rate": 0.00011077142215300297, "loss": 0.8692, "step": 15380 }, { "epoch": 1.85, "grad_norm": 0.22337029874324799, "learning_rate": 0.00011067210869092417, "loss": 0.7974, "step": 15385 }, { "epoch": 1.85, "grad_norm": 0.25178852677345276, "learning_rate": 0.00011057281373785712, "loss": 0.9266, "step": 15390 }, { "epoch": 1.85, "grad_norm": 0.25446346402168274, "learning_rate": 0.00011047353734053327, "loss": 0.8743, "step": 15395 }, { "epoch": 1.86, "grad_norm": 0.26338401436805725, "learning_rate": 0.0001103742795456754, "loss": 0.8066, "step": 15400 }, { "epoch": 1.86, "grad_norm": 0.2573286294937134, "learning_rate": 0.00011027504039999744, "loss": 0.8641, "step": 15405 }, { "epoch": 1.86, "grad_norm": 0.2608354091644287, "learning_rate": 0.00011017581995020475, "loss": 0.8588, "step": 15410 }, { "epoch": 1.86, "grad_norm": 0.23720592260360718, "learning_rate": 0.0001100766182429937, "loss": 0.8744, "step": 15415 }, { "epoch": 1.86, "grad_norm": 0.26355910301208496, "learning_rate": 0.00010997743532505192, "loss": 0.8457, "step": 15420 }, { "epoch": 1.86, "grad_norm": 0.3250594139099121, "learning_rate": 0.00010987827124305812, "loss": 0.7379, "step": 15425 }, { "epoch": 1.86, "grad_norm": 0.2539902329444885, "learning_rate": 
0.00010977912604368232, "loss": 0.7225, "step": 15430 }, { "epoch": 1.86, "grad_norm": 0.23028779029846191, "learning_rate": 0.00010967999977358551, "loss": 0.8119, "step": 15435 }, { "epoch": 1.86, "grad_norm": 0.23592324554920197, "learning_rate": 0.00010958089247941981, "loss": 0.8199, "step": 15440 }, { "epoch": 1.86, "grad_norm": 0.2703402638435364, "learning_rate": 0.00010948180420782835, "loss": 0.8367, "step": 15445 }, { "epoch": 1.86, "grad_norm": 0.2614709734916687, "learning_rate": 0.00010938273500544543, "loss": 0.7616, "step": 15450 }, { "epoch": 1.86, "grad_norm": 0.2527773976325989, "learning_rate": 0.00010928368491889626, "loss": 0.9269, "step": 15455 }, { "epoch": 1.86, "grad_norm": 0.2695715129375458, "learning_rate": 0.00010918465399479712, "loss": 0.8234, "step": 15460 }, { "epoch": 1.86, "grad_norm": 0.2826976478099823, "learning_rate": 0.00010908564227975518, "loss": 0.8455, "step": 15465 }, { "epoch": 1.86, "grad_norm": 0.2363000065088272, "learning_rate": 0.00010898664982036877, "loss": 0.8312, "step": 15470 }, { "epoch": 1.86, "grad_norm": 0.29471728205680847, "learning_rate": 0.000108887676663227, "loss": 0.8394, "step": 15475 }, { "epoch": 1.87, "grad_norm": 0.24590782821178436, "learning_rate": 0.00010878872285490984, "loss": 0.8675, "step": 15480 }, { "epoch": 1.87, "grad_norm": 0.26483914256095886, "learning_rate": 0.00010868978844198827, "loss": 0.7959, "step": 15485 }, { "epoch": 1.87, "grad_norm": 0.25184696912765503, "learning_rate": 0.00010859087347102416, "loss": 0.8902, "step": 15490 }, { "epoch": 1.87, "grad_norm": 0.23830364644527435, "learning_rate": 0.00010849197798857015, "loss": 0.9064, "step": 15495 }, { "epoch": 1.87, "grad_norm": 0.24322542548179626, "learning_rate": 0.00010839310204116975, "loss": 0.854, "step": 15500 }, { "epoch": 1.87, "grad_norm": 0.24347706139087677, "learning_rate": 0.0001082942456753572, "loss": 0.8495, "step": 15505 }, { "epoch": 1.87, "grad_norm": 0.24465703964233398, "learning_rate": 
0.0001081954089376577, "loss": 0.8317, "step": 15510 }, { "epoch": 1.87, "grad_norm": 0.24925029277801514, "learning_rate": 0.00010809659187458702, "loss": 0.8258, "step": 15515 }, { "epoch": 1.87, "grad_norm": 0.27092206478118896, "learning_rate": 0.00010799779453265178, "loss": 0.8398, "step": 15520 }, { "epoch": 1.87, "grad_norm": 0.2629016041755676, "learning_rate": 0.00010789901695834921, "loss": 0.7819, "step": 15525 }, { "epoch": 1.87, "grad_norm": 0.2438107430934906, "learning_rate": 0.00010780025919816748, "loss": 0.8834, "step": 15530 }, { "epoch": 1.87, "grad_norm": 0.2341863512992859, "learning_rate": 0.00010770152129858515, "loss": 0.8463, "step": 15535 }, { "epoch": 1.87, "grad_norm": 0.2474951595067978, "learning_rate": 0.00010760280330607161, "loss": 0.8869, "step": 15540 }, { "epoch": 1.87, "grad_norm": 0.2520921528339386, "learning_rate": 0.00010750410526708675, "loss": 0.8906, "step": 15545 }, { "epoch": 1.87, "grad_norm": 0.2502596974372864, "learning_rate": 0.00010740542722808123, "loss": 0.774, "step": 15550 }, { "epoch": 1.87, "grad_norm": 0.2887754440307617, "learning_rate": 0.0001073067692354962, "loss": 0.7609, "step": 15555 }, { "epoch": 1.87, "grad_norm": 0.26220768690109253, "learning_rate": 0.00010720813133576336, "loss": 0.855, "step": 15560 }, { "epoch": 1.88, "grad_norm": 0.264213889837265, "learning_rate": 0.00010710951357530489, "loss": 0.7649, "step": 15565 }, { "epoch": 1.88, "grad_norm": 0.26668137311935425, "learning_rate": 0.00010701091600053379, "loss": 0.8864, "step": 15570 }, { "epoch": 1.88, "grad_norm": 0.2364049255847931, "learning_rate": 0.00010691233865785321, "loss": 0.8681, "step": 15575 }, { "epoch": 1.88, "grad_norm": 0.2663012444972992, "learning_rate": 0.00010681378159365696, "loss": 0.9484, "step": 15580 }, { "epoch": 1.88, "grad_norm": 0.25167354941368103, "learning_rate": 0.00010671524485432926, "loss": 0.8411, "step": 15585 }, { "epoch": 1.88, "grad_norm": 0.2526310384273529, "learning_rate": 
0.00010661672848624477, "loss": 0.8295, "step": 15590 }, { "epoch": 1.88, "grad_norm": 0.25531667470932007, "learning_rate": 0.0001065182325357686, "loss": 0.8014, "step": 15595 }, { "epoch": 1.88, "grad_norm": 0.24380475282669067, "learning_rate": 0.00010641975704925615, "loss": 0.9029, "step": 15600 }, { "epoch": 1.88, "grad_norm": 0.25942888855934143, "learning_rate": 0.00010632130207305324, "loss": 0.8727, "step": 15605 }, { "epoch": 1.88, "grad_norm": 0.26547759771347046, "learning_rate": 0.00010622286765349618, "loss": 0.7635, "step": 15610 }, { "epoch": 1.88, "grad_norm": 0.27288028597831726, "learning_rate": 0.00010612445383691137, "loss": 0.7731, "step": 15615 }, { "epoch": 1.88, "grad_norm": 0.2501557171344757, "learning_rate": 0.00010602606066961564, "loss": 0.8859, "step": 15620 }, { "epoch": 1.88, "grad_norm": 0.25001439452171326, "learning_rate": 0.00010592768819791608, "loss": 0.8516, "step": 15625 }, { "epoch": 1.88, "grad_norm": 0.2799443006515503, "learning_rate": 0.00010582933646811008, "loss": 0.8835, "step": 15630 }, { "epoch": 1.88, "grad_norm": 0.2613951861858368, "learning_rate": 0.00010573100552648517, "loss": 0.9239, "step": 15635 }, { "epoch": 1.88, "grad_norm": 0.27016937732696533, "learning_rate": 0.00010563269541931922, "loss": 0.8181, "step": 15640 }, { "epoch": 1.89, "grad_norm": 0.22996383905410767, "learning_rate": 0.00010553440619288014, "loss": 0.8646, "step": 15645 }, { "epoch": 1.89, "grad_norm": 0.29044246673583984, "learning_rate": 0.00010543613789342621, "loss": 0.8249, "step": 15650 }, { "epoch": 1.89, "grad_norm": 0.2618498206138611, "learning_rate": 0.00010533789056720571, "loss": 0.7699, "step": 15655 }, { "epoch": 1.89, "grad_norm": 0.25093433260917664, "learning_rate": 0.00010523966426045709, "loss": 0.7474, "step": 15660 }, { "epoch": 1.89, "grad_norm": 0.2828831076622009, "learning_rate": 0.00010514145901940887, "loss": 0.7825, "step": 15665 }, { "epoch": 1.89, "grad_norm": 0.2531161308288574, "learning_rate": 
0.0001050432748902798, "loss": 0.8004, "step": 15670 }, { "epoch": 1.89, "grad_norm": 0.23352687060832977, "learning_rate": 0.0001049451119192785, "loss": 0.7407, "step": 15675 }, { "epoch": 1.89, "grad_norm": 0.22876708209514618, "learning_rate": 0.00010484697015260379, "loss": 0.7868, "step": 15680 }, { "epoch": 1.89, "grad_norm": 0.27175286412239075, "learning_rate": 0.00010474884963644434, "loss": 0.7846, "step": 15685 }, { "epoch": 1.89, "grad_norm": 0.2837650775909424, "learning_rate": 0.00010465075041697908, "loss": 0.7969, "step": 15690 }, { "epoch": 1.89, "grad_norm": 0.2851541340351105, "learning_rate": 0.00010455267254037663, "loss": 0.7862, "step": 15695 }, { "epoch": 1.89, "grad_norm": 0.2439732849597931, "learning_rate": 0.00010445461605279579, "loss": 0.8859, "step": 15700 }, { "epoch": 1.89, "grad_norm": 0.24269287288188934, "learning_rate": 0.00010435658100038505, "loss": 0.7997, "step": 15705 }, { "epoch": 1.89, "grad_norm": 0.24573330581188202, "learning_rate": 0.00010425856742928313, "loss": 1.0228, "step": 15710 }, { "epoch": 1.89, "grad_norm": 0.25582510232925415, "learning_rate": 0.00010416057538561842, "loss": 0.8363, "step": 15715 }, { "epoch": 1.89, "grad_norm": 0.21897505223751068, "learning_rate": 0.00010406260491550918, "loss": 0.8839, "step": 15720 }, { "epoch": 1.89, "grad_norm": 0.23649723827838898, "learning_rate": 0.0001039646560650636, "loss": 0.8441, "step": 15725 }, { "epoch": 1.9, "grad_norm": 0.2502078413963318, "learning_rate": 0.00010386672888037969, "loss": 0.8262, "step": 15730 }, { "epoch": 1.9, "grad_norm": 0.2571605145931244, "learning_rate": 0.00010376882340754519, "loss": 0.8712, "step": 15735 }, { "epoch": 1.9, "grad_norm": 0.26057517528533936, "learning_rate": 0.0001036709396926377, "loss": 0.8395, "step": 15740 }, { "epoch": 1.9, "grad_norm": 0.2550731301307678, "learning_rate": 0.00010357307778172445, "loss": 0.8821, "step": 15745 }, { "epoch": 1.9, "grad_norm": 0.2894580066204071, "learning_rate": 
0.00010347523772086268, "loss": 0.8224, "step": 15750 }, { "epoch": 1.9, "grad_norm": 0.24666161835193634, "learning_rate": 0.00010337741955609907, "loss": 0.7983, "step": 15755 }, { "epoch": 1.9, "grad_norm": 0.26421642303466797, "learning_rate": 0.00010327962333347008, "loss": 0.7837, "step": 15760 }, { "epoch": 1.9, "grad_norm": 0.27278557419776917, "learning_rate": 0.00010318184909900188, "loss": 0.757, "step": 15765 }, { "epoch": 1.9, "grad_norm": 0.25606653094291687, "learning_rate": 0.00010308409689871029, "loss": 0.9554, "step": 15770 }, { "epoch": 1.9, "grad_norm": 0.2995527982711792, "learning_rate": 0.00010298636677860074, "loss": 0.8754, "step": 15775 }, { "epoch": 1.9, "grad_norm": 0.2641645669937134, "learning_rate": 0.00010288865878466825, "loss": 0.841, "step": 15780 }, { "epoch": 1.9, "grad_norm": 0.2527436912059784, "learning_rate": 0.00010279097296289741, "loss": 0.7688, "step": 15785 }, { "epoch": 1.9, "grad_norm": 0.25887179374694824, "learning_rate": 0.0001026933093592625, "loss": 0.7526, "step": 15790 }, { "epoch": 1.9, "grad_norm": 0.2504650354385376, "learning_rate": 0.00010259566801972721, "loss": 0.8904, "step": 15795 }, { "epoch": 1.9, "grad_norm": 0.235322505235672, "learning_rate": 0.00010249804899024482, "loss": 0.7883, "step": 15800 }, { "epoch": 1.9, "grad_norm": 0.250555157661438, "learning_rate": 0.00010240045231675802, "loss": 0.8137, "step": 15805 }, { "epoch": 1.9, "grad_norm": 0.2679235637187958, "learning_rate": 0.00010230287804519914, "loss": 0.8408, "step": 15810 }, { "epoch": 1.91, "grad_norm": 0.2516213357448578, "learning_rate": 0.00010220532622148982, "loss": 0.8198, "step": 15815 }, { "epoch": 1.91, "grad_norm": 0.26022079586982727, "learning_rate": 0.00010210779689154118, "loss": 0.8253, "step": 15820 }, { "epoch": 1.91, "grad_norm": 0.3123502731323242, "learning_rate": 0.0001020102901012537, "loss": 0.8452, "step": 15825 }, { "epoch": 1.91, "grad_norm": 0.24394842982292175, "learning_rate": 0.00010191280589651746, 
"loss": 0.8889, "step": 15830 }, { "epoch": 1.91, "grad_norm": 0.24881669878959656, "learning_rate": 0.00010181534432321171, "loss": 0.8135, "step": 15835 }, { "epoch": 1.91, "grad_norm": 0.2647436261177063, "learning_rate": 0.00010171790542720504, "loss": 0.8659, "step": 15840 }, { "epoch": 1.91, "grad_norm": 0.2636089324951172, "learning_rate": 0.00010162048925435549, "loss": 0.891, "step": 15845 }, { "epoch": 1.91, "grad_norm": 0.2648860216140747, "learning_rate": 0.00010152309585051035, "loss": 0.7962, "step": 15850 }, { "epoch": 1.91, "grad_norm": 0.2279883474111557, "learning_rate": 0.00010142572526150616, "loss": 0.9005, "step": 15855 }, { "epoch": 1.91, "grad_norm": 0.297305703163147, "learning_rate": 0.0001013283775331687, "loss": 0.7451, "step": 15860 }, { "epoch": 1.91, "grad_norm": 0.2450573891401291, "learning_rate": 0.00010123105271131319, "loss": 0.8414, "step": 15865 }, { "epoch": 1.91, "grad_norm": 0.2383514642715454, "learning_rate": 0.00010113375084174382, "loss": 0.8569, "step": 15870 }, { "epoch": 1.91, "grad_norm": 0.23251613974571228, "learning_rate": 0.00010103647197025414, "loss": 0.8139, "step": 15875 }, { "epoch": 1.91, "grad_norm": 0.2642490863800049, "learning_rate": 0.0001009392161426267, "loss": 0.8877, "step": 15880 }, { "epoch": 1.91, "grad_norm": 0.25998982787132263, "learning_rate": 0.00010084198340463345, "loss": 0.8697, "step": 15885 }, { "epoch": 1.91, "grad_norm": 0.2662920355796814, "learning_rate": 0.00010074477380203529, "loss": 0.7452, "step": 15890 }, { "epoch": 1.92, "grad_norm": 0.24700447916984558, "learning_rate": 0.00010064758738058231, "loss": 0.856, "step": 15895 }, { "epoch": 1.92, "grad_norm": 0.26831844449043274, "learning_rate": 0.0001005504241860136, "loss": 0.7415, "step": 15900 }, { "epoch": 1.92, "grad_norm": 0.26766031980514526, "learning_rate": 0.00010045328426405749, "loss": 0.7633, "step": 15905 }, { "epoch": 1.92, "grad_norm": 0.24661500751972198, "learning_rate": 0.00010035616766043119, "loss": 
0.8742, "step": 15910 }, { "epoch": 1.92, "grad_norm": 0.2439432591199875, "learning_rate": 0.00010025907442084102, "loss": 0.7972, "step": 15915 }, { "epoch": 1.92, "grad_norm": 0.236423522233963, "learning_rate": 0.0001001620045909822, "loss": 0.7564, "step": 15920 }, { "epoch": 1.92, "grad_norm": 0.26059049367904663, "learning_rate": 0.00010006495821653914, "loss": 0.824, "step": 15925 }, { "epoch": 1.92, "grad_norm": 0.2574262022972107, "learning_rate": 9.996793534318505e-05, "loss": 0.8156, "step": 15930 }, { "epoch": 1.92, "grad_norm": 0.2577226758003235, "learning_rate": 9.987093601658209e-05, "loss": 0.8548, "step": 15935 }, { "epoch": 1.92, "grad_norm": 0.2414378523826599, "learning_rate": 9.977396028238136e-05, "loss": 0.8627, "step": 15940 }, { "epoch": 1.92, "grad_norm": 0.2435644119977951, "learning_rate": 9.969639671222088e-05, "loss": 0.8817, "step": 15945 }, { "epoch": 1.92, "grad_norm": 0.25886785984039307, "learning_rate": 9.959946355934948e-05, "loss": 0.7624, "step": 15950 }, { "epoch": 1.92, "grad_norm": 0.22503872215747833, "learning_rate": 9.950255412664435e-05, "loss": 0.9167, "step": 15955 }, { "epoch": 1.92, "grad_norm": 0.24574321508407593, "learning_rate": 9.940566845971425e-05, "loss": 0.7814, "step": 15960 }, { "epoch": 1.92, "grad_norm": 0.23402521014213562, "learning_rate": 9.93088066041567e-05, "loss": 0.8389, "step": 15965 }, { "epoch": 1.92, "grad_norm": 0.27619630098342896, "learning_rate": 9.921196860555813e-05, "loss": 0.8364, "step": 15970 }, { "epoch": 1.92, "grad_norm": 0.2658560872077942, "learning_rate": 9.91151545094938e-05, "loss": 0.8762, "step": 15975 }, { "epoch": 1.93, "grad_norm": 0.2881810963153839, "learning_rate": 9.90183643615276e-05, "loss": 0.8234, "step": 15980 }, { "epoch": 1.93, "grad_norm": 0.2377784252166748, "learning_rate": 9.892159820721216e-05, "loss": 0.8424, "step": 15985 }, { "epoch": 1.93, "grad_norm": 0.22696682810783386, "learning_rate": 9.882485609208885e-05, "loss": 0.7891, "step": 15990 }, { 
"epoch": 1.93, "grad_norm": 0.2700740098953247, "learning_rate": 9.872813806168778e-05, "loss": 0.8931, "step": 15995 }, { "epoch": 1.93, "grad_norm": 0.23319804668426514, "learning_rate": 9.86314441615276e-05, "loss": 0.7404, "step": 16000 }, { "epoch": 1.93, "grad_norm": 0.23562254011631012, "learning_rate": 9.853477443711572e-05, "loss": 0.9749, "step": 16005 }, { "epoch": 1.93, "grad_norm": 0.23507817089557648, "learning_rate": 9.843812893394801e-05, "loss": 0.8976, "step": 16010 }, { "epoch": 1.93, "grad_norm": 0.25539952516555786, "learning_rate": 9.834150769750921e-05, "loss": 0.8241, "step": 16015 }, { "epoch": 1.93, "grad_norm": 0.24266016483306885, "learning_rate": 9.824491077327242e-05, "loss": 0.8013, "step": 16020 }, { "epoch": 1.93, "grad_norm": 0.29498210549354553, "learning_rate": 9.814833820669934e-05, "loss": 0.8838, "step": 16025 }, { "epoch": 1.93, "grad_norm": 0.26613032817840576, "learning_rate": 9.805179004324022e-05, "loss": 0.8375, "step": 16030 }, { "epoch": 1.93, "grad_norm": 0.2595424950122833, "learning_rate": 9.795526632833388e-05, "loss": 0.8382, "step": 16035 }, { "epoch": 1.93, "grad_norm": 0.24384133517742157, "learning_rate": 9.785876710740755e-05, "loss": 0.9724, "step": 16040 }, { "epoch": 1.93, "grad_norm": 0.2365197241306305, "learning_rate": 9.776229242587701e-05, "loss": 0.8506, "step": 16045 }, { "epoch": 1.93, "grad_norm": 0.2736034095287323, "learning_rate": 9.766584232914633e-05, "loss": 0.818, "step": 16050 }, { "epoch": 1.93, "grad_norm": 0.2698809504508972, "learning_rate": 9.756941686260826e-05, "loss": 0.7629, "step": 16055 }, { "epoch": 1.94, "grad_norm": 0.24631649255752563, "learning_rate": 9.747301607164378e-05, "loss": 0.8116, "step": 16060 }, { "epoch": 1.94, "grad_norm": 0.23516754806041718, "learning_rate": 9.737664000162233e-05, "loss": 0.8521, "step": 16065 }, { "epoch": 1.94, "grad_norm": 0.22055235505104065, "learning_rate": 9.728028869790162e-05, "loss": 0.8474, "step": 16070 }, { "epoch": 1.94, 
"grad_norm": 0.27016615867614746, "learning_rate": 9.718396220582785e-05, "loss": 0.8257, "step": 16075 }, { "epoch": 1.94, "grad_norm": 0.26734715700149536, "learning_rate": 9.708766057073543e-05, "loss": 0.7563, "step": 16080 }, { "epoch": 1.94, "grad_norm": 0.2916874587535858, "learning_rate": 9.69913838379471e-05, "loss": 0.7673, "step": 16085 }, { "epoch": 1.94, "grad_norm": 0.23642754554748535, "learning_rate": 9.689513205277387e-05, "loss": 0.8493, "step": 16090 }, { "epoch": 1.94, "grad_norm": 0.2527269721031189, "learning_rate": 9.679890526051507e-05, "loss": 0.8293, "step": 16095 }, { "epoch": 1.94, "grad_norm": 0.2781146168708801, "learning_rate": 9.670270350645823e-05, "loss": 0.728, "step": 16100 }, { "epoch": 1.94, "grad_norm": 0.24582897126674652, "learning_rate": 9.660652683587907e-05, "loss": 0.8191, "step": 16105 }, { "epoch": 1.94, "grad_norm": 0.28193217515945435, "learning_rate": 9.65103752940415e-05, "loss": 0.8848, "step": 16110 }, { "epoch": 1.94, "grad_norm": 0.2869209349155426, "learning_rate": 9.641424892619766e-05, "loss": 0.7661, "step": 16115 }, { "epoch": 1.94, "grad_norm": 0.25964149832725525, "learning_rate": 9.631814777758782e-05, "loss": 0.8795, "step": 16120 }, { "epoch": 1.94, "grad_norm": 0.2651737332344055, "learning_rate": 9.622207189344035e-05, "loss": 0.8498, "step": 16125 }, { "epoch": 1.94, "grad_norm": 0.24850745499134064, "learning_rate": 9.612602131897169e-05, "loss": 0.8648, "step": 16130 }, { "epoch": 1.94, "grad_norm": 0.2507181167602539, "learning_rate": 9.602999609938658e-05, "loss": 0.7739, "step": 16135 }, { "epoch": 1.94, "grad_norm": 0.24325953423976898, "learning_rate": 9.593399627987757e-05, "loss": 0.8611, "step": 16140 }, { "epoch": 1.95, "grad_norm": 0.2649693489074707, "learning_rate": 9.58380219056254e-05, "loss": 0.7544, "step": 16145 }, { "epoch": 1.95, "grad_norm": 0.25593236088752747, "learning_rate": 9.574207302179874e-05, "loss": 0.8856, "step": 16150 }, { "epoch": 1.95, "grad_norm": 
0.22325646877288818, "learning_rate": 9.56461496735544e-05, "loss": 0.7696, "step": 16155 }, { "epoch": 1.95, "grad_norm": 0.2551780343055725, "learning_rate": 9.555025190603709e-05, "loss": 0.8319, "step": 16160 }, { "epoch": 1.95, "grad_norm": 0.2650226354598999, "learning_rate": 9.54543797643794e-05, "loss": 0.873, "step": 16165 }, { "epoch": 1.95, "grad_norm": 0.25608155131340027, "learning_rate": 9.5358533293702e-05, "loss": 0.8617, "step": 16170 }, { "epoch": 1.95, "grad_norm": 0.2500768303871155, "learning_rate": 9.526271253911346e-05, "loss": 0.8167, "step": 16175 }, { "epoch": 1.95, "grad_norm": 0.24089059233665466, "learning_rate": 9.516691754571015e-05, "loss": 0.8327, "step": 16180 }, { "epoch": 1.95, "grad_norm": 0.24258366227149963, "learning_rate": 9.50711483585764e-05, "loss": 0.8146, "step": 16185 }, { "epoch": 1.95, "grad_norm": 0.2549149990081787, "learning_rate": 9.49754050227843e-05, "loss": 0.8334, "step": 16190 }, { "epoch": 1.95, "grad_norm": 0.23933325707912445, "learning_rate": 9.487968758339395e-05, "loss": 0.8018, "step": 16195 }, { "epoch": 1.95, "grad_norm": 0.24095794558525085, "learning_rate": 9.478399608545314e-05, "loss": 0.8354, "step": 16200 }, { "epoch": 1.95, "grad_norm": 0.2553461492061615, "learning_rate": 9.468833057399741e-05, "loss": 0.8267, "step": 16205 }, { "epoch": 1.95, "grad_norm": 0.22408875823020935, "learning_rate": 9.459269109405017e-05, "loss": 0.9216, "step": 16210 }, { "epoch": 1.95, "grad_norm": 0.2684914469718933, "learning_rate": 9.44970776906225e-05, "loss": 0.7983, "step": 16215 }, { "epoch": 1.95, "grad_norm": 0.26553064584732056, "learning_rate": 9.440149040871329e-05, "loss": 0.8494, "step": 16220 }, { "epoch": 1.95, "grad_norm": 0.2991175055503845, "learning_rate": 9.430592929330907e-05, "loss": 0.9366, "step": 16225 }, { "epoch": 1.96, "grad_norm": 0.2854938209056854, "learning_rate": 9.421039438938399e-05, "loss": 0.7927, "step": 16230 }, { "epoch": 1.96, "grad_norm": 0.23396198451519012, 
"learning_rate": 9.41148857419001e-05, "loss": 0.8203, "step": 16235 }, { "epoch": 1.96, "grad_norm": 0.2335088849067688, "learning_rate": 9.401940339580687e-05, "loss": 0.8674, "step": 16240 }, { "epoch": 1.96, "grad_norm": 0.27896398305892944, "learning_rate": 9.392394739604141e-05, "loss": 0.8955, "step": 16245 }, { "epoch": 1.96, "grad_norm": 0.27797919511795044, "learning_rate": 9.382851778752858e-05, "loss": 0.8088, "step": 16250 }, { "epoch": 1.96, "grad_norm": 0.22393245995044708, "learning_rate": 9.373311461518066e-05, "loss": 0.7955, "step": 16255 }, { "epoch": 1.96, "grad_norm": 0.2545322775840759, "learning_rate": 9.363773792389759e-05, "loss": 0.8558, "step": 16260 }, { "epoch": 1.96, "grad_norm": 0.23659881949424744, "learning_rate": 9.354238775856672e-05, "loss": 0.9073, "step": 16265 }, { "epoch": 1.96, "grad_norm": 0.2683257460594177, "learning_rate": 9.344706416406312e-05, "loss": 0.7528, "step": 16270 }, { "epoch": 1.96, "grad_norm": 0.2514216899871826, "learning_rate": 9.335176718524919e-05, "loss": 0.8392, "step": 16275 }, { "epoch": 1.96, "grad_norm": 0.22830678522586823, "learning_rate": 9.325649686697485e-05, "loss": 0.7843, "step": 16280 }, { "epoch": 1.96, "grad_norm": 0.2406618446111679, "learning_rate": 9.316125325407746e-05, "loss": 0.8878, "step": 16285 }, { "epoch": 1.96, "grad_norm": 0.24721916019916534, "learning_rate": 9.306603639138187e-05, "loss": 0.8405, "step": 16290 }, { "epoch": 1.96, "grad_norm": 0.2820783853530884, "learning_rate": 9.297084632370026e-05, "loss": 0.8073, "step": 16295 }, { "epoch": 1.96, "grad_norm": 0.24439501762390137, "learning_rate": 9.287568309583227e-05, "loss": 0.9226, "step": 16300 }, { "epoch": 1.96, "grad_norm": 0.25293993949890137, "learning_rate": 9.278054675256479e-05, "loss": 0.7773, "step": 16305 }, { "epoch": 1.97, "grad_norm": 0.26933521032333374, "learning_rate": 9.268543733867225e-05, "loss": 0.8361, "step": 16310 }, { "epoch": 1.97, "grad_norm": 0.27007099986076355, "learning_rate": 
9.259035489891628e-05, "loss": 0.7993, "step": 16315 }, { "epoch": 1.97, "grad_norm": 0.25989168882369995, "learning_rate": 9.24952994780458e-05, "loss": 0.8354, "step": 16320 }, { "epoch": 1.97, "grad_norm": 0.2629483640193939, "learning_rate": 9.240027112079702e-05, "loss": 0.7799, "step": 16325 }, { "epoch": 1.97, "grad_norm": 0.24853236973285675, "learning_rate": 9.230526987189351e-05, "loss": 0.7944, "step": 16330 }, { "epoch": 1.97, "grad_norm": 0.2612740099430084, "learning_rate": 9.2210295776046e-05, "loss": 0.7517, "step": 16335 }, { "epoch": 1.97, "grad_norm": 0.2724323570728302, "learning_rate": 9.211534887795241e-05, "loss": 0.8058, "step": 16340 }, { "epoch": 1.97, "grad_norm": 0.2580575942993164, "learning_rate": 9.202042922229788e-05, "loss": 0.8327, "step": 16345 }, { "epoch": 1.97, "grad_norm": 0.29225781559944153, "learning_rate": 9.192553685375488e-05, "loss": 0.847, "step": 16350 }, { "epoch": 1.97, "grad_norm": 0.26023924350738525, "learning_rate": 9.18306718169828e-05, "loss": 0.8871, "step": 16355 }, { "epoch": 1.97, "grad_norm": 0.26399603486061096, "learning_rate": 9.173583415662835e-05, "loss": 0.8788, "step": 16360 }, { "epoch": 1.97, "grad_norm": 0.2926609516143799, "learning_rate": 9.164102391732514e-05, "loss": 0.7472, "step": 16365 }, { "epoch": 1.97, "grad_norm": 0.2964513897895813, "learning_rate": 9.156519549902523e-05, "loss": 0.88, "step": 16370 }, { "epoch": 1.97, "grad_norm": 0.28954005241394043, "learning_rate": 9.147043473005033e-05, "loss": 0.7696, "step": 16375 }, { "epoch": 1.97, "grad_norm": 0.2357548326253891, "learning_rate": 9.137570150703257e-05, "loss": 0.7794, "step": 16380 }, { "epoch": 1.97, "grad_norm": 0.22681371867656708, "learning_rate": 9.128099587455652e-05, "loss": 0.8403, "step": 16385 }, { "epoch": 1.97, "grad_norm": 0.26432034373283386, "learning_rate": 9.118631787719381e-05, "loss": 0.8527, "step": 16390 }, { "epoch": 1.98, "grad_norm": 0.2536108195781708, "learning_rate": 9.109166755950302e-05, "loss": 
0.8427, "step": 16395 }, { "epoch": 1.98, "grad_norm": 0.22776862978935242, "learning_rate": 9.101596726464934e-05, "loss": 0.8992, "step": 16400 }, { "epoch": 1.98, "grad_norm": 0.24553075432777405, "learning_rate": 9.092136688261414e-05, "loss": 0.8519, "step": 16405 }, { "epoch": 1.98, "grad_norm": 0.25476738810539246, "learning_rate": 9.082679430494567e-05, "loss": 0.7777, "step": 16410 }, { "epoch": 1.98, "grad_norm": 0.2561832368373871, "learning_rate": 9.07322495761529e-05, "loss": 0.8118, "step": 16415 }, { "epoch": 1.98, "grad_norm": 0.26670464873313904, "learning_rate": 9.06377327407317e-05, "loss": 0.873, "step": 16420 }, { "epoch": 1.98, "grad_norm": 0.261623352766037, "learning_rate": 9.05432438431648e-05, "loss": 0.7815, "step": 16425 }, { "epoch": 1.98, "grad_norm": 0.283966600894928, "learning_rate": 9.044878292792187e-05, "loss": 0.8202, "step": 16430 }, { "epoch": 1.98, "grad_norm": 0.23486657440662384, "learning_rate": 9.035435003945933e-05, "loss": 0.802, "step": 16435 }, { "epoch": 1.98, "grad_norm": 0.233073890209198, "learning_rate": 9.025994522222043e-05, "loss": 0.8367, "step": 16440 }, { "epoch": 1.98, "grad_norm": 0.24187245965003967, "learning_rate": 9.016556852063515e-05, "loss": 0.894, "step": 16445 }, { "epoch": 1.98, "grad_norm": 0.2719327211380005, "learning_rate": 9.007121997912044e-05, "loss": 0.9086, "step": 16450 }, { "epoch": 1.98, "grad_norm": 0.2707170844078064, "learning_rate": 8.997689964207978e-05, "loss": 0.7866, "step": 16455 }, { "epoch": 1.98, "grad_norm": 0.2778245508670807, "learning_rate": 8.988260755390346e-05, "loss": 0.8574, "step": 16460 }, { "epoch": 1.98, "grad_norm": 0.24011406302452087, "learning_rate": 8.978834375896841e-05, "loss": 0.8875, "step": 16465 }, { "epoch": 1.98, "grad_norm": 0.26258188486099243, "learning_rate": 8.96941083016384e-05, "loss": 0.8013, "step": 16470 }, { "epoch": 1.99, "grad_norm": 0.25577783584594727, "learning_rate": 8.95999012262637e-05, "loss": 0.7737, "step": 16475 }, { 
"epoch": 1.99, "grad_norm": 0.2674656808376312, "learning_rate": 8.950572257718132e-05, "loss": 0.8443, "step": 16480 }, { "epoch": 1.99, "grad_norm": 0.2525770962238312, "learning_rate": 8.941157239871479e-05, "loss": 0.8448, "step": 16485 }, { "epoch": 1.99, "grad_norm": 0.23468473553657532, "learning_rate": 8.931745073517443e-05, "loss": 0.883, "step": 16490 }, { "epoch": 1.99, "grad_norm": 0.23663926124572754, "learning_rate": 8.922335763085696e-05, "loss": 0.8718, "step": 16495 }, { "epoch": 1.99, "grad_norm": 0.22883319854736328, "learning_rate": 8.912929313004572e-05, "loss": 0.9618, "step": 16500 }, { "epoch": 1.99, "grad_norm": 0.24675635993480682, "learning_rate": 8.903525727701054e-05, "loss": 0.8928, "step": 16505 }, { "epoch": 1.99, "grad_norm": 0.2842852771282196, "learning_rate": 8.89412501160079e-05, "loss": 0.8255, "step": 16510 }, { "epoch": 1.99, "grad_norm": 0.26586049795150757, "learning_rate": 8.884727169128066e-05, "loss": 0.9482, "step": 16515 }, { "epoch": 1.99, "grad_norm": 0.3201524615287781, "learning_rate": 8.875332204705818e-05, "loss": 0.8118, "step": 16520 }, { "epoch": 1.99, "grad_norm": 0.2589406967163086, "learning_rate": 8.865940122755623e-05, "loss": 0.8802, "step": 16525 }, { "epoch": 1.99, "grad_norm": 0.28572580218315125, "learning_rate": 8.85655092769772e-05, "loss": 0.9211, "step": 16530 }, { "epoch": 1.99, "grad_norm": 0.24606913328170776, "learning_rate": 8.847164623950965e-05, "loss": 0.8589, "step": 16535 }, { "epoch": 1.99, "grad_norm": 0.2393767237663269, "learning_rate": 8.837781215932862e-05, "loss": 0.8135, "step": 16540 }, { "epoch": 1.99, "grad_norm": 0.26532062888145447, "learning_rate": 8.828400708059567e-05, "loss": 0.7389, "step": 16545 }, { "epoch": 1.99, "grad_norm": 0.25288575887680054, "learning_rate": 8.81902310474585e-05, "loss": 0.8357, "step": 16550 }, { "epoch": 1.99, "grad_norm": 0.23215311765670776, "learning_rate": 8.809648410405123e-05, "loss": 0.7262, "step": 16555 }, { "epoch": 2.0, 
"grad_norm": 0.2512628734111786, "learning_rate": 8.800276629449426e-05, "loss": 0.7932, "step": 16560 }, { "epoch": 2.0, "grad_norm": 0.23022831976413727, "learning_rate": 8.790907766289437e-05, "loss": 0.7226, "step": 16565 }, { "epoch": 2.0, "grad_norm": 0.24034039676189423, "learning_rate": 8.781541825334453e-05, "loss": 0.8625, "step": 16570 }, { "epoch": 2.0, "grad_norm": 0.23891344666481018, "learning_rate": 8.772178810992392e-05, "loss": 0.8287, "step": 16575 }, { "epoch": 2.0, "grad_norm": 0.23461419343948364, "learning_rate": 8.762818727669797e-05, "loss": 0.7968, "step": 16580 }, { "epoch": 2.0, "grad_norm": 0.24658669531345367, "learning_rate": 8.753461579771846e-05, "loss": 0.787, "step": 16585 }, { "epoch": 2.0, "grad_norm": 0.23091553151607513, "learning_rate": 8.744107371702315e-05, "loss": 0.8718, "step": 16590 }, { "epoch": 2.0, "grad_norm": 0.26409897208213806, "learning_rate": 8.734756107863608e-05, "loss": 0.8378, "step": 16595 }, { "epoch": 2.0, "grad_norm": 0.27900955080986023, "learning_rate": 8.725407792656731e-05, "loss": 0.7441, "step": 16600 }, { "epoch": 2.0, "grad_norm": 0.2287808060646057, "learning_rate": 8.716062430481328e-05, "loss": 0.8506, "step": 16605 }, { "epoch": 2.0, "grad_norm": 0.26879292726516724, "learning_rate": 8.706720025735627e-05, "loss": 0.8325, "step": 16610 }, { "epoch": 2.0, "grad_norm": 0.28211572766304016, "learning_rate": 8.697380582816476e-05, "loss": 0.8599, "step": 16615 }, { "epoch": 2.0, "grad_norm": 0.2926715016365051, "learning_rate": 8.688044106119325e-05, "loss": 0.7717, "step": 16620 }, { "epoch": 2.0, "grad_norm": 0.27610814571380615, "learning_rate": 8.678710600038233e-05, "loss": 0.8317, "step": 16625 }, { "epoch": 2.0, "grad_norm": 0.2478606104850769, "learning_rate": 8.669380068965856e-05, "loss": 0.8358, "step": 16630 }, { "epoch": 2.0, "grad_norm": 0.2299978882074356, "learning_rate": 8.660052517293448e-05, "loss": 0.8603, "step": 16635 }, { "epoch": 2.0, "grad_norm": 0.27341413497924805, 
"learning_rate": 8.650727949410867e-05, "loss": 0.9561, "step": 16640 }, { "epoch": 2.01, "grad_norm": 0.25213295221328735, "learning_rate": 8.641406369706572e-05, "loss": 0.8045, "step": 16645 }, { "epoch": 2.01, "grad_norm": 0.2605089843273163, "learning_rate": 8.6320877825676e-05, "loss": 0.8112, "step": 16650 }, { "epoch": 2.01, "grad_norm": 0.27594542503356934, "learning_rate": 8.622772192379588e-05, "loss": 0.8145, "step": 16655 }, { "epoch": 2.01, "grad_norm": 0.2801823318004608, "learning_rate": 8.61345960352676e-05, "loss": 0.6998, "step": 16660 }, { "epoch": 2.01, "grad_norm": 0.2606346011161804, "learning_rate": 8.604150020391937e-05, "loss": 0.7655, "step": 16665 }, { "epoch": 2.01, "grad_norm": 0.23958322405815125, "learning_rate": 8.594843447356517e-05, "loss": 0.8022, "step": 16670 }, { "epoch": 2.01, "grad_norm": 0.26263347268104553, "learning_rate": 8.585539888800475e-05, "loss": 0.7376, "step": 16675 }, { "epoch": 2.01, "grad_norm": 0.23959647119045258, "learning_rate": 8.576239349102375e-05, "loss": 0.763, "step": 16680 }, { "epoch": 2.01, "grad_norm": 0.25928083062171936, "learning_rate": 8.56694183263937e-05, "loss": 0.7902, "step": 16685 }, { "epoch": 2.01, "grad_norm": 0.24009615182876587, "learning_rate": 8.557647343787175e-05, "loss": 0.8473, "step": 16690 }, { "epoch": 2.01, "grad_norm": 0.2364853471517563, "learning_rate": 8.548355886920084e-05, "loss": 0.7173, "step": 16695 }, { "epoch": 2.01, "grad_norm": 0.2518242299556732, "learning_rate": 8.539067466410962e-05, "loss": 0.8219, "step": 16700 }, { "epoch": 2.01, "grad_norm": 0.28275394439697266, "learning_rate": 8.529782086631254e-05, "loss": 0.9054, "step": 16705 }, { "epoch": 2.01, "grad_norm": 0.2858952283859253, "learning_rate": 8.520499751950965e-05, "loss": 0.8016, "step": 16710 }, { "epoch": 2.01, "grad_norm": 0.2402840256690979, "learning_rate": 8.51122046673867e-05, "loss": 0.8295, "step": 16715 }, { "epoch": 2.01, "grad_norm": 0.2762582004070282, "learning_rate": 
8.501944235361502e-05, "loss": 0.733, "step": 16720 }, { "epoch": 2.02, "grad_norm": 0.2743135094642639, "learning_rate": 8.492671062185177e-05, "loss": 0.7954, "step": 16725 }, { "epoch": 2.02, "grad_norm": 0.24607881903648376, "learning_rate": 8.483400951573954e-05, "loss": 0.7754, "step": 16730 }, { "epoch": 2.02, "grad_norm": 0.27087563276290894, "learning_rate": 8.474133907890651e-05, "loss": 0.7463, "step": 16735 }, { "epoch": 2.02, "grad_norm": 0.24239374697208405, "learning_rate": 8.464869935496641e-05, "loss": 0.7516, "step": 16740 }, { "epoch": 2.02, "grad_norm": 0.27605438232421875, "learning_rate": 8.455609038751871e-05, "loss": 0.7053, "step": 16745 }, { "epoch": 2.02, "grad_norm": 0.29548242688179016, "learning_rate": 8.446351222014822e-05, "loss": 0.7657, "step": 16750 }, { "epoch": 2.02, "grad_norm": 0.26143786311149597, "learning_rate": 8.437096489642526e-05, "loss": 0.7796, "step": 16755 }, { "epoch": 2.02, "grad_norm": 0.26324915885925293, "learning_rate": 8.42784484599057e-05, "loss": 0.7717, "step": 16760 }, { "epoch": 2.02, "grad_norm": 0.2451925128698349, "learning_rate": 8.418596295413083e-05, "loss": 0.8181, "step": 16765 }, { "epoch": 2.02, "grad_norm": 0.26199206709861755, "learning_rate": 8.409350842262741e-05, "loss": 0.8205, "step": 16770 }, { "epoch": 2.02, "grad_norm": 0.26254212856292725, "learning_rate": 8.400108490890763e-05, "loss": 0.7734, "step": 16775 }, { "epoch": 2.02, "grad_norm": 0.24498294293880463, "learning_rate": 8.390869245646897e-05, "loss": 0.7043, "step": 16780 }, { "epoch": 2.02, "grad_norm": 0.30816173553466797, "learning_rate": 8.381633110879454e-05, "loss": 0.8023, "step": 16785 }, { "epoch": 2.02, "grad_norm": 0.24860282242298126, "learning_rate": 8.372400090935256e-05, "loss": 0.7682, "step": 16790 }, { "epoch": 2.02, "grad_norm": 0.28883734345436096, "learning_rate": 8.363170190159673e-05, "loss": 0.8831, "step": 16795 }, { "epoch": 2.02, "grad_norm": 0.28336241841316223, "learning_rate": 
8.353943412896596e-05, "loss": 0.7443, "step": 16800 }, { "epoch": 2.02, "grad_norm": 0.27060964703559875, "learning_rate": 8.34471976348846e-05, "loss": 0.8307, "step": 16805 }, { "epoch": 2.03, "grad_norm": 0.27045443654060364, "learning_rate": 8.33549924627622e-05, "loss": 0.7417, "step": 16810 }, { "epoch": 2.03, "grad_norm": 0.24936868250370026, "learning_rate": 8.326281865599356e-05, "loss": 0.801, "step": 16815 }, { "epoch": 2.03, "grad_norm": 0.28478366136550903, "learning_rate": 8.317067625795867e-05, "loss": 0.7997, "step": 16820 }, { "epoch": 2.03, "grad_norm": 0.28683024644851685, "learning_rate": 8.307856531202295e-05, "loss": 0.8765, "step": 16825 }, { "epoch": 2.03, "grad_norm": 0.2502270042896271, "learning_rate": 8.298648586153676e-05, "loss": 0.8878, "step": 16830 }, { "epoch": 2.03, "grad_norm": 0.25373736023902893, "learning_rate": 8.289443794983578e-05, "loss": 0.7722, "step": 16835 }, { "epoch": 2.03, "grad_norm": 0.2576674818992615, "learning_rate": 8.280242162024079e-05, "loss": 0.8224, "step": 16840 }, { "epoch": 2.03, "grad_norm": 0.26497557759284973, "learning_rate": 8.271043691605778e-05, "loss": 0.8216, "step": 16845 }, { "epoch": 2.03, "grad_norm": 0.2641935646533966, "learning_rate": 8.261848388057775e-05, "loss": 0.755, "step": 16850 }, { "epoch": 2.03, "grad_norm": 0.28792330622673035, "learning_rate": 8.252656255707689e-05, "loss": 0.7801, "step": 16855 }, { "epoch": 2.03, "grad_norm": 0.2565497159957886, "learning_rate": 8.243467298881636e-05, "loss": 0.7938, "step": 16860 }, { "epoch": 2.03, "grad_norm": 0.2724232077598572, "learning_rate": 8.234281521904253e-05, "loss": 0.8549, "step": 16865 }, { "epoch": 2.03, "grad_norm": 0.24661044776439667, "learning_rate": 8.225098929098673e-05, "loss": 0.812, "step": 16870 }, { "epoch": 2.03, "grad_norm": 0.23888921737670898, "learning_rate": 8.215919524786521e-05, "loss": 0.7724, "step": 16875 }, { "epoch": 2.03, "grad_norm": 0.24567218124866486, "learning_rate": 8.206743313287925e-05, 
"loss": 0.7095, "step": 16880 }, { "epoch": 2.03, "grad_norm": 0.2549126148223877, "learning_rate": 8.197570298921533e-05, "loss": 0.7106, "step": 16885 }, { "epoch": 2.04, "grad_norm": 0.2611244022846222, "learning_rate": 8.18840048600446e-05, "loss": 0.8283, "step": 16890 }, { "epoch": 2.04, "grad_norm": 0.24455487728118896, "learning_rate": 8.179233878852323e-05, "loss": 0.7387, "step": 16895 }, { "epoch": 2.04, "grad_norm": 0.3053877651691437, "learning_rate": 8.170070481779224e-05, "loss": 0.8595, "step": 16900 }, { "epoch": 2.04, "grad_norm": 0.2966110408306122, "learning_rate": 8.160910299097782e-05, "loss": 0.7955, "step": 16905 }, { "epoch": 2.04, "grad_norm": 0.29260462522506714, "learning_rate": 8.15175333511907e-05, "loss": 0.8606, "step": 16910 }, { "epoch": 2.04, "grad_norm": 0.27227169275283813, "learning_rate": 8.14259959415267e-05, "loss": 0.6951, "step": 16915 }, { "epoch": 2.04, "grad_norm": 0.2581891417503357, "learning_rate": 8.133449080506615e-05, "loss": 0.8414, "step": 16920 }, { "epoch": 2.04, "grad_norm": 0.2824326157569885, "learning_rate": 8.124301798487458e-05, "loss": 0.7542, "step": 16925 }, { "epoch": 2.04, "grad_norm": 0.28472477197647095, "learning_rate": 8.115157752400211e-05, "loss": 0.7875, "step": 16930 }, { "epoch": 2.04, "grad_norm": 0.3563947081565857, "learning_rate": 8.106016946548365e-05, "loss": 0.8582, "step": 16935 }, { "epoch": 2.04, "grad_norm": 0.27918270230293274, "learning_rate": 8.096879385233879e-05, "loss": 0.8431, "step": 16940 }, { "epoch": 2.04, "grad_norm": 0.31938403844833374, "learning_rate": 8.087745072757208e-05, "loss": 0.8991, "step": 16945 }, { "epoch": 2.04, "grad_norm": 0.33040347695350647, "learning_rate": 8.078614013417253e-05, "loss": 0.7591, "step": 16950 }, { "epoch": 2.04, "grad_norm": 0.28828245401382446, "learning_rate": 8.069486211511394e-05, "loss": 0.7754, "step": 16955 }, { "epoch": 2.04, "grad_norm": 0.24763058125972748, "learning_rate": 8.060361671335474e-05, "loss": 0.7698, "step": 
16960 }, { "epoch": 2.04, "grad_norm": 0.23437534272670746, "learning_rate": 8.051240397183818e-05, "loss": 0.7253, "step": 16965 }, { "epoch": 2.04, "grad_norm": 0.2842622995376587, "learning_rate": 8.042122393349189e-05, "loss": 0.773, "step": 16970 }, { "epoch": 2.05, "grad_norm": 0.26870718598365784, "learning_rate": 8.033007664122827e-05, "loss": 0.7793, "step": 16975 }, { "epoch": 2.05, "grad_norm": 0.26021209359169006, "learning_rate": 8.023896213794425e-05, "loss": 0.8217, "step": 16980 }, { "epoch": 2.05, "grad_norm": 0.26792365312576294, "learning_rate": 8.014788046652135e-05, "loss": 0.8153, "step": 16985 }, { "epoch": 2.05, "grad_norm": 0.2724539637565613, "learning_rate": 8.00568316698256e-05, "loss": 0.7681, "step": 16990 }, { "epoch": 2.05, "grad_norm": 0.27169302105903625, "learning_rate": 7.996581579070762e-05, "loss": 0.7707, "step": 16995 }, { "epoch": 2.05, "grad_norm": 0.25969168543815613, "learning_rate": 7.987483287200243e-05, "loss": 0.8198, "step": 17000 }, { "epoch": 2.05, "grad_norm": 0.279259592294693, "learning_rate": 7.978388295652974e-05, "loss": 0.7626, "step": 17005 }, { "epoch": 2.05, "grad_norm": 0.27438825368881226, "learning_rate": 7.969296608709351e-05, "loss": 0.7247, "step": 17010 }, { "epoch": 2.05, "grad_norm": 0.30393609404563904, "learning_rate": 7.96020823064823e-05, "loss": 0.7484, "step": 17015 }, { "epoch": 2.05, "grad_norm": 0.2659038007259369, "learning_rate": 7.951123165746892e-05, "loss": 0.9497, "step": 17020 }, { "epoch": 2.05, "grad_norm": 0.2514190673828125, "learning_rate": 7.942041418281086e-05, "loss": 0.729, "step": 17025 }, { "epoch": 2.05, "grad_norm": 0.2725908160209656, "learning_rate": 7.932962992524974e-05, "loss": 0.8552, "step": 17030 }, { "epoch": 2.05, "grad_norm": 0.28015610575675964, "learning_rate": 7.923887892751165e-05, "loss": 0.7491, "step": 17035 }, { "epoch": 2.05, "grad_norm": 0.2758082151412964, "learning_rate": 7.914816123230703e-05, "loss": 0.7509, "step": 17040 }, { "epoch": 2.05, 
"grad_norm": 0.26611337065696716, "learning_rate": 7.905747688233069e-05, "loss": 0.7658, "step": 17045 }, { "epoch": 2.05, "grad_norm": 0.24941769242286682, "learning_rate": 7.896682592026164e-05, "loss": 0.7546, "step": 17050 }, { "epoch": 2.05, "grad_norm": 0.28314536809921265, "learning_rate": 7.887620838876326e-05, "loss": 0.7747, "step": 17055 }, { "epoch": 2.06, "grad_norm": 0.29477396607398987, "learning_rate": 7.878562433048316e-05, "loss": 0.8356, "step": 17060 }, { "epoch": 2.06, "grad_norm": 0.278089314699173, "learning_rate": 7.869507378805321e-05, "loss": 0.8421, "step": 17065 }, { "epoch": 2.06, "grad_norm": 0.27571311593055725, "learning_rate": 7.86045568040895e-05, "loss": 0.8294, "step": 17070 }, { "epoch": 2.06, "grad_norm": 0.27711910009384155, "learning_rate": 7.851407342119226e-05, "loss": 0.8208, "step": 17075 }, { "epoch": 2.06, "grad_norm": 0.27659597992897034, "learning_rate": 7.842362368194611e-05, "loss": 0.7766, "step": 17080 }, { "epoch": 2.06, "grad_norm": 0.28369686007499695, "learning_rate": 7.833320762891964e-05, "loss": 0.8258, "step": 17085 }, { "epoch": 2.06, "grad_norm": 0.2623741626739502, "learning_rate": 7.824282530466562e-05, "loss": 0.836, "step": 17090 }, { "epoch": 2.06, "grad_norm": 0.2581416368484497, "learning_rate": 7.815247675172097e-05, "loss": 0.7441, "step": 17095 }, { "epoch": 2.06, "grad_norm": 0.2673351764678955, "learning_rate": 7.806216201260677e-05, "loss": 0.8585, "step": 17100 }, { "epoch": 2.06, "grad_norm": 0.26765942573547363, "learning_rate": 7.797188112982811e-05, "loss": 0.7769, "step": 17105 }, { "epoch": 2.06, "grad_norm": 0.28835397958755493, "learning_rate": 7.788163414587417e-05, "loss": 0.8654, "step": 17110 }, { "epoch": 2.06, "grad_norm": 0.263345330953598, "learning_rate": 7.779142110321812e-05, "loss": 0.9097, "step": 17115 }, { "epoch": 2.06, "grad_norm": 0.2905290126800537, "learning_rate": 7.770124204431734e-05, "loss": 0.8099, "step": 17120 }, { "epoch": 2.06, "grad_norm": 
0.2563280463218689, "learning_rate": 7.761109701161308e-05, "loss": 0.8323, "step": 17125 }, { "epoch": 2.06, "grad_norm": 0.24418336153030396, "learning_rate": 7.752098604753045e-05, "loss": 0.7408, "step": 17130 }, { "epoch": 2.06, "grad_norm": 0.267090767621994, "learning_rate": 7.743090919447869e-05, "loss": 0.6895, "step": 17135 }, { "epoch": 2.07, "grad_norm": 0.2744917869567871, "learning_rate": 7.734086649485109e-05, "loss": 0.7986, "step": 17140 }, { "epoch": 2.07, "grad_norm": 0.2881864011287689, "learning_rate": 7.725085799102464e-05, "loss": 0.7426, "step": 17145 }, { "epoch": 2.07, "grad_norm": 0.32715755701065063, "learning_rate": 7.716088372536035e-05, "loss": 0.779, "step": 17150 }, { "epoch": 2.07, "grad_norm": 0.2890661060810089, "learning_rate": 7.707094374020302e-05, "loss": 0.8839, "step": 17155 }, { "epoch": 2.07, "grad_norm": 0.2906983196735382, "learning_rate": 7.698103807788152e-05, "loss": 0.9254, "step": 17160 }, { "epoch": 2.07, "grad_norm": 0.27394556999206543, "learning_rate": 7.689116678070839e-05, "loss": 0.7887, "step": 17165 }, { "epoch": 2.07, "grad_norm": 0.2804103195667267, "learning_rate": 7.680132989098004e-05, "loss": 0.7343, "step": 17170 }, { "epoch": 2.07, "grad_norm": 0.2799692451953888, "learning_rate": 7.671152745097664e-05, "loss": 0.7547, "step": 17175 }, { "epoch": 2.07, "grad_norm": 0.2532331943511963, "learning_rate": 7.662175950296231e-05, "loss": 0.8438, "step": 17180 }, { "epoch": 2.07, "grad_norm": 0.2761310338973999, "learning_rate": 7.653202608918479e-05, "loss": 0.7216, "step": 17185 }, { "epoch": 2.07, "grad_norm": 0.27380895614624023, "learning_rate": 7.644232725187557e-05, "loss": 0.7755, "step": 17190 }, { "epoch": 2.07, "grad_norm": 0.3025756776332855, "learning_rate": 7.635266303324993e-05, "loss": 0.7884, "step": 17195 }, { "epoch": 2.07, "grad_norm": 0.26098042726516724, "learning_rate": 7.62630334755068e-05, "loss": 0.7618, "step": 17200 }, { "epoch": 2.07, "grad_norm": 0.2765859365463257, 
"learning_rate": 7.617343862082887e-05, "loss": 0.8837, "step": 17205 }, { "epoch": 2.07, "grad_norm": 0.29430699348449707, "learning_rate": 7.608387851138241e-05, "loss": 0.8484, "step": 17210 }, { "epoch": 2.07, "grad_norm": 0.2927020788192749, "learning_rate": 7.599435318931737e-05, "loss": 0.7411, "step": 17215 }, { "epoch": 2.07, "grad_norm": 0.29646044969558716, "learning_rate": 7.590486269676741e-05, "loss": 0.7927, "step": 17220 }, { "epoch": 2.08, "grad_norm": 0.2691041827201843, "learning_rate": 7.58154070758497e-05, "loss": 0.7709, "step": 17225 }, { "epoch": 2.08, "grad_norm": 0.2560572922229767, "learning_rate": 7.572598636866499e-05, "loss": 0.8836, "step": 17230 }, { "epoch": 2.08, "grad_norm": 0.27019980549812317, "learning_rate": 7.563660061729763e-05, "loss": 0.83, "step": 17235 }, { "epoch": 2.08, "grad_norm": 0.2663953900337219, "learning_rate": 7.554724986381558e-05, "loss": 0.7005, "step": 17240 }, { "epoch": 2.08, "grad_norm": 0.2575012743473053, "learning_rate": 7.545793415027026e-05, "loss": 0.8471, "step": 17245 }, { "epoch": 2.08, "grad_norm": 0.3181961476802826, "learning_rate": 7.53686535186966e-05, "loss": 0.8156, "step": 17250 }, { "epoch": 2.08, "grad_norm": 0.2710973024368286, "learning_rate": 7.527940801111296e-05, "loss": 0.7603, "step": 17255 }, { "epoch": 2.08, "grad_norm": 0.23654502630233765, "learning_rate": 7.519019766952135e-05, "loss": 0.7897, "step": 17260 }, { "epoch": 2.08, "grad_norm": 0.2502569258213043, "learning_rate": 7.51010225359071e-05, "loss": 0.7789, "step": 17265 }, { "epoch": 2.08, "grad_norm": 0.2620236277580261, "learning_rate": 7.501188265223893e-05, "loss": 0.7543, "step": 17270 }, { "epoch": 2.08, "grad_norm": 0.27987682819366455, "learning_rate": 7.492277806046908e-05, "loss": 0.8192, "step": 17275 }, { "epoch": 2.08, "grad_norm": 0.23856423795223236, "learning_rate": 7.483370880253311e-05, "loss": 0.7938, "step": 17280 }, { "epoch": 2.08, "grad_norm": 0.26246753334999084, "learning_rate": 
7.474467492034998e-05, "loss": 0.7727, "step": 17285 }, { "epoch": 2.08, "grad_norm": 0.2560681998729706, "learning_rate": 7.465567645582199e-05, "loss": 0.7481, "step": 17290 }, { "epoch": 2.08, "grad_norm": 0.27870872616767883, "learning_rate": 7.45667134508347e-05, "loss": 0.7659, "step": 17295 }, { "epoch": 2.08, "grad_norm": 0.2658903896808624, "learning_rate": 7.447778594725717e-05, "loss": 0.8235, "step": 17300 }, { "epoch": 2.09, "grad_norm": 0.3086778223514557, "learning_rate": 7.438889398694161e-05, "loss": 0.8337, "step": 17305 }, { "epoch": 2.09, "grad_norm": 0.2679433226585388, "learning_rate": 7.430003761172349e-05, "loss": 0.7323, "step": 17310 }, { "epoch": 2.09, "grad_norm": 0.2665143311023712, "learning_rate": 7.421121686342152e-05, "loss": 0.8554, "step": 17315 }, { "epoch": 2.09, "grad_norm": 0.24720510840415955, "learning_rate": 7.412243178383784e-05, "loss": 0.788, "step": 17320 }, { "epoch": 2.09, "grad_norm": 0.28797394037246704, "learning_rate": 7.403368241475757e-05, "loss": 0.7654, "step": 17325 }, { "epoch": 2.09, "grad_norm": 0.24396300315856934, "learning_rate": 7.394496879794911e-05, "loss": 0.8301, "step": 17330 }, { "epoch": 2.09, "grad_norm": 0.26526421308517456, "learning_rate": 7.385629097516407e-05, "loss": 0.9021, "step": 17335 }, { "epoch": 2.09, "grad_norm": 0.2767682671546936, "learning_rate": 7.376764898813714e-05, "loss": 0.7755, "step": 17340 }, { "epoch": 2.09, "grad_norm": 0.3317005932331085, "learning_rate": 7.367904287858618e-05, "loss": 0.8482, "step": 17345 }, { "epoch": 2.09, "grad_norm": 0.2770203948020935, "learning_rate": 7.359047268821219e-05, "loss": 0.8076, "step": 17350 }, { "epoch": 2.09, "grad_norm": 0.2635518014431, "learning_rate": 7.350193845869918e-05, "loss": 0.8716, "step": 17355 }, { "epoch": 2.09, "grad_norm": 0.2668205797672272, "learning_rate": 7.341344023171441e-05, "loss": 0.7338, "step": 17360 }, { "epoch": 2.09, "grad_norm": 0.29583418369293213, "learning_rate": 7.332497804890803e-05, "loss": 
0.7572, "step": 17365 }, { "epoch": 2.09, "grad_norm": 0.26507455110549927, "learning_rate": 7.323655195191328e-05, "loss": 0.7844, "step": 17370 }, { "epoch": 2.09, "grad_norm": 0.2781868278980255, "learning_rate": 7.314816198234636e-05, "loss": 0.7465, "step": 17375 }, { "epoch": 2.09, "grad_norm": 0.2629784345626831, "learning_rate": 7.305980818180663e-05, "loss": 0.8417, "step": 17380 }, { "epoch": 2.09, "grad_norm": 0.2834410071372986, "learning_rate": 7.297149059187628e-05, "loss": 0.7415, "step": 17385 }, { "epoch": 2.1, "grad_norm": 0.3003019392490387, "learning_rate": 7.28832092541205e-05, "loss": 0.8308, "step": 17390 }, { "epoch": 2.1, "grad_norm": 0.2973310351371765, "learning_rate": 7.279496421008735e-05, "loss": 0.7261, "step": 17395 }, { "epoch": 2.1, "grad_norm": 0.2581186890602112, "learning_rate": 7.2706755501308e-05, "loss": 0.6732, "step": 17400 }, { "epoch": 2.1, "grad_norm": 0.276574969291687, "learning_rate": 7.261858316929634e-05, "loss": 0.7984, "step": 17405 }, { "epoch": 2.1, "grad_norm": 0.24236898124217987, "learning_rate": 7.253044725554922e-05, "loss": 0.7923, "step": 17410 }, { "epoch": 2.1, "grad_norm": 0.26904451847076416, "learning_rate": 7.244234780154627e-05, "loss": 0.7706, "step": 17415 }, { "epoch": 2.1, "grad_norm": 0.2632739841938019, "learning_rate": 7.235428484875006e-05, "loss": 0.748, "step": 17420 }, { "epoch": 2.1, "grad_norm": 0.3082495629787445, "learning_rate": 7.226625843860595e-05, "loss": 0.8136, "step": 17425 }, { "epoch": 2.1, "grad_norm": 0.25225359201431274, "learning_rate": 7.217826861254208e-05, "loss": 0.7853, "step": 17430 }, { "epoch": 2.1, "grad_norm": 0.2693781554698944, "learning_rate": 7.209031541196931e-05, "loss": 0.8022, "step": 17435 }, { "epoch": 2.1, "grad_norm": 0.2704066038131714, "learning_rate": 7.200239887828147e-05, "loss": 0.8619, "step": 17440 }, { "epoch": 2.1, "grad_norm": 0.2754969000816345, "learning_rate": 7.191451905285494e-05, "loss": 0.7699, "step": 17445 }, { "epoch": 2.1, 
"grad_norm": 0.2515884041786194, "learning_rate": 7.182667597704889e-05, "loss": 0.7937, "step": 17450 }, { "epoch": 2.1, "grad_norm": 0.27935126423835754, "learning_rate": 7.17388696922051e-05, "loss": 0.7801, "step": 17455 }, { "epoch": 2.1, "grad_norm": 0.24923402070999146, "learning_rate": 7.165110023964828e-05, "loss": 0.8244, "step": 17460 }, { "epoch": 2.1, "grad_norm": 0.30999448895454407, "learning_rate": 7.156336766068557e-05, "loss": 0.7159, "step": 17465 }, { "epoch": 2.1, "grad_norm": 0.2763430178165436, "learning_rate": 7.147567199660684e-05, "loss": 0.8173, "step": 17470 }, { "epoch": 2.11, "grad_norm": 0.25914233922958374, "learning_rate": 7.138801328868453e-05, "loss": 0.7264, "step": 17475 }, { "epoch": 2.11, "grad_norm": 0.28961434960365295, "learning_rate": 7.130039157817384e-05, "loss": 0.7886, "step": 17480 }, { "epoch": 2.11, "grad_norm": 0.29315581917762756, "learning_rate": 7.121280690631239e-05, "loss": 0.7655, "step": 17485 }, { "epoch": 2.11, "grad_norm": 0.24615170061588287, "learning_rate": 7.112525931432047e-05, "loss": 0.7012, "step": 17490 }, { "epoch": 2.11, "grad_norm": 0.28100574016571045, "learning_rate": 7.103774884340087e-05, "loss": 0.8085, "step": 17495 }, { "epoch": 2.11, "grad_norm": 0.25875842571258545, "learning_rate": 7.095027553473891e-05, "loss": 0.85, "step": 17500 }, { "epoch": 2.11, "grad_norm": 0.28253424167633057, "learning_rate": 7.086283942950246e-05, "loss": 0.7207, "step": 17505 }, { "epoch": 2.11, "grad_norm": 0.2582673728466034, "learning_rate": 7.077544056884182e-05, "loss": 0.8388, "step": 17510 }, { "epoch": 2.11, "grad_norm": 0.2423962503671646, "learning_rate": 7.068807899388976e-05, "loss": 0.7351, "step": 17515 }, { "epoch": 2.11, "grad_norm": 0.2518567144870758, "learning_rate": 7.060075474576165e-05, "loss": 0.755, "step": 17520 }, { "epoch": 2.11, "grad_norm": 0.2877778708934784, "learning_rate": 7.051346786555513e-05, "loss": 0.7769, "step": 17525 }, { "epoch": 2.11, "grad_norm": 
0.2926938235759735, "learning_rate": 7.042621839435029e-05, "loss": 0.8798, "step": 17530 }, { "epoch": 2.11, "grad_norm": 0.295379638671875, "learning_rate": 7.033900637320958e-05, "loss": 0.7104, "step": 17535 }, { "epoch": 2.11, "grad_norm": 0.27940165996551514, "learning_rate": 7.0251831843178e-05, "loss": 0.732, "step": 17540 }, { "epoch": 2.11, "grad_norm": 0.2717758119106293, "learning_rate": 7.01646948452827e-05, "loss": 0.8071, "step": 17545 }, { "epoch": 2.11, "grad_norm": 0.26429474353790283, "learning_rate": 7.007759542053324e-05, "loss": 0.767, "step": 17550 }, { "epoch": 2.12, "grad_norm": 0.3140299916267395, "learning_rate": 6.999053360992155e-05, "loss": 0.7958, "step": 17555 }, { "epoch": 2.12, "grad_norm": 0.2404472976922989, "learning_rate": 6.990350945442173e-05, "loss": 0.8426, "step": 17560 }, { "epoch": 2.12, "grad_norm": 0.2709415555000305, "learning_rate": 6.981652299499032e-05, "loss": 0.7802, "step": 17565 }, { "epoch": 2.12, "grad_norm": 0.2728278338909149, "learning_rate": 6.972957427256594e-05, "loss": 0.7786, "step": 17570 }, { "epoch": 2.12, "grad_norm": 0.27763307094573975, "learning_rate": 6.964266332806966e-05, "loss": 0.735, "step": 17575 }, { "epoch": 2.12, "grad_norm": 0.27662762999534607, "learning_rate": 6.955579020240459e-05, "loss": 0.8434, "step": 17580 }, { "epoch": 2.12, "grad_norm": 0.26812487840652466, "learning_rate": 6.946895493645613e-05, "loss": 0.7325, "step": 17585 }, { "epoch": 2.12, "grad_norm": 0.27408894896507263, "learning_rate": 6.938215757109176e-05, "loss": 0.9187, "step": 17590 }, { "epoch": 2.12, "grad_norm": 0.27132144570350647, "learning_rate": 6.929539814716136e-05, "loss": 0.7242, "step": 17595 }, { "epoch": 2.12, "grad_norm": 0.3133727014064789, "learning_rate": 6.920867670549668e-05, "loss": 0.8118, "step": 17600 }, { "epoch": 2.12, "grad_norm": 0.2706137001514435, "learning_rate": 6.912199328691175e-05, "loss": 0.7838, "step": 17605 }, { "epoch": 2.12, "grad_norm": 0.26105305552482605, 
"learning_rate": 6.90353479322026e-05, "loss": 0.821, "step": 17610 }, { "epoch": 2.12, "grad_norm": 0.26275086402893066, "learning_rate": 6.894874068214751e-05, "loss": 0.8778, "step": 17615 }, { "epoch": 2.12, "grad_norm": 0.28266021609306335, "learning_rate": 6.88621715775067e-05, "loss": 0.91, "step": 17620 }, { "epoch": 2.12, "grad_norm": 0.2602560520172119, "learning_rate": 6.877564065902245e-05, "loss": 0.8879, "step": 17625 }, { "epoch": 2.12, "grad_norm": 0.2614297568798065, "learning_rate": 6.868914796741907e-05, "loss": 0.7482, "step": 17630 }, { "epoch": 2.12, "grad_norm": 0.2839134633541107, "learning_rate": 6.860269354340292e-05, "loss": 0.7748, "step": 17635 }, { "epoch": 2.13, "grad_norm": 0.2632676064968109, "learning_rate": 6.85162774276623e-05, "loss": 0.8696, "step": 17640 }, { "epoch": 2.13, "grad_norm": 0.24448035657405853, "learning_rate": 6.842989966086751e-05, "loss": 0.7791, "step": 17645 }, { "epoch": 2.13, "grad_norm": 0.289100706577301, "learning_rate": 6.834356028367076e-05, "loss": 0.7354, "step": 17650 }, { "epoch": 2.13, "grad_norm": 0.27051296830177307, "learning_rate": 6.82572593367063e-05, "loss": 0.9232, "step": 17655 }, { "epoch": 2.13, "grad_norm": 0.25651922821998596, "learning_rate": 6.81709968605902e-05, "loss": 0.8086, "step": 17660 }, { "epoch": 2.13, "grad_norm": 0.24785958230495453, "learning_rate": 6.808477289592045e-05, "loss": 0.7559, "step": 17665 }, { "epoch": 2.13, "grad_norm": 0.25814223289489746, "learning_rate": 6.799858748327681e-05, "loss": 0.8003, "step": 17670 }, { "epoch": 2.13, "grad_norm": 0.2958534061908722, "learning_rate": 6.791244066322115e-05, "loss": 0.8522, "step": 17675 }, { "epoch": 2.13, "grad_norm": 0.27761539816856384, "learning_rate": 6.782633247629697e-05, "loss": 0.8069, "step": 17680 }, { "epoch": 2.13, "grad_norm": 0.33598434925079346, "learning_rate": 6.774026296302963e-05, "loss": 0.7955, "step": 17685 }, { "epoch": 2.13, "grad_norm": 0.26065829396247864, "learning_rate": 
6.765423216392623e-05, "loss": 0.751, "step": 17690 }, { "epoch": 2.13, "grad_norm": 0.2681087553501129, "learning_rate": 6.756824011947586e-05, "loss": 0.7626, "step": 17695 }, { "epoch": 2.13, "grad_norm": 0.27821341156959534, "learning_rate": 6.748228687014915e-05, "loss": 0.6555, "step": 17700 }, { "epoch": 2.13, "grad_norm": 0.2935525178909302, "learning_rate": 6.739637245639858e-05, "loss": 0.8103, "step": 17705 }, { "epoch": 2.13, "grad_norm": 0.29609110951423645, "learning_rate": 6.73104969186583e-05, "loss": 0.8574, "step": 17710 }, { "epoch": 2.13, "grad_norm": 0.30124762654304504, "learning_rate": 6.722466029734422e-05, "loss": 0.865, "step": 17715 }, { "epoch": 2.14, "grad_norm": 0.26538020372390747, "learning_rate": 6.713886263285388e-05, "loss": 0.7681, "step": 17720 }, { "epoch": 2.14, "grad_norm": 0.26841890811920166, "learning_rate": 6.705310396556651e-05, "loss": 0.7141, "step": 17725 }, { "epoch": 2.14, "grad_norm": 0.2693963050842285, "learning_rate": 6.696738433584295e-05, "loss": 0.7318, "step": 17730 }, { "epoch": 2.14, "grad_norm": 0.3165275752544403, "learning_rate": 6.688170378402581e-05, "loss": 0.7613, "step": 17735 }, { "epoch": 2.14, "grad_norm": 0.2781963050365448, "learning_rate": 6.679606235043913e-05, "loss": 0.7928, "step": 17740 }, { "epoch": 2.14, "grad_norm": 0.2980717122554779, "learning_rate": 6.671046007538862e-05, "loss": 0.7451, "step": 17745 }, { "epoch": 2.14, "grad_norm": 0.26822733879089355, "learning_rate": 6.662489699916153e-05, "loss": 0.8043, "step": 17750 }, { "epoch": 2.14, "grad_norm": 0.2781360447406769, "learning_rate": 6.653937316202675e-05, "loss": 0.8289, "step": 17755 }, { "epoch": 2.14, "grad_norm": 0.28398361802101135, "learning_rate": 6.64538886042346e-05, "loss": 0.7195, "step": 17760 }, { "epoch": 2.14, "grad_norm": 0.28372859954833984, "learning_rate": 6.636844336601695e-05, "loss": 0.8084, "step": 17765 }, { "epoch": 2.14, "grad_norm": 0.2855859696865082, "learning_rate": 6.628303748758719e-05, 
"loss": 0.7861, "step": 17770 }, { "epoch": 2.14, "grad_norm": 0.2785280644893646, "learning_rate": 6.619767100914013e-05, "loss": 0.8347, "step": 17775 }, { "epoch": 2.14, "grad_norm": 0.2754531800746918, "learning_rate": 6.611234397085207e-05, "loss": 0.8999, "step": 17780 }, { "epoch": 2.14, "grad_norm": 0.2457299828529358, "learning_rate": 6.602705641288078e-05, "loss": 0.6812, "step": 17785 }, { "epoch": 2.14, "grad_norm": 0.2855609357357025, "learning_rate": 6.594180837536533e-05, "loss": 0.7006, "step": 17790 }, { "epoch": 2.14, "grad_norm": 0.23807214200496674, "learning_rate": 6.585659989842641e-05, "loss": 0.7315, "step": 17795 }, { "epoch": 2.14, "grad_norm": 0.2731025815010071, "learning_rate": 6.57714310221659e-05, "loss": 0.8854, "step": 17800 }, { "epoch": 2.15, "grad_norm": 0.2559722363948822, "learning_rate": 6.568630178666706e-05, "loss": 0.7751, "step": 17805 }, { "epoch": 2.15, "grad_norm": 0.28620442748069763, "learning_rate": 6.560121223199455e-05, "loss": 0.85, "step": 17810 }, { "epoch": 2.15, "grad_norm": 0.2663728892803192, "learning_rate": 6.551616239819441e-05, "loss": 0.9119, "step": 17815 }, { "epoch": 2.15, "grad_norm": 0.2895793318748474, "learning_rate": 6.543115232529386e-05, "loss": 0.8386, "step": 17820 }, { "epoch": 2.15, "grad_norm": 0.25901633501052856, "learning_rate": 6.534618205330146e-05, "loss": 0.8167, "step": 17825 }, { "epoch": 2.15, "grad_norm": 0.2944163382053375, "learning_rate": 6.526125162220702e-05, "loss": 0.8255, "step": 17830 }, { "epoch": 2.15, "grad_norm": 0.27901577949523926, "learning_rate": 6.51763610719817e-05, "loss": 0.7202, "step": 17835 }, { "epoch": 2.15, "grad_norm": 0.25898268818855286, "learning_rate": 6.509151044257776e-05, "loss": 0.7927, "step": 17840 }, { "epoch": 2.15, "grad_norm": 0.27605336904525757, "learning_rate": 6.500669977392874e-05, "loss": 0.9295, "step": 17845 }, { "epoch": 2.15, "grad_norm": 0.3069280683994293, "learning_rate": 6.492192910594933e-05, "loss": 0.8324, "step": 17850 
}, { "epoch": 2.15, "grad_norm": 0.2427961677312851, "learning_rate": 6.483719847853545e-05, "loss": 0.8261, "step": 17855 }, { "epoch": 2.15, "grad_norm": 0.31651127338409424, "learning_rate": 6.475250793156412e-05, "loss": 0.7048, "step": 17860 }, { "epoch": 2.15, "grad_norm": 0.3037257492542267, "learning_rate": 6.466785750489357e-05, "loss": 0.7998, "step": 17865 }, { "epoch": 2.15, "grad_norm": 0.29183289408683777, "learning_rate": 6.458324723836299e-05, "loss": 0.7498, "step": 17870 }, { "epoch": 2.15, "grad_norm": 0.264967679977417, "learning_rate": 6.449867717179293e-05, "loss": 0.844, "step": 17875 }, { "epoch": 2.15, "grad_norm": 0.319900780916214, "learning_rate": 6.44141473449848e-05, "loss": 0.7544, "step": 17880 }, { "epoch": 2.15, "grad_norm": 0.2805964946746826, "learning_rate": 6.432965779772115e-05, "loss": 0.8429, "step": 17885 }, { "epoch": 2.16, "grad_norm": 0.24578483402729034, "learning_rate": 6.424520856976551e-05, "loss": 0.7295, "step": 17890 }, { "epoch": 2.16, "grad_norm": 0.27586978673934937, "learning_rate": 6.416079970086259e-05, "loss": 0.9162, "step": 17895 }, { "epoch": 2.16, "grad_norm": 0.2891899347305298, "learning_rate": 6.407643123073797e-05, "loss": 0.7682, "step": 17900 }, { "epoch": 2.16, "grad_norm": 0.26478612422943115, "learning_rate": 6.399210319909824e-05, "loss": 0.8028, "step": 17905 }, { "epoch": 2.16, "grad_norm": 0.26194480061531067, "learning_rate": 6.390781564563093e-05, "loss": 0.8431, "step": 17910 }, { "epoch": 2.16, "grad_norm": 0.2496415376663208, "learning_rate": 6.382356861000466e-05, "loss": 0.8244, "step": 17915 }, { "epoch": 2.16, "grad_norm": 0.2828780710697174, "learning_rate": 6.373936213186884e-05, "loss": 0.8356, "step": 17920 }, { "epoch": 2.16, "grad_norm": 0.3063983917236328, "learning_rate": 6.365519625085388e-05, "loss": 0.9442, "step": 17925 }, { "epoch": 2.16, "grad_norm": 0.2884746491909027, "learning_rate": 6.357107100657088e-05, "loss": 0.7947, "step": 17930 }, { "epoch": 2.16, 
"grad_norm": 0.2872295379638672, "learning_rate": 6.348698643861213e-05, "loss": 0.8708, "step": 17935 }, { "epoch": 2.16, "grad_norm": 0.2698356509208679, "learning_rate": 6.340294258655056e-05, "loss": 0.7472, "step": 17940 }, { "epoch": 2.16, "grad_norm": 0.2601388394832611, "learning_rate": 6.331893948994003e-05, "loss": 0.7562, "step": 17945 }, { "epoch": 2.16, "grad_norm": 0.27124738693237305, "learning_rate": 6.32349771883151e-05, "loss": 0.8861, "step": 17950 }, { "epoch": 2.16, "grad_norm": 0.2834950089454651, "learning_rate": 6.315105572119134e-05, "loss": 0.8054, "step": 17955 }, { "epoch": 2.16, "grad_norm": 0.26736146211624146, "learning_rate": 6.306717512806492e-05, "loss": 0.8403, "step": 17960 }, { "epoch": 2.16, "grad_norm": 0.2949734330177307, "learning_rate": 6.298333544841284e-05, "loss": 0.788, "step": 17965 }, { "epoch": 2.17, "grad_norm": 0.2661183178424835, "learning_rate": 6.289953672169276e-05, "loss": 0.7755, "step": 17970 }, { "epoch": 2.17, "grad_norm": 0.23854653537273407, "learning_rate": 6.281577898734329e-05, "loss": 0.7737, "step": 17975 }, { "epoch": 2.17, "grad_norm": 0.27708199620246887, "learning_rate": 6.273206228478351e-05, "loss": 0.7334, "step": 17980 }, { "epoch": 2.17, "grad_norm": 0.25264453887939453, "learning_rate": 6.264838665341331e-05, "loss": 0.796, "step": 17985 }, { "epoch": 2.17, "grad_norm": 0.2900809645652771, "learning_rate": 6.25647521326132e-05, "loss": 0.78, "step": 17990 }, { "epoch": 2.17, "grad_norm": 0.24564304947853088, "learning_rate": 6.248115876174438e-05, "loss": 0.7526, "step": 17995 }, { "epoch": 2.17, "grad_norm": 0.29007548093795776, "learning_rate": 6.239760658014865e-05, "loss": 0.8331, "step": 18000 }, { "epoch": 2.17, "grad_norm": 0.2651544213294983, "learning_rate": 6.231409562714845e-05, "loss": 0.7203, "step": 18005 }, { "epoch": 2.17, "grad_norm": 0.28148752450942993, "learning_rate": 6.223062594204676e-05, "loss": 0.7299, "step": 18010 }, { "epoch": 2.17, "grad_norm": 
0.2735466957092285, "learning_rate": 6.214719756412729e-05, "loss": 0.6825, "step": 18015 }, { "epoch": 2.17, "grad_norm": 0.2654740810394287, "learning_rate": 6.206381053265412e-05, "loss": 0.8268, "step": 18020 }, { "epoch": 2.17, "grad_norm": 0.264364093542099, "learning_rate": 6.198046488687201e-05, "loss": 0.7943, "step": 18025 }, { "epoch": 2.17, "grad_norm": 0.2690427601337433, "learning_rate": 6.18971606660061e-05, "loss": 0.7951, "step": 18030 }, { "epoch": 2.17, "grad_norm": 0.3025277853012085, "learning_rate": 6.181389790926224e-05, "loss": 0.7487, "step": 18035 }, { "epoch": 2.17, "grad_norm": 0.2653854489326477, "learning_rate": 6.173067665582659e-05, "loss": 0.8685, "step": 18040 }, { "epoch": 2.17, "grad_norm": 0.2819320559501648, "learning_rate": 6.164749694486579e-05, "loss": 0.769, "step": 18045 }, { "epoch": 2.17, "grad_norm": 0.28492966294288635, "learning_rate": 6.156435881552708e-05, "loss": 0.821, "step": 18050 }, { "epoch": 2.18, "grad_norm": 0.24990375339984894, "learning_rate": 6.148126230693796e-05, "loss": 0.9141, "step": 18055 }, { "epoch": 2.18, "grad_norm": 0.2689098119735718, "learning_rate": 6.139820745820643e-05, "loss": 0.7734, "step": 18060 }, { "epoch": 2.18, "grad_norm": 0.2601291239261627, "learning_rate": 6.131519430842083e-05, "loss": 0.7885, "step": 18065 }, { "epoch": 2.18, "grad_norm": 0.26659342646598816, "learning_rate": 6.123222289664993e-05, "loss": 0.8574, "step": 18070 }, { "epoch": 2.18, "grad_norm": 0.260973185300827, "learning_rate": 6.114929326194281e-05, "loss": 0.7629, "step": 18075 }, { "epoch": 2.18, "grad_norm": 0.2958158850669861, "learning_rate": 6.106640544332894e-05, "loss": 0.8406, "step": 18080 }, { "epoch": 2.18, "grad_norm": 0.27698177099227905, "learning_rate": 6.0983559479817986e-05, "loss": 0.7944, "step": 18085 }, { "epoch": 2.18, "grad_norm": 0.26226866245269775, "learning_rate": 6.090075541040015e-05, "loss": 0.761, "step": 18090 }, { "epoch": 2.18, "grad_norm": 0.28929901123046875, 
"learning_rate": 6.08179932740457e-05, "loss": 0.7879, "step": 18095 }, { "epoch": 2.18, "grad_norm": 0.2864818871021271, "learning_rate": 6.073527310970527e-05, "loss": 0.7526, "step": 18100 }, { "epoch": 2.18, "grad_norm": 0.28640732169151306, "learning_rate": 6.065259495630966e-05, "loss": 0.7588, "step": 18105 }, { "epoch": 2.18, "grad_norm": 0.25879958271980286, "learning_rate": 6.0569958852770026e-05, "loss": 0.8613, "step": 18110 }, { "epoch": 2.18, "grad_norm": 0.26180025935173035, "learning_rate": 6.048736483797765e-05, "loss": 0.7988, "step": 18115 }, { "epoch": 2.18, "grad_norm": 0.28200724720954895, "learning_rate": 6.040481295080402e-05, "loss": 0.7577, "step": 18120 }, { "epoch": 2.18, "grad_norm": 0.27897992730140686, "learning_rate": 6.0322303230100706e-05, "loss": 0.7339, "step": 18125 }, { "epoch": 2.18, "grad_norm": 0.2738398313522339, "learning_rate": 6.0239835714699656e-05, "loss": 0.7929, "step": 18130 }, { "epoch": 2.19, "grad_norm": 0.2769078016281128, "learning_rate": 6.015741044341282e-05, "loss": 0.8469, "step": 18135 }, { "epoch": 2.19, "grad_norm": 0.2504939138889313, "learning_rate": 6.0075027455032154e-05, "loss": 0.806, "step": 18140 }, { "epoch": 2.19, "grad_norm": 0.27813395857810974, "learning_rate": 5.999268678832982e-05, "loss": 0.8322, "step": 18145 }, { "epoch": 2.19, "grad_norm": 0.25341561436653137, "learning_rate": 5.9910388482058196e-05, "loss": 0.8823, "step": 18150 }, { "epoch": 2.19, "grad_norm": 0.2485748827457428, "learning_rate": 5.982813257494954e-05, "loss": 0.7445, "step": 18155 }, { "epoch": 2.19, "grad_norm": 0.2577187120914459, "learning_rate": 5.9745919105716195e-05, "loss": 0.7756, "step": 18160 }, { "epoch": 2.19, "grad_norm": 0.28886187076568604, "learning_rate": 5.966374811305051e-05, "loss": 0.8127, "step": 18165 }, { "epoch": 2.19, "grad_norm": 0.27744728326797485, "learning_rate": 5.9581619635625014e-05, "loss": 0.815, "step": 18170 }, { "epoch": 2.19, "grad_norm": 0.2894884943962097, "learning_rate": 
5.9499533712092e-05, "loss": 0.8005, "step": 18175 }, { "epoch": 2.19, "grad_norm": 0.3021494448184967, "learning_rate": 5.941749038108385e-05, "loss": 0.8278, "step": 18180 }, { "epoch": 2.19, "grad_norm": 0.2534330189228058, "learning_rate": 5.9335489681212835e-05, "loss": 0.8549, "step": 18185 }, { "epoch": 2.19, "grad_norm": 0.3106016516685486, "learning_rate": 5.92535316510713e-05, "loss": 0.6661, "step": 18190 }, { "epoch": 2.19, "grad_norm": 0.2976013422012329, "learning_rate": 5.9171616329231364e-05, "loss": 0.824, "step": 18195 }, { "epoch": 2.19, "grad_norm": 0.2501593232154846, "learning_rate": 5.90897437542451e-05, "loss": 0.787, "step": 18200 }, { "epoch": 2.19, "grad_norm": 0.28381872177124023, "learning_rate": 5.900791396464445e-05, "loss": 0.7354, "step": 18205 }, { "epoch": 2.19, "grad_norm": 0.259772390127182, "learning_rate": 5.892612699894127e-05, "loss": 0.789, "step": 18210 }, { "epoch": 2.19, "grad_norm": 0.24163563549518585, "learning_rate": 5.884438289562717e-05, "loss": 0.8713, "step": 18215 }, { "epoch": 2.2, "grad_norm": 0.2834208011627197, "learning_rate": 5.8762681693173675e-05, "loss": 0.841, "step": 18220 }, { "epoch": 2.2, "grad_norm": 0.2929078936576843, "learning_rate": 5.868102343003201e-05, "loss": 0.7945, "step": 18225 }, { "epoch": 2.2, "grad_norm": 0.30455702543258667, "learning_rate": 5.8599408144633405e-05, "loss": 0.7069, "step": 18230 }, { "epoch": 2.2, "grad_norm": 0.3452713191509247, "learning_rate": 5.851783587538863e-05, "loss": 0.8092, "step": 18235 }, { "epoch": 2.2, "grad_norm": 0.26134809851646423, "learning_rate": 5.843630666068832e-05, "loss": 0.8087, "step": 18240 }, { "epoch": 2.2, "grad_norm": 0.30902811884880066, "learning_rate": 5.835482053890278e-05, "loss": 0.8481, "step": 18245 }, { "epoch": 2.2, "grad_norm": 0.31992846727371216, "learning_rate": 5.827337754838218e-05, "loss": 0.7905, "step": 18250 }, { "epoch": 2.2, "grad_norm": 0.2618788182735443, "learning_rate": 5.819197772745627e-05, "loss": 0.7629, 
"step": 18255 }, { "epoch": 2.2, "grad_norm": 0.28527355194091797, "learning_rate": 5.811062111443447e-05, "loss": 0.826, "step": 18260 }, { "epoch": 2.2, "grad_norm": 0.31207770109176636, "learning_rate": 5.8029307747605905e-05, "loss": 0.7721, "step": 18265 }, { "epoch": 2.2, "grad_norm": 0.250207781791687, "learning_rate": 5.794803766523939e-05, "loss": 0.7803, "step": 18270 }, { "epoch": 2.2, "grad_norm": 0.29229018092155457, "learning_rate": 5.786681090558332e-05, "loss": 0.8023, "step": 18275 }, { "epoch": 2.2, "grad_norm": 0.273379385471344, "learning_rate": 5.778562750686568e-05, "loss": 0.7844, "step": 18280 }, { "epoch": 2.2, "grad_norm": 0.27281737327575684, "learning_rate": 5.770448750729408e-05, "loss": 0.8071, "step": 18285 }, { "epoch": 2.2, "grad_norm": 0.2896445095539093, "learning_rate": 5.762339094505569e-05, "loss": 0.7641, "step": 18290 }, { "epoch": 2.2, "grad_norm": 0.2542775571346283, "learning_rate": 5.7542337858317257e-05, "loss": 0.8674, "step": 18295 }, { "epoch": 2.2, "grad_norm": 0.2890925705432892, "learning_rate": 5.746132828522506e-05, "loss": 0.7799, "step": 18300 }, { "epoch": 2.21, "grad_norm": 0.303602397441864, "learning_rate": 5.738036226390483e-05, "loss": 0.7872, "step": 18305 }, { "epoch": 2.21, "grad_norm": 0.31776705384254456, "learning_rate": 5.729943983246198e-05, "loss": 0.8462, "step": 18310 }, { "epoch": 2.21, "grad_norm": 0.2585162818431854, "learning_rate": 5.721856102898121e-05, "loss": 0.8747, "step": 18315 }, { "epoch": 2.21, "grad_norm": 0.2587442696094513, "learning_rate": 5.713772589152682e-05, "loss": 0.8068, "step": 18320 }, { "epoch": 2.21, "grad_norm": 0.26109957695007324, "learning_rate": 5.705693445814243e-05, "loss": 0.7185, "step": 18325 }, { "epoch": 2.21, "grad_norm": 0.27592533826828003, "learning_rate": 5.697618676685127e-05, "loss": 0.8076, "step": 18330 }, { "epoch": 2.21, "grad_norm": 0.26613497734069824, "learning_rate": 5.689548285565585e-05, "loss": 0.803, "step": 18335 }, { "epoch": 2.21, 
"grad_norm": 0.2756407558917999, "learning_rate": 5.681482276253811e-05, "loss": 0.836, "step": 18340 }, { "epoch": 2.21, "grad_norm": 0.2865229845046997, "learning_rate": 5.6734206525459355e-05, "loss": 0.7962, "step": 18345 }, { "epoch": 2.21, "grad_norm": 0.2565017342567444, "learning_rate": 5.6653634182360267e-05, "loss": 0.8133, "step": 18350 }, { "epoch": 2.21, "grad_norm": 0.2665974497795105, "learning_rate": 5.6573105771160875e-05, "loss": 0.7833, "step": 18355 }, { "epoch": 2.21, "grad_norm": 0.3112146556377411, "learning_rate": 5.6492621329760524e-05, "loss": 0.8103, "step": 18360 }, { "epoch": 2.21, "grad_norm": 0.299140602350235, "learning_rate": 5.641218089603779e-05, "loss": 0.7744, "step": 18365 }, { "epoch": 2.21, "grad_norm": 0.25373366475105286, "learning_rate": 5.6331784507850744e-05, "loss": 0.8986, "step": 18370 }, { "epoch": 2.21, "grad_norm": 0.27519750595092773, "learning_rate": 5.6251432203036544e-05, "loss": 0.791, "step": 18375 }, { "epoch": 2.21, "grad_norm": 0.253791481256485, "learning_rate": 5.617112401941163e-05, "loss": 0.7265, "step": 18380 }, { "epoch": 2.22, "grad_norm": 0.26284411549568176, "learning_rate": 5.609085999477166e-05, "loss": 0.8349, "step": 18385 }, { "epoch": 2.22, "grad_norm": 0.26994234323501587, "learning_rate": 5.601064016689165e-05, "loss": 0.7947, "step": 18390 }, { "epoch": 2.22, "grad_norm": 0.3026762902736664, "learning_rate": 5.593046457352568e-05, "loss": 0.8011, "step": 18395 }, { "epoch": 2.22, "grad_norm": 0.2768292725086212, "learning_rate": 5.585033325240704e-05, "loss": 0.7135, "step": 18400 }, { "epoch": 2.22, "grad_norm": 0.28852131962776184, "learning_rate": 5.5770246241248125e-05, "loss": 0.8516, "step": 18405 }, { "epoch": 2.22, "grad_norm": 0.28244367241859436, "learning_rate": 5.5690203577740654e-05, "loss": 0.7431, "step": 18410 }, { "epoch": 2.22, "grad_norm": 0.2686423361301422, "learning_rate": 5.561020529955531e-05, "loss": 0.7382, "step": 18415 }, { "epoch": 2.22, "grad_norm": 
0.2730972170829773, "learning_rate": 5.5530251444341936e-05, "loss": 0.8299, "step": 18420 }, { "epoch": 2.22, "grad_norm": 0.26289600133895874, "learning_rate": 5.5450342049729475e-05, "loss": 0.7141, "step": 18425 }, { "epoch": 2.22, "grad_norm": 0.2697264552116394, "learning_rate": 5.5370477153325945e-05, "loss": 0.7229, "step": 18430 }, { "epoch": 2.22, "grad_norm": 0.2662758231163025, "learning_rate": 5.529065679271841e-05, "loss": 0.8089, "step": 18435 }, { "epoch": 2.22, "grad_norm": 0.2873300313949585, "learning_rate": 5.5210881005473e-05, "loss": 0.8195, "step": 18440 }, { "epoch": 2.22, "grad_norm": 0.29123494029045105, "learning_rate": 5.513114982913479e-05, "loss": 0.8099, "step": 18445 }, { "epoch": 2.22, "grad_norm": 0.2564477324485779, "learning_rate": 5.5051463301228e-05, "loss": 0.675, "step": 18450 }, { "epoch": 2.22, "grad_norm": 0.29058778285980225, "learning_rate": 5.497182145925575e-05, "loss": 0.7853, "step": 18455 }, { "epoch": 2.22, "grad_norm": 0.32175374031066895, "learning_rate": 5.489222434070009e-05, "loss": 0.8176, "step": 18460 }, { "epoch": 2.22, "grad_norm": 0.291685551404953, "learning_rate": 5.4812671983022046e-05, "loss": 0.7884, "step": 18465 }, { "epoch": 2.23, "grad_norm": 0.25728997588157654, "learning_rate": 5.473316442366167e-05, "loss": 0.824, "step": 18470 }, { "epoch": 2.23, "grad_norm": 0.2623702883720398, "learning_rate": 5.465370170003785e-05, "loss": 0.8171, "step": 18475 }, { "epoch": 2.23, "grad_norm": 0.24209091067314148, "learning_rate": 5.4574283849548354e-05, "loss": 0.8101, "step": 18480 }, { "epoch": 2.23, "grad_norm": 0.2494385689496994, "learning_rate": 5.449491090956982e-05, "loss": 0.7616, "step": 18485 }, { "epoch": 2.23, "grad_norm": 0.307625949382782, "learning_rate": 5.44155829174579e-05, "loss": 0.8873, "step": 18490 }, { "epoch": 2.23, "grad_norm": 0.27069059014320374, "learning_rate": 5.433629991054691e-05, "loss": 0.8159, "step": 18495 }, { "epoch": 2.23, "grad_norm": 0.2697925567626953, 
"learning_rate": 5.425706192615007e-05, "loss": 0.8165, "step": 18500 }, { "epoch": 2.23, "grad_norm": 0.2592358887195587, "learning_rate": 5.417786900155942e-05, "loss": 0.7355, "step": 18505 }, { "epoch": 2.23, "grad_norm": 0.28814736008644104, "learning_rate": 5.409872117404577e-05, "loss": 0.8358, "step": 18510 }, { "epoch": 2.23, "grad_norm": 0.2974863350391388, "learning_rate": 5.401961848085871e-05, "loss": 0.7225, "step": 18515 }, { "epoch": 2.23, "grad_norm": 0.28759467601776123, "learning_rate": 5.394056095922662e-05, "loss": 0.74, "step": 18520 }, { "epoch": 2.23, "grad_norm": 0.3033956289291382, "learning_rate": 5.3861548646356514e-05, "loss": 0.7645, "step": 18525 }, { "epoch": 2.23, "grad_norm": 0.2899126410484314, "learning_rate": 5.3782581579434325e-05, "loss": 0.7309, "step": 18530 }, { "epoch": 2.23, "grad_norm": 0.2775956690311432, "learning_rate": 5.370365979562453e-05, "loss": 0.8238, "step": 18535 }, { "epoch": 2.23, "grad_norm": 0.2776409983634949, "learning_rate": 5.362478333207034e-05, "loss": 0.84, "step": 18540 }, { "epoch": 2.23, "grad_norm": 0.27701374888420105, "learning_rate": 5.354595222589358e-05, "loss": 0.7884, "step": 18545 }, { "epoch": 2.24, "grad_norm": 0.3034186065196991, "learning_rate": 5.34671665141949e-05, "loss": 0.7565, "step": 18550 }, { "epoch": 2.24, "grad_norm": 0.2622527480125427, "learning_rate": 5.3388426234053414e-05, "loss": 0.7612, "step": 18555 }, { "epoch": 2.24, "grad_norm": 0.28101763129234314, "learning_rate": 5.330973142252691e-05, "loss": 0.7192, "step": 18560 }, { "epoch": 2.24, "grad_norm": 0.2687520384788513, "learning_rate": 5.323108211665178e-05, "loss": 0.8082, "step": 18565 }, { "epoch": 2.24, "grad_norm": 0.31704583764076233, "learning_rate": 5.3152478353443006e-05, "loss": 0.6957, "step": 18570 }, { "epoch": 2.24, "grad_norm": 0.26655253767967224, "learning_rate": 5.307392016989413e-05, "loss": 0.8078, "step": 18575 }, { "epoch": 2.24, "grad_norm": 0.27396515011787415, "learning_rate": 
5.299540760297717e-05, "loss": 0.8209, "step": 18580 }, { "epoch": 2.24, "grad_norm": 0.24590495228767395, "learning_rate": 5.2916940689642887e-05, "loss": 0.8348, "step": 18585 }, { "epoch": 2.24, "grad_norm": 0.23811030387878418, "learning_rate": 5.283851946682033e-05, "loss": 0.771, "step": 18590 }, { "epoch": 2.24, "grad_norm": 0.2999459505081177, "learning_rate": 5.2760143971417155e-05, "loss": 0.7364, "step": 18595 }, { "epoch": 2.24, "grad_norm": 0.257988840341568, "learning_rate": 5.2681814240319415e-05, "loss": 0.7833, "step": 18600 }, { "epoch": 2.24, "grad_norm": 0.27727702260017395, "learning_rate": 5.260353031039177e-05, "loss": 0.8248, "step": 18605 }, { "epoch": 2.24, "grad_norm": 0.24502165615558624, "learning_rate": 5.252529221847719e-05, "loss": 0.7083, "step": 18610 }, { "epoch": 2.24, "grad_norm": 0.2822337746620178, "learning_rate": 5.244710000139714e-05, "loss": 0.8521, "step": 18615 }, { "epoch": 2.24, "grad_norm": 0.2731589674949646, "learning_rate": 5.2368953695951405e-05, "loss": 0.8219, "step": 18620 }, { "epoch": 2.24, "grad_norm": 0.2679608464241028, "learning_rate": 5.229085333891834e-05, "loss": 0.73, "step": 18625 }, { "epoch": 2.24, "grad_norm": 0.2607372999191284, "learning_rate": 5.221279896705452e-05, "loss": 0.7845, "step": 18630 }, { "epoch": 2.25, "grad_norm": 0.2670726478099823, "learning_rate": 5.213479061709492e-05, "loss": 0.8545, "step": 18635 }, { "epoch": 2.25, "grad_norm": 0.29974955320358276, "learning_rate": 5.2056828325752855e-05, "loss": 0.8346, "step": 18640 }, { "epoch": 2.25, "grad_norm": 0.2804435193538666, "learning_rate": 5.1978912129719956e-05, "loss": 0.7781, "step": 18645 }, { "epoch": 2.25, "grad_norm": 0.27671873569488525, "learning_rate": 5.1901042065666214e-05, "loss": 0.8077, "step": 18650 }, { "epoch": 2.25, "grad_norm": 0.28457868099212646, "learning_rate": 5.182321817023983e-05, "loss": 0.8014, "step": 18655 }, { "epoch": 2.25, "grad_norm": 0.2779267728328705, "learning_rate": 
5.174544048006726e-05, "loss": 0.8311, "step": 18660 }, { "epoch": 2.25, "grad_norm": 0.26867663860321045, "learning_rate": 5.1667709031753406e-05, "loss": 0.7116, "step": 18665 }, { "epoch": 2.25, "grad_norm": 0.27532848715782166, "learning_rate": 5.159002386188118e-05, "loss": 0.9608, "step": 18670 }, { "epoch": 2.25, "grad_norm": 0.2576589286327362, "learning_rate": 5.151238500701184e-05, "loss": 0.7806, "step": 18675 }, { "epoch": 2.25, "grad_norm": 0.29367679357528687, "learning_rate": 5.1434792503684716e-05, "loss": 0.7674, "step": 18680 }, { "epoch": 2.25, "grad_norm": 0.2842334806919098, "learning_rate": 5.135724638841755e-05, "loss": 0.7849, "step": 18685 }, { "epoch": 2.25, "grad_norm": 0.305401474237442, "learning_rate": 5.1279746697706055e-05, "loss": 0.7973, "step": 18690 }, { "epoch": 2.25, "grad_norm": 0.2906571328639984, "learning_rate": 5.120229346802416e-05, "loss": 0.808, "step": 18695 }, { "epoch": 2.25, "grad_norm": 0.2675495445728302, "learning_rate": 5.112488673582389e-05, "loss": 0.7582, "step": 18700 }, { "epoch": 2.25, "grad_norm": 0.2715294063091278, "learning_rate": 5.1047526537535504e-05, "loss": 0.779, "step": 18705 }, { "epoch": 2.25, "grad_norm": 0.29356223344802856, "learning_rate": 5.0970212909567236e-05, "loss": 0.8705, "step": 18710 }, { "epoch": 2.25, "grad_norm": 0.2562316656112671, "learning_rate": 5.0892945888305446e-05, "loss": 0.7458, "step": 18715 }, { "epoch": 2.26, "grad_norm": 0.31263595819473267, "learning_rate": 5.081572551011458e-05, "loss": 0.8115, "step": 18720 }, { "epoch": 2.26, "grad_norm": 0.31803205609321594, "learning_rate": 5.07385518113371e-05, "loss": 0.7623, "step": 18725 }, { "epoch": 2.26, "grad_norm": 0.25509113073349, "learning_rate": 5.06614248282935e-05, "loss": 0.8839, "step": 18730 }, { "epoch": 2.26, "grad_norm": 0.2617010474205017, "learning_rate": 5.058434459728229e-05, "loss": 0.7858, "step": 18735 }, { "epoch": 2.26, "grad_norm": 0.29557377099990845, "learning_rate": 5.0507311154579976e-05, 
"loss": 0.8331, "step": 18740 }, { "epoch": 2.26, "grad_norm": 0.28714311122894287, "learning_rate": 5.043032453644113e-05, "loss": 0.7831, "step": 18745 }, { "epoch": 2.26, "grad_norm": 0.27089929580688477, "learning_rate": 5.035338477909817e-05, "loss": 0.8846, "step": 18750 }, { "epoch": 2.26, "grad_norm": 0.2682959735393524, "learning_rate": 5.027649191876147e-05, "loss": 0.7482, "step": 18755 }, { "epoch": 2.26, "grad_norm": 0.26213210821151733, "learning_rate": 5.019964599161935e-05, "loss": 0.7949, "step": 18760 }, { "epoch": 2.26, "grad_norm": 0.30847010016441345, "learning_rate": 5.0122847033838156e-05, "loss": 0.7751, "step": 18765 }, { "epoch": 2.26, "grad_norm": 0.25752073526382446, "learning_rate": 5.004609508156196e-05, "loss": 0.8719, "step": 18770 }, { "epoch": 2.26, "grad_norm": 0.2857056260108948, "learning_rate": 4.996939017091278e-05, "loss": 0.7445, "step": 18775 }, { "epoch": 2.26, "grad_norm": 0.2874182164669037, "learning_rate": 4.989273233799051e-05, "loss": 0.7717, "step": 18780 }, { "epoch": 2.26, "grad_norm": 0.34927722811698914, "learning_rate": 4.981612161887285e-05, "loss": 0.7717, "step": 18785 }, { "epoch": 2.26, "grad_norm": 0.2875741422176361, "learning_rate": 4.973955804961536e-05, "loss": 0.8041, "step": 18790 }, { "epoch": 2.26, "grad_norm": 0.24138030409812927, "learning_rate": 4.966304166625139e-05, "loss": 0.8356, "step": 18795 }, { "epoch": 2.27, "grad_norm": 0.28111588954925537, "learning_rate": 4.958657250479208e-05, "loss": 0.7551, "step": 18800 }, { "epoch": 2.27, "grad_norm": 0.2540626525878906, "learning_rate": 4.9510150601226394e-05, "loss": 0.777, "step": 18805 }, { "epoch": 2.27, "grad_norm": 0.26399436593055725, "learning_rate": 4.9433775991521006e-05, "loss": 0.8436, "step": 18810 }, { "epoch": 2.27, "grad_norm": 0.27439209818840027, "learning_rate": 4.9357448711620345e-05, "loss": 0.7735, "step": 18815 }, { "epoch": 2.27, "grad_norm": 0.2720988988876343, "learning_rate": 4.9281168797446504e-05, "loss": 0.8395, 
"step": 18820 }, { "epoch": 2.27, "grad_norm": 0.30142033100128174, "learning_rate": 4.920493628489946e-05, "loss": 0.865, "step": 18825 }, { "epoch": 2.27, "grad_norm": 0.27852845191955566, "learning_rate": 4.9128751209856684e-05, "loss": 0.8549, "step": 18830 }, { "epoch": 2.27, "grad_norm": 0.2861536741256714, "learning_rate": 4.9052613608173425e-05, "loss": 0.77, "step": 18835 }, { "epoch": 2.27, "grad_norm": 0.26632633805274963, "learning_rate": 4.8976523515682505e-05, "loss": 0.8792, "step": 18840 }, { "epoch": 2.27, "grad_norm": 0.25601017475128174, "learning_rate": 4.890048096819456e-05, "loss": 0.6669, "step": 18845 }, { "epoch": 2.27, "grad_norm": 0.29297733306884766, "learning_rate": 4.882448600149767e-05, "loss": 0.8335, "step": 18850 }, { "epoch": 2.27, "grad_norm": 0.27074167132377625, "learning_rate": 4.874853865135761e-05, "loss": 0.7719, "step": 18855 }, { "epoch": 2.27, "grad_norm": 0.2736327648162842, "learning_rate": 4.8672638953517724e-05, "loss": 0.7569, "step": 18860 }, { "epoch": 2.27, "grad_norm": 0.2467767298221588, "learning_rate": 4.859678694369892e-05, "loss": 0.8277, "step": 18865 }, { "epoch": 2.27, "grad_norm": 0.26508867740631104, "learning_rate": 4.852098265759969e-05, "loss": 0.7552, "step": 18870 }, { "epoch": 2.27, "grad_norm": 0.295194149017334, "learning_rate": 4.844522613089601e-05, "loss": 0.7934, "step": 18875 }, { "epoch": 2.27, "grad_norm": 0.2907750606536865, "learning_rate": 4.836951739924141e-05, "loss": 0.8823, "step": 18880 }, { "epoch": 2.28, "grad_norm": 0.252069890499115, "learning_rate": 4.829385649826702e-05, "loss": 0.8101, "step": 18885 }, { "epoch": 2.28, "grad_norm": 0.263295978307724, "learning_rate": 4.82182434635813e-05, "loss": 0.8344, "step": 18890 }, { "epoch": 2.28, "grad_norm": 0.3141554892063141, "learning_rate": 4.814267833077029e-05, "loss": 0.8552, "step": 18895 }, { "epoch": 2.28, "grad_norm": 0.2977995276451111, "learning_rate": 4.806716113539737e-05, "loss": 0.7844, "step": 18900 }, { "epoch": 
2.28, "grad_norm": 0.28713858127593994, "learning_rate": 4.799169191300357e-05, "loss": 0.7359, "step": 18905 }, { "epoch": 2.28, "grad_norm": 0.265524297952652, "learning_rate": 4.791627069910713e-05, "loss": 0.7948, "step": 18910 }, { "epoch": 2.28, "grad_norm": 0.26660722494125366, "learning_rate": 4.784089752920381e-05, "loss": 0.7537, "step": 18915 }, { "epoch": 2.28, "grad_norm": 0.3201920688152313, "learning_rate": 4.776557243876665e-05, "loss": 0.6509, "step": 18920 }, { "epoch": 2.28, "grad_norm": 0.25317713618278503, "learning_rate": 4.7690295463246255e-05, "loss": 0.8217, "step": 18925 }, { "epoch": 2.28, "grad_norm": 0.2925649881362915, "learning_rate": 4.761506663807047e-05, "loss": 0.8093, "step": 18930 }, { "epoch": 2.28, "grad_norm": 0.26825374364852905, "learning_rate": 4.7539885998644365e-05, "loss": 0.7968, "step": 18935 }, { "epoch": 2.28, "grad_norm": 0.2798418402671814, "learning_rate": 4.746475358035046e-05, "loss": 0.8392, "step": 18940 }, { "epoch": 2.28, "grad_norm": 0.25741979479789734, "learning_rate": 4.738966941854866e-05, "loss": 0.7922, "step": 18945 }, { "epoch": 2.28, "grad_norm": 0.27443379163742065, "learning_rate": 4.731463354857602e-05, "loss": 0.8369, "step": 18950 }, { "epoch": 2.28, "grad_norm": 0.2702893316745758, "learning_rate": 4.7239646005746895e-05, "loss": 0.7611, "step": 18955 }, { "epoch": 2.28, "grad_norm": 0.38152143359184265, "learning_rate": 4.716470682535289e-05, "loss": 0.8706, "step": 18960 }, { "epoch": 2.29, "grad_norm": 0.26766619086265564, "learning_rate": 4.708981604266296e-05, "loss": 0.8218, "step": 18965 }, { "epoch": 2.29, "grad_norm": 0.25141772627830505, "learning_rate": 4.701497369292313e-05, "loss": 0.8075, "step": 18970 }, { "epoch": 2.29, "grad_norm": 0.264624685049057, "learning_rate": 4.694017981135671e-05, "loss": 0.7692, "step": 18975 }, { "epoch": 2.29, "grad_norm": 0.2943498194217682, "learning_rate": 4.6865434433164125e-05, "loss": 0.7622, "step": 18980 }, { "epoch": 2.29, "grad_norm": 
0.26615047454833984, "learning_rate": 4.679073759352315e-05, "loss": 0.7964, "step": 18985 }, { "epoch": 2.29, "grad_norm": 0.24245823919773102, "learning_rate": 4.671608932758853e-05, "loss": 0.7825, "step": 18990 }, { "epoch": 2.29, "grad_norm": 0.2933671772480011, "learning_rate": 4.664148967049221e-05, "loss": 0.8122, "step": 18995 }, { "epoch": 2.29, "grad_norm": 0.2749022841453552, "learning_rate": 4.656693865734331e-05, "loss": 0.7729, "step": 19000 }, { "epoch": 2.29, "grad_norm": 0.268289178609848, "learning_rate": 4.649243632322796e-05, "loss": 0.793, "step": 19005 }, { "epoch": 2.29, "grad_norm": 0.28177735209465027, "learning_rate": 4.641798270320948e-05, "loss": 0.8523, "step": 19010 }, { "epoch": 2.29, "grad_norm": 0.262329638004303, "learning_rate": 4.6343577832328176e-05, "loss": 0.8208, "step": 19015 }, { "epoch": 2.29, "grad_norm": 0.31374141573905945, "learning_rate": 4.626922174560142e-05, "loss": 0.8154, "step": 19020 }, { "epoch": 2.29, "grad_norm": 0.28644031286239624, "learning_rate": 4.6194914478023754e-05, "loss": 0.8359, "step": 19025 }, { "epoch": 2.29, "grad_norm": 0.2689576745033264, "learning_rate": 4.6120656064566604e-05, "loss": 0.9421, "step": 19030 }, { "epoch": 2.29, "grad_norm": 0.25812458992004395, "learning_rate": 4.604644654017843e-05, "loss": 0.8141, "step": 19035 }, { "epoch": 2.29, "grad_norm": 0.2505223751068115, "learning_rate": 4.597228593978464e-05, "loss": 0.8084, "step": 19040 }, { "epoch": 2.29, "grad_norm": 0.2971956431865692, "learning_rate": 4.589817429828781e-05, "loss": 0.7819, "step": 19045 }, { "epoch": 2.3, "grad_norm": 0.274215430021286, "learning_rate": 4.5824111650567264e-05, "loss": 0.7431, "step": 19050 }, { "epoch": 2.3, "grad_norm": 0.2821873724460602, "learning_rate": 4.575009803147929e-05, "loss": 0.8117, "step": 19055 }, { "epoch": 2.3, "grad_norm": 0.2926546037197113, "learning_rate": 4.567613347585727e-05, "loss": 0.8768, "step": 19060 }, { "epoch": 2.3, "grad_norm": 0.3123989701271057, 
"learning_rate": 4.560221801851133e-05, "loss": 0.7407, "step": 19065 }, { "epoch": 2.3, "grad_norm": 0.3110482394695282, "learning_rate": 4.552835169422854e-05, "loss": 0.7504, "step": 19070 }, { "epoch": 2.3, "grad_norm": 0.2745916545391083, "learning_rate": 4.5454534537772825e-05, "loss": 0.6989, "step": 19075 }, { "epoch": 2.3, "grad_norm": 0.2758617401123047, "learning_rate": 4.5380766583885016e-05, "loss": 0.7739, "step": 19080 }, { "epoch": 2.3, "grad_norm": 0.2954859733581543, "learning_rate": 4.5307047867282734e-05, "loss": 0.838, "step": 19085 }, { "epoch": 2.3, "grad_norm": 0.285125732421875, "learning_rate": 4.523337842266047e-05, "loss": 0.7301, "step": 19090 }, { "epoch": 2.3, "grad_norm": 0.2881282567977905, "learning_rate": 4.515975828468949e-05, "loss": 0.7589, "step": 19095 }, { "epoch": 2.3, "grad_norm": 0.30516520142555237, "learning_rate": 4.508618748801793e-05, "loss": 0.8142, "step": 19100 }, { "epoch": 2.3, "grad_norm": 0.315054714679718, "learning_rate": 4.50126660672706e-05, "loss": 0.7316, "step": 19105 }, { "epoch": 2.3, "grad_norm": 0.2825736999511719, "learning_rate": 4.493919405704917e-05, "loss": 0.777, "step": 19110 }, { "epoch": 2.3, "grad_norm": 0.2859044671058655, "learning_rate": 4.486577149193191e-05, "loss": 0.7306, "step": 19115 }, { "epoch": 2.3, "grad_norm": 0.29017677903175354, "learning_rate": 4.479239840647405e-05, "loss": 0.8264, "step": 19120 }, { "epoch": 2.3, "grad_norm": 0.28172457218170166, "learning_rate": 4.471907483520732e-05, "loss": 0.7622, "step": 19125 }, { "epoch": 2.3, "grad_norm": 0.28697964549064636, "learning_rate": 4.464580081264026e-05, "loss": 0.8655, "step": 19130 }, { "epoch": 2.31, "grad_norm": 0.290617972612381, "learning_rate": 4.4572576373257986e-05, "loss": 0.7366, "step": 19135 }, { "epoch": 2.31, "grad_norm": 0.2692755460739136, "learning_rate": 4.4499401551522504e-05, "loss": 0.7704, "step": 19140 }, { "epoch": 2.31, "grad_norm": 0.3180653750896454, "learning_rate": 4.442627638187216e-05, 
"loss": 0.7775, "step": 19145 }, { "epoch": 2.31, "grad_norm": 0.3238975703716278, "learning_rate": 4.435320089872217e-05, "loss": 0.7988, "step": 19150 }, { "epoch": 2.31, "grad_norm": 0.2899223864078522, "learning_rate": 4.428017513646418e-05, "loss": 0.7712, "step": 19155 }, { "epoch": 2.31, "grad_norm": 0.2541576027870178, "learning_rate": 4.4207199129466685e-05, "loss": 0.8302, "step": 19160 }, { "epoch": 2.31, "grad_norm": 0.3092675805091858, "learning_rate": 4.4134272912074546e-05, "loss": 0.7243, "step": 19165 }, { "epoch": 2.31, "grad_norm": 0.2940780818462372, "learning_rate": 4.406139651860928e-05, "loss": 0.7353, "step": 19170 }, { "epoch": 2.31, "grad_norm": 0.27896803617477417, "learning_rate": 4.398856998336885e-05, "loss": 0.7755, "step": 19175 }, { "epoch": 2.31, "grad_norm": 0.275955468416214, "learning_rate": 4.391579334062798e-05, "loss": 0.6845, "step": 19180 }, { "epoch": 2.31, "grad_norm": 0.27416709065437317, "learning_rate": 4.3843066624637705e-05, "loss": 0.8806, "step": 19185 }, { "epoch": 2.31, "grad_norm": 0.2871681749820709, "learning_rate": 4.3770389869625654e-05, "loss": 0.8377, "step": 19190 }, { "epoch": 2.31, "grad_norm": 0.2698437571525574, "learning_rate": 4.3697763109795845e-05, "loss": 0.7839, "step": 19195 }, { "epoch": 2.31, "grad_norm": 0.2793125510215759, "learning_rate": 4.362518637932896e-05, "loss": 0.7334, "step": 19200 }, { "epoch": 2.31, "grad_norm": 0.26299965381622314, "learning_rate": 4.3552659712381965e-05, "loss": 0.7914, "step": 19205 }, { "epoch": 2.31, "grad_norm": 0.29598289728164673, "learning_rate": 4.34801831430883e-05, "loss": 0.7296, "step": 19210 }, { "epoch": 2.32, "grad_norm": 0.30345529317855835, "learning_rate": 4.340775670555787e-05, "loss": 0.7574, "step": 19215 }, { "epoch": 2.32, "grad_norm": 0.27294403314590454, "learning_rate": 4.333538043387695e-05, "loss": 0.8409, "step": 19220 }, { "epoch": 2.32, "grad_norm": 0.2598150968551636, "learning_rate": 4.326305436210823e-05, "loss": 0.7326, 
"step": 19225 }, { "epoch": 2.32, "grad_norm": 0.24395301938056946, "learning_rate": 4.3190778524290755e-05, "loss": 0.8482, "step": 19230 }, { "epoch": 2.32, "grad_norm": 0.26434871554374695, "learning_rate": 4.311855295443987e-05, "loss": 0.7736, "step": 19235 }, { "epoch": 2.32, "grad_norm": 0.3252696394920349, "learning_rate": 4.304637768654744e-05, "loss": 0.7654, "step": 19240 }, { "epoch": 2.32, "grad_norm": 0.31384336948394775, "learning_rate": 4.29742527545815e-05, "loss": 0.7946, "step": 19245 }, { "epoch": 2.32, "grad_norm": 0.2744503617286682, "learning_rate": 4.290217819248641e-05, "loss": 0.8248, "step": 19250 }, { "epoch": 2.32, "grad_norm": 0.29785433411598206, "learning_rate": 4.283015403418284e-05, "loss": 0.777, "step": 19255 }, { "epoch": 2.32, "grad_norm": 0.26613983511924744, "learning_rate": 4.275818031356783e-05, "loss": 0.7121, "step": 19260 }, { "epoch": 2.32, "grad_norm": 0.2956700623035431, "learning_rate": 4.268625706451454e-05, "loss": 0.8807, "step": 19265 }, { "epoch": 2.32, "grad_norm": 0.2700934112071991, "learning_rate": 4.261438432087247e-05, "loss": 0.703, "step": 19270 }, { "epoch": 2.32, "grad_norm": 0.2920432984828949, "learning_rate": 4.2542562116467245e-05, "loss": 0.8218, "step": 19275 }, { "epoch": 2.32, "grad_norm": 0.2755813002586365, "learning_rate": 4.247079048510089e-05, "loss": 0.7852, "step": 19280 }, { "epoch": 2.32, "grad_norm": 0.3014310598373413, "learning_rate": 4.239906946055148e-05, "loss": 0.7117, "step": 19285 }, { "epoch": 2.32, "grad_norm": 0.32181209325790405, "learning_rate": 4.2327399076573284e-05, "loss": 0.8007, "step": 19290 }, { "epoch": 2.32, "grad_norm": 0.3137061297893524, "learning_rate": 4.225577936689677e-05, "loss": 0.7185, "step": 19295 }, { "epoch": 2.33, "grad_norm": 0.24845640361309052, "learning_rate": 4.2184210365228575e-05, "loss": 0.733, "step": 19300 }, { "epoch": 2.33, "grad_norm": 0.28396517038345337, "learning_rate": 4.2112692105251446e-05, "loss": 0.8673, "step": 19305 }, { 
"epoch": 2.33, "grad_norm": 0.2980553209781647, "learning_rate": 4.204122462062422e-05, "loss": 0.7282, "step": 19310 }, { "epoch": 2.33, "grad_norm": 0.31656941771507263, "learning_rate": 4.196980794498185e-05, "loss": 0.7511, "step": 19315 }, { "epoch": 2.33, "grad_norm": 0.26247191429138184, "learning_rate": 4.189844211193548e-05, "loss": 0.6768, "step": 19320 }, { "epoch": 2.33, "grad_norm": 0.26794061064720154, "learning_rate": 4.182712715507217e-05, "loss": 0.774, "step": 19325 }, { "epoch": 2.33, "grad_norm": 0.289065420627594, "learning_rate": 4.175586310795515e-05, "loss": 0.7939, "step": 19330 }, { "epoch": 2.33, "grad_norm": 0.27553194761276245, "learning_rate": 4.168465000412359e-05, "loss": 0.6774, "step": 19335 }, { "epoch": 2.33, "grad_norm": 0.29777318239212036, "learning_rate": 4.161348787709282e-05, "loss": 0.6694, "step": 19340 }, { "epoch": 2.33, "grad_norm": 0.289673388004303, "learning_rate": 4.1542376760354045e-05, "loss": 0.7335, "step": 19345 }, { "epoch": 2.33, "grad_norm": 0.27125170826911926, "learning_rate": 4.1471316687374556e-05, "loss": 0.8134, "step": 19350 }, { "epoch": 2.33, "grad_norm": 0.2760598063468933, "learning_rate": 4.140030769159755e-05, "loss": 0.8138, "step": 19355 }, { "epoch": 2.33, "grad_norm": 0.28096047043800354, "learning_rate": 4.132934980644223e-05, "loss": 0.7309, "step": 19360 }, { "epoch": 2.33, "grad_norm": 0.28885167837142944, "learning_rate": 4.125844306530373e-05, "loss": 0.752, "step": 19365 }, { "epoch": 2.33, "grad_norm": 0.2942561209201813, "learning_rate": 4.118758750155311e-05, "loss": 0.7993, "step": 19370 }, { "epoch": 2.33, "grad_norm": 0.24654746055603027, "learning_rate": 4.111678314853732e-05, "loss": 0.7266, "step": 19375 }, { "epoch": 2.34, "grad_norm": 0.2839394807815552, "learning_rate": 4.104603003957934e-05, "loss": 0.7632, "step": 19380 }, { "epoch": 2.34, "grad_norm": 0.27274009585380554, "learning_rate": 4.097532820797786e-05, "loss": 0.8847, "step": 19385 }, { "epoch": 2.34, 
"grad_norm": 0.2987731397151947, "learning_rate": 4.0904677687007524e-05, "loss": 0.7926, "step": 19390 }, { "epoch": 2.34, "grad_norm": 0.2836725413799286, "learning_rate": 4.0834078509918756e-05, "loss": 0.8595, "step": 19395 }, { "epoch": 2.34, "grad_norm": 0.3034559190273285, "learning_rate": 4.076353070993799e-05, "loss": 0.8338, "step": 19400 }, { "epoch": 2.34, "grad_norm": 0.3112902045249939, "learning_rate": 4.0693034320267295e-05, "loss": 0.7235, "step": 19405 }, { "epoch": 2.34, "grad_norm": 0.3092862367630005, "learning_rate": 4.062258937408464e-05, "loss": 0.7693, "step": 19410 }, { "epoch": 2.34, "grad_norm": 0.2575216591358185, "learning_rate": 4.055219590454366e-05, "loss": 0.769, "step": 19415 }, { "epoch": 2.34, "grad_norm": 0.28645190596580505, "learning_rate": 4.0481853944774e-05, "loss": 0.7985, "step": 19420 }, { "epoch": 2.34, "grad_norm": 0.29721811413764954, "learning_rate": 4.0411563527880856e-05, "loss": 0.878, "step": 19425 }, { "epoch": 2.34, "grad_norm": 0.27767476439476013, "learning_rate": 4.0341324686945237e-05, "loss": 0.8084, "step": 19430 }, { "epoch": 2.34, "grad_norm": 0.3142256736755371, "learning_rate": 4.027113745502388e-05, "loss": 0.7558, "step": 19435 }, { "epoch": 2.34, "grad_norm": 0.28294190764427185, "learning_rate": 4.0201001865149206e-05, "loss": 0.8359, "step": 19440 }, { "epoch": 2.34, "grad_norm": 0.31937623023986816, "learning_rate": 4.013091795032937e-05, "loss": 0.769, "step": 19445 }, { "epoch": 2.34, "grad_norm": 0.26449480652809143, "learning_rate": 4.006088574354819e-05, "loss": 0.7822, "step": 19450 }, { "epoch": 2.34, "grad_norm": 0.3096768856048584, "learning_rate": 3.999090527776509e-05, "loss": 0.7575, "step": 19455 }, { "epoch": 2.34, "grad_norm": 0.2941647171974182, "learning_rate": 3.9920976585915296e-05, "loss": 0.8384, "step": 19460 }, { "epoch": 2.35, "grad_norm": 0.3138192594051361, "learning_rate": 3.985109970090956e-05, "loss": 0.7938, "step": 19465 }, { "epoch": 2.35, "grad_norm": 
0.24960915744304657, "learning_rate": 3.9781274655634205e-05, "loss": 0.7544, "step": 19470 }, { "epoch": 2.35, "grad_norm": 0.2531121075153351, "learning_rate": 3.971150148295123e-05, "loss": 0.9053, "step": 19475 }, { "epoch": 2.35, "grad_norm": 0.25435906648635864, "learning_rate": 3.964178021569825e-05, "loss": 0.7899, "step": 19480 }, { "epoch": 2.35, "grad_norm": 0.27135327458381653, "learning_rate": 3.957211088668838e-05, "loss": 0.7274, "step": 19485 }, { "epoch": 2.35, "grad_norm": 0.2866491675376892, "learning_rate": 3.950249352871034e-05, "loss": 0.7441, "step": 19490 }, { "epoch": 2.35, "grad_norm": 0.2988530397415161, "learning_rate": 3.94329281745283e-05, "loss": 0.7978, "step": 19495 }, { "epoch": 2.35, "grad_norm": 0.27263930439949036, "learning_rate": 3.9363414856882126e-05, "loss": 0.7804, "step": 19500 }, { "epoch": 2.35, "grad_norm": 0.2896611988544464, "learning_rate": 3.929395360848704e-05, "loss": 0.7589, "step": 19505 }, { "epoch": 2.35, "grad_norm": 0.28115859627723694, "learning_rate": 3.9224544462033844e-05, "loss": 0.7887, "step": 19510 }, { "epoch": 2.35, "grad_norm": 0.31314992904663086, "learning_rate": 3.915518745018873e-05, "loss": 0.7634, "step": 19515 }, { "epoch": 2.35, "grad_norm": 0.28734245896339417, "learning_rate": 3.9085882605593485e-05, "loss": 0.8887, "step": 19520 }, { "epoch": 2.35, "grad_norm": 0.26335301995277405, "learning_rate": 3.901662996086519e-05, "loss": 0.8303, "step": 19525 }, { "epoch": 2.35, "grad_norm": 0.2777862846851349, "learning_rate": 3.8947429548596506e-05, "loss": 0.7661, "step": 19530 }, { "epoch": 2.35, "grad_norm": 0.2924749553203583, "learning_rate": 3.8878281401355366e-05, "loss": 0.7203, "step": 19535 }, { "epoch": 2.35, "grad_norm": 0.265706866979599, "learning_rate": 3.880918555168528e-05, "loss": 0.8163, "step": 19540 }, { "epoch": 2.35, "grad_norm": 0.29131948947906494, "learning_rate": 3.8740142032105e-05, "loss": 0.8113, "step": 19545 }, { "epoch": 2.36, "grad_norm": 0.26033177971839905, 
"learning_rate": 3.8671150875108715e-05, "loss": 0.8122, "step": 19550 }, { "epoch": 2.36, "grad_norm": 0.2742202579975128, "learning_rate": 3.8602212113165886e-05, "loss": 0.8273, "step": 19555 }, { "epoch": 2.36, "grad_norm": 0.2769484221935272, "learning_rate": 3.85333257787215e-05, "loss": 0.7594, "step": 19560 }, { "epoch": 2.36, "grad_norm": 0.31669124960899353, "learning_rate": 3.846449190419569e-05, "loss": 0.8816, "step": 19565 }, { "epoch": 2.36, "grad_norm": 0.28259631991386414, "learning_rate": 3.839571052198398e-05, "loss": 0.7435, "step": 19570 }, { "epoch": 2.36, "grad_norm": 0.2604540288448334, "learning_rate": 3.832698166445715e-05, "loss": 0.7484, "step": 19575 }, { "epoch": 2.36, "grad_norm": 0.3139670491218567, "learning_rate": 3.82583053639613e-05, "loss": 0.7701, "step": 19580 }, { "epoch": 2.36, "grad_norm": 0.30153560638427734, "learning_rate": 3.818968165281777e-05, "loss": 0.8798, "step": 19585 }, { "epoch": 2.36, "grad_norm": 0.27177488803863525, "learning_rate": 3.8121110563323115e-05, "loss": 0.7226, "step": 19590 }, { "epoch": 2.36, "grad_norm": 0.27674952149391174, "learning_rate": 3.8052592127749265e-05, "loss": 0.8741, "step": 19595 }, { "epoch": 2.36, "grad_norm": 0.2683180570602417, "learning_rate": 3.798412637834321e-05, "loss": 0.767, "step": 19600 }, { "epoch": 2.36, "grad_norm": 0.25967836380004883, "learning_rate": 3.7915713347327225e-05, "loss": 0.7556, "step": 19605 }, { "epoch": 2.36, "grad_norm": 0.2514658570289612, "learning_rate": 3.784735306689869e-05, "loss": 0.7164, "step": 19610 }, { "epoch": 2.36, "grad_norm": 0.2914251983165741, "learning_rate": 3.777904556923031e-05, "loss": 0.9218, "step": 19615 }, { "epoch": 2.36, "grad_norm": 0.24623388051986694, "learning_rate": 3.7710790886469837e-05, "loss": 0.8447, "step": 19620 }, { "epoch": 2.36, "grad_norm": 0.295613169670105, "learning_rate": 3.764258905074018e-05, "loss": 0.735, "step": 19625 }, { "epoch": 2.37, "grad_norm": 0.3049282133579254, "learning_rate": 
3.7574440094139364e-05, "loss": 0.7163, "step": 19630 }, { "epoch": 2.37, "grad_norm": 0.3349364101886749, "learning_rate": 3.75063440487406e-05, "loss": 0.8382, "step": 19635 }, { "epoch": 2.37, "grad_norm": 0.2821791172027588, "learning_rate": 3.743830094659212e-05, "loss": 0.7424, "step": 19640 }, { "epoch": 2.37, "grad_norm": 0.2976232171058655, "learning_rate": 3.737031081971729e-05, "loss": 0.7901, "step": 19645 }, { "epoch": 2.37, "grad_norm": 0.2489924281835556, "learning_rate": 3.73023737001145e-05, "loss": 0.7586, "step": 19650 }, { "epoch": 2.37, "grad_norm": 0.2726670503616333, "learning_rate": 3.723448961975722e-05, "loss": 0.8153, "step": 19655 }, { "epoch": 2.37, "grad_norm": 0.29014068841934204, "learning_rate": 3.7166658610593955e-05, "loss": 0.8317, "step": 19660 }, { "epoch": 2.37, "grad_norm": 0.28912895917892456, "learning_rate": 3.709888070454822e-05, "loss": 0.8345, "step": 19665 }, { "epoch": 2.37, "grad_norm": 0.2740095257759094, "learning_rate": 3.70311559335185e-05, "loss": 0.7405, "step": 19670 }, { "epoch": 2.37, "grad_norm": 0.3410658836364746, "learning_rate": 3.696348432937845e-05, "loss": 0.7129, "step": 19675 }, { "epoch": 2.37, "grad_norm": 0.2865118682384491, "learning_rate": 3.689586592397647e-05, "loss": 0.7907, "step": 19680 }, { "epoch": 2.37, "grad_norm": 0.26647239923477173, "learning_rate": 3.6828300749136085e-05, "loss": 0.8186, "step": 19685 }, { "epoch": 2.37, "grad_norm": 0.30425769090652466, "learning_rate": 3.6760788836655624e-05, "loss": 0.7776, "step": 19690 }, { "epoch": 2.37, "grad_norm": 0.28291577100753784, "learning_rate": 3.669333021830854e-05, "loss": 0.7774, "step": 19695 }, { "epoch": 2.37, "grad_norm": 0.294132262468338, "learning_rate": 3.662592492584306e-05, "loss": 0.7293, "step": 19700 }, { "epoch": 2.37, "grad_norm": 0.28489431738853455, "learning_rate": 3.655857299098233e-05, "loss": 0.7946, "step": 19705 }, { "epoch": 2.37, "grad_norm": 0.2683461308479309, "learning_rate": 3.64912744454244e-05, 
"loss": 0.7626, "step": 19710 }, { "epoch": 2.38, "grad_norm": 0.24661622941493988, "learning_rate": 3.6424029320842265e-05, "loss": 0.7492, "step": 19715 }, { "epoch": 2.38, "grad_norm": 0.266984760761261, "learning_rate": 3.635683764888367e-05, "loss": 0.7598, "step": 19720 }, { "epoch": 2.38, "grad_norm": 0.2817406952381134, "learning_rate": 3.628969946117129e-05, "loss": 0.852, "step": 19725 }, { "epoch": 2.38, "grad_norm": 0.2606806755065918, "learning_rate": 3.6222614789302454e-05, "loss": 0.863, "step": 19730 }, { "epoch": 2.38, "grad_norm": 0.2557438015937805, "learning_rate": 3.615558366484958e-05, "loss": 0.7349, "step": 19735 }, { "epoch": 2.38, "grad_norm": 0.27822768688201904, "learning_rate": 3.608860611935969e-05, "loss": 0.8482, "step": 19740 }, { "epoch": 2.38, "grad_norm": 0.2577543556690216, "learning_rate": 3.602168218435464e-05, "loss": 0.8088, "step": 19745 }, { "epoch": 2.38, "grad_norm": 0.3152078092098236, "learning_rate": 3.595481189133101e-05, "loss": 0.7895, "step": 19750 }, { "epoch": 2.38, "grad_norm": 0.2776154577732086, "learning_rate": 3.588799527176026e-05, "loss": 0.8115, "step": 19755 }, { "epoch": 2.38, "grad_norm": 0.2907949984073639, "learning_rate": 3.5821232357088493e-05, "loss": 0.8447, "step": 19760 }, { "epoch": 2.38, "grad_norm": 0.27817782759666443, "learning_rate": 3.575452317873653e-05, "loss": 0.7258, "step": 19765 }, { "epoch": 2.38, "grad_norm": 0.2644236087799072, "learning_rate": 3.56878677680999e-05, "loss": 0.7573, "step": 19770 }, { "epoch": 2.38, "grad_norm": 0.28008079528808594, "learning_rate": 3.5621266156548925e-05, "loss": 0.8521, "step": 19775 }, { "epoch": 2.38, "grad_norm": 0.2851678133010864, "learning_rate": 3.55547183754285e-05, "loss": 0.8946, "step": 19780 }, { "epoch": 2.38, "grad_norm": 0.29239290952682495, "learning_rate": 3.5488224456058215e-05, "loss": 0.7888, "step": 19785 }, { "epoch": 2.38, "grad_norm": 0.2597346305847168, "learning_rate": 3.5421784429732316e-05, "loss": 0.7592, "step": 
19790 }, { "epoch": 2.39, "grad_norm": 0.2777891159057617, "learning_rate": 3.535539832771971e-05, "loss": 0.7478, "step": 19795 }, { "epoch": 2.39, "grad_norm": 0.2665190100669861, "learning_rate": 3.528906618126385e-05, "loss": 0.8196, "step": 19800 }, { "epoch": 2.39, "grad_norm": 0.2569584548473358, "learning_rate": 3.52227880215829e-05, "loss": 0.8045, "step": 19805 }, { "epoch": 2.39, "grad_norm": 0.2824585437774658, "learning_rate": 3.5156563879869494e-05, "loss": 0.7438, "step": 19810 }, { "epoch": 2.39, "grad_norm": 0.2764226496219635, "learning_rate": 3.509039378729099e-05, "loss": 0.7557, "step": 19815 }, { "epoch": 2.39, "grad_norm": 0.2752508521080017, "learning_rate": 3.5024277774989203e-05, "loss": 0.9046, "step": 19820 }, { "epoch": 2.39, "grad_norm": 0.28766295313835144, "learning_rate": 3.4958215874080526e-05, "loss": 0.7422, "step": 19825 }, { "epoch": 2.39, "grad_norm": 0.25225573778152466, "learning_rate": 3.4892208115655837e-05, "loss": 0.8565, "step": 19830 }, { "epoch": 2.39, "grad_norm": 0.3149223327636719, "learning_rate": 3.482625453078065e-05, "loss": 0.7516, "step": 19835 }, { "epoch": 2.39, "grad_norm": 0.30531787872314453, "learning_rate": 3.4760355150494895e-05, "loss": 0.7884, "step": 19840 }, { "epoch": 2.39, "grad_norm": 0.2835243046283722, "learning_rate": 3.4694510005812996e-05, "loss": 0.7924, "step": 19845 }, { "epoch": 2.39, "grad_norm": 0.2607024013996124, "learning_rate": 3.462871912772382e-05, "loss": 0.7911, "step": 19850 }, { "epoch": 2.39, "grad_norm": 0.29384374618530273, "learning_rate": 3.4562982547190824e-05, "loss": 0.8039, "step": 19855 }, { "epoch": 2.39, "grad_norm": 0.28320446610450745, "learning_rate": 3.449730029515179e-05, "loss": 0.7786, "step": 19860 }, { "epoch": 2.39, "grad_norm": 0.281463623046875, "learning_rate": 3.4431672402518955e-05, "loss": 0.7672, "step": 19865 }, { "epoch": 2.39, "grad_norm": 0.2654038369655609, "learning_rate": 3.436609890017901e-05, "loss": 0.8256, "step": 19870 }, { "epoch": 
2.39, "grad_norm": 0.2744678556919098, "learning_rate": 3.430057981899298e-05, "loss": 0.7221, "step": 19875 }, { "epoch": 2.4, "grad_norm": 0.28920984268188477, "learning_rate": 3.4235115189796375e-05, "loss": 0.7532, "step": 19880 }, { "epoch": 2.4, "grad_norm": 0.3083685040473938, "learning_rate": 3.416970504339897e-05, "loss": 0.7365, "step": 19885 }, { "epoch": 2.4, "grad_norm": 0.25423839688301086, "learning_rate": 3.410434941058495e-05, "loss": 0.8097, "step": 19890 }, { "epoch": 2.4, "grad_norm": 0.25034499168395996, "learning_rate": 3.4039048322112917e-05, "loss": 0.7832, "step": 19895 }, { "epoch": 2.4, "grad_norm": 0.2709154188632965, "learning_rate": 3.3973801808715676e-05, "loss": 0.7133, "step": 19900 }, { "epoch": 2.4, "grad_norm": 0.2830277383327484, "learning_rate": 3.390860990110042e-05, "loss": 0.8309, "step": 19905 }, { "epoch": 2.4, "grad_norm": 0.26153650879859924, "learning_rate": 3.384347262994858e-05, "loss": 0.7983, "step": 19910 }, { "epoch": 2.4, "grad_norm": 0.23845182359218597, "learning_rate": 3.377839002591599e-05, "loss": 0.84, "step": 19915 }, { "epoch": 2.4, "grad_norm": 0.3157477080821991, "learning_rate": 3.371336211963268e-05, "loss": 0.7391, "step": 19920 }, { "epoch": 2.4, "grad_norm": 0.25639525055885315, "learning_rate": 3.364838894170289e-05, "loss": 0.7765, "step": 19925 }, { "epoch": 2.4, "grad_norm": 0.27470090985298157, "learning_rate": 3.358347052270515e-05, "loss": 0.7211, "step": 19930 }, { "epoch": 2.4, "grad_norm": 0.3165142238140106, "learning_rate": 3.351860689319234e-05, "loss": 0.7079, "step": 19935 }, { "epoch": 2.4, "grad_norm": 0.2669844329357147, "learning_rate": 3.345379808369132e-05, "loss": 0.7023, "step": 19940 }, { "epoch": 2.4, "grad_norm": 0.29056012630462646, "learning_rate": 3.338904412470328e-05, "loss": 0.8274, "step": 19945 }, { "epoch": 2.4, "grad_norm": 0.304496169090271, "learning_rate": 3.332434504670358e-05, "loss": 0.821, "step": 19950 }, { "epoch": 2.4, "grad_norm": 0.29382607340812683, 
"learning_rate": 3.3259700880141815e-05, "loss": 0.7403, "step": 19955 }, { "epoch": 2.4, "grad_norm": 0.27669087052345276, "learning_rate": 3.319511165544165e-05, "loss": 0.6459, "step": 19960 }, { "epoch": 2.41, "grad_norm": 0.2903481125831604, "learning_rate": 3.313057740300089e-05, "loss": 0.8414, "step": 19965 }, { "epoch": 2.41, "grad_norm": 0.2781134247779846, "learning_rate": 3.30660981531915e-05, "loss": 0.7587, "step": 19970 }, { "epoch": 2.41, "grad_norm": 0.24643659591674805, "learning_rate": 3.3001673936359604e-05, "loss": 0.7535, "step": 19975 }, { "epoch": 2.41, "grad_norm": 0.306083083152771, "learning_rate": 3.293730478282534e-05, "loss": 0.7106, "step": 19980 }, { "epoch": 2.41, "grad_norm": 0.26486045122146606, "learning_rate": 3.2872990722882984e-05, "loss": 0.8252, "step": 19985 }, { "epoch": 2.41, "grad_norm": 0.2603921890258789, "learning_rate": 3.280873178680082e-05, "loss": 0.7914, "step": 19990 }, { "epoch": 2.41, "grad_norm": 0.31112536787986755, "learning_rate": 3.274452800482133e-05, "loss": 0.7106, "step": 19995 }, { "epoch": 2.41, "grad_norm": 0.26250556111335754, "learning_rate": 3.2680379407160886e-05, "loss": 0.7859, "step": 20000 }, { "epoch": 2.41, "grad_norm": 0.28084659576416016, "learning_rate": 3.261628602400995e-05, "loss": 0.7564, "step": 20005 }, { "epoch": 2.41, "grad_norm": 0.2960032820701599, "learning_rate": 3.2552247885533005e-05, "loss": 0.8016, "step": 20010 }, { "epoch": 2.41, "grad_norm": 0.3024326264858246, "learning_rate": 3.248826502186854e-05, "loss": 0.8589, "step": 20015 }, { "epoch": 2.41, "grad_norm": 0.28151461482048035, "learning_rate": 3.242433746312899e-05, "loss": 0.8336, "step": 20020 }, { "epoch": 2.41, "grad_norm": 0.2718256711959839, "learning_rate": 3.23604652394008e-05, "loss": 0.6869, "step": 20025 }, { "epoch": 2.41, "grad_norm": 0.31024929881095886, "learning_rate": 3.229664838074431e-05, "loss": 0.9043, "step": 20030 }, { "epoch": 2.41, "grad_norm": 0.26419252157211304, "learning_rate": 
3.223288691719394e-05, "loss": 0.7494, "step": 20035 }, { "epoch": 2.41, "grad_norm": 0.27617117762565613, "learning_rate": 3.216918087875792e-05, "loss": 0.7913, "step": 20040 }, { "epoch": 2.42, "grad_norm": 0.2638927102088928, "learning_rate": 3.21055302954184e-05, "loss": 0.7151, "step": 20045 }, { "epoch": 2.42, "grad_norm": 0.2976774573326111, "learning_rate": 3.204193519713146e-05, "loss": 0.7635, "step": 20050 }, { "epoch": 2.42, "grad_norm": 0.256143182516098, "learning_rate": 3.197839561382711e-05, "loss": 0.8376, "step": 20055 }, { "epoch": 2.42, "grad_norm": 0.2871249318122864, "learning_rate": 3.1914911575409165e-05, "loss": 0.707, "step": 20060 }, { "epoch": 2.42, "grad_norm": 0.269939661026001, "learning_rate": 3.1851483111755276e-05, "loss": 0.8975, "step": 20065 }, { "epoch": 2.42, "grad_norm": 0.2935774326324463, "learning_rate": 3.178811025271705e-05, "loss": 0.787, "step": 20070 }, { "epoch": 2.42, "grad_norm": 0.2772122621536255, "learning_rate": 3.1724793028119846e-05, "loss": 0.6818, "step": 20075 }, { "epoch": 2.42, "grad_norm": 0.3376406133174896, "learning_rate": 3.1661531467762866e-05, "loss": 0.6406, "step": 20080 }, { "epoch": 2.42, "grad_norm": 0.24912644922733307, "learning_rate": 3.159832560141904e-05, "loss": 0.8038, "step": 20085 }, { "epoch": 2.42, "grad_norm": 0.26904556155204773, "learning_rate": 3.15351754588352e-05, "loss": 0.8043, "step": 20090 }, { "epoch": 2.42, "grad_norm": 0.27094265818595886, "learning_rate": 3.147208106973189e-05, "loss": 0.8588, "step": 20095 }, { "epoch": 2.42, "grad_norm": 0.26303452253341675, "learning_rate": 3.1409042463803406e-05, "loss": 0.7485, "step": 20100 }, { "epoch": 2.42, "grad_norm": 0.29558247327804565, "learning_rate": 3.1346059670717783e-05, "loss": 0.8514, "step": 20105 }, { "epoch": 2.42, "grad_norm": 0.2617008090019226, "learning_rate": 3.12831327201169e-05, "loss": 0.7401, "step": 20110 }, { "epoch": 2.42, "grad_norm": 0.29769590497016907, "learning_rate": 3.1220261641616226e-05, 
"loss": 0.6958, "step": 20115 }, { "epoch": 2.42, "grad_norm": 0.2647980749607086, "learning_rate": 3.1157446464804966e-05, "loss": 0.824, "step": 20120 }, { "epoch": 2.42, "grad_norm": 0.2724788784980774, "learning_rate": 3.1094687219246025e-05, "loss": 0.8486, "step": 20125 }, { "epoch": 2.43, "grad_norm": 0.2676187753677368, "learning_rate": 3.103198393447601e-05, "loss": 0.8293, "step": 20130 }, { "epoch": 2.43, "grad_norm": 0.254800945520401, "learning_rate": 3.096933664000518e-05, "loss": 0.8403, "step": 20135 }, { "epoch": 2.43, "grad_norm": 0.2803385853767395, "learning_rate": 3.090674536531741e-05, "loss": 0.7535, "step": 20140 }, { "epoch": 2.43, "grad_norm": 0.3000488877296448, "learning_rate": 3.084421013987023e-05, "loss": 0.7952, "step": 20145 }, { "epoch": 2.43, "grad_norm": 0.24971972405910492, "learning_rate": 3.07817309930948e-05, "loss": 0.7537, "step": 20150 }, { "epoch": 2.43, "grad_norm": 0.29271334409713745, "learning_rate": 3.0719307954395886e-05, "loss": 0.8457, "step": 20155 }, { "epoch": 2.43, "grad_norm": 0.27046826481819153, "learning_rate": 3.0656941053151846e-05, "loss": 0.7313, "step": 20160 }, { "epoch": 2.43, "grad_norm": 0.3202589154243469, "learning_rate": 3.059463031871456e-05, "loss": 0.7813, "step": 20165 }, { "epoch": 2.43, "grad_norm": 0.2895718514919281, "learning_rate": 3.05323757804096e-05, "loss": 0.768, "step": 20170 }, { "epoch": 2.43, "grad_norm": 0.28640422224998474, "learning_rate": 3.0470177467536006e-05, "loss": 0.7901, "step": 20175 }, { "epoch": 2.43, "grad_norm": 0.2655580937862396, "learning_rate": 3.040803540936637e-05, "loss": 0.8789, "step": 20180 }, { "epoch": 2.43, "grad_norm": 0.26338696479797363, "learning_rate": 3.034594963514674e-05, "loss": 0.8176, "step": 20185 }, { "epoch": 2.43, "grad_norm": 0.27758362889289856, "learning_rate": 3.028392017409685e-05, "loss": 0.7612, "step": 20190 }, { "epoch": 2.43, "grad_norm": 0.31428468227386475, "learning_rate": 3.0221947055409766e-05, "loss": 0.7512, "step": 
20195 }, { "epoch": 2.43, "grad_norm": 0.3056136965751648, "learning_rate": 3.0160030308252113e-05, "loss": 0.8479, "step": 20200 }, { "epoch": 2.43, "grad_norm": 0.2786805331707001, "learning_rate": 3.0098169961763913e-05, "loss": 0.8396, "step": 20205 }, { "epoch": 2.44, "grad_norm": 0.2735007405281067, "learning_rate": 3.003636604505879e-05, "loss": 0.7215, "step": 20210 }, { "epoch": 2.44, "grad_norm": 0.2933249771595001, "learning_rate": 2.9974618587223665e-05, "loss": 0.8251, "step": 20215 }, { "epoch": 2.44, "grad_norm": 0.24182629585266113, "learning_rate": 2.991292761731897e-05, "loss": 0.7686, "step": 20220 }, { "epoch": 2.44, "grad_norm": 0.31849405169487, "learning_rate": 2.9851293164378488e-05, "loss": 0.8349, "step": 20225 }, { "epoch": 2.44, "grad_norm": 0.2779251039028168, "learning_rate": 2.9789715257409475e-05, "loss": 0.7816, "step": 20230 }, { "epoch": 2.44, "grad_norm": 0.2377801239490509, "learning_rate": 2.9728193925392502e-05, "loss": 0.8013, "step": 20235 }, { "epoch": 2.44, "grad_norm": 0.29114991426467896, "learning_rate": 2.966672919728159e-05, "loss": 0.7782, "step": 20240 }, { "epoch": 2.44, "grad_norm": 0.2626573443412781, "learning_rate": 2.960532110200402e-05, "loss": 0.7895, "step": 20245 }, { "epoch": 2.44, "grad_norm": 0.26689302921295166, "learning_rate": 2.9543969668460556e-05, "loss": 0.7706, "step": 20250 }, { "epoch": 2.44, "grad_norm": 0.29479920864105225, "learning_rate": 2.948267492552518e-05, "loss": 0.8125, "step": 20255 }, { "epoch": 2.44, "grad_norm": 0.2591875195503235, "learning_rate": 2.9421436902045255e-05, "loss": 0.8254, "step": 20260 }, { "epoch": 2.44, "grad_norm": 0.23983192443847656, "learning_rate": 2.9360255626841374e-05, "loss": 0.731, "step": 20265 }, { "epoch": 2.44, "grad_norm": 0.27949729561805725, "learning_rate": 2.9299131128707537e-05, "loss": 0.7235, "step": 20270 }, { "epoch": 2.44, "grad_norm": 0.29695627093315125, "learning_rate": 2.923806343641097e-05, "loss": 0.8012, "step": 20275 }, { 
"epoch": 2.44, "grad_norm": 0.26276320219039917, "learning_rate": 2.917705257869211e-05, "loss": 0.7519, "step": 20280 }, { "epoch": 2.44, "grad_norm": 0.2703682482242584, "learning_rate": 2.911609858426468e-05, "loss": 0.8297, "step": 20285 }, { "epoch": 2.44, "grad_norm": 0.28486260771751404, "learning_rate": 2.9055201481815727e-05, "loss": 0.8341, "step": 20290 }, { "epoch": 2.45, "grad_norm": 0.266369104385376, "learning_rate": 2.8994361300005407e-05, "loss": 0.6976, "step": 20295 }, { "epoch": 2.45, "grad_norm": 0.2816215753555298, "learning_rate": 2.8933578067467123e-05, "loss": 0.846, "step": 20300 }, { "epoch": 2.45, "grad_norm": 0.273666650056839, "learning_rate": 2.8872851812807502e-05, "loss": 0.7309, "step": 20305 }, { "epoch": 2.45, "grad_norm": 0.30615535378456116, "learning_rate": 2.8812182564606334e-05, "loss": 0.7782, "step": 20310 }, { "epoch": 2.45, "grad_norm": 0.2994655966758728, "learning_rate": 2.8751570351416564e-05, "loss": 0.6678, "step": 20315 }, { "epoch": 2.45, "grad_norm": 0.2747027277946472, "learning_rate": 2.8691015201764323e-05, "loss": 0.7504, "step": 20320 }, { "epoch": 2.45, "grad_norm": 0.28770124912261963, "learning_rate": 2.863051714414883e-05, "loss": 0.7577, "step": 20325 }, { "epoch": 2.45, "grad_norm": 0.27923446893692017, "learning_rate": 2.8570076207042574e-05, "loss": 0.7865, "step": 20330 }, { "epoch": 2.45, "grad_norm": 0.26224157214164734, "learning_rate": 2.8509692418891024e-05, "loss": 0.7961, "step": 20335 }, { "epoch": 2.45, "grad_norm": 0.2644881308078766, "learning_rate": 2.8449365808112773e-05, "loss": 0.7063, "step": 20340 }, { "epoch": 2.45, "grad_norm": 0.2863686680793762, "learning_rate": 2.8389096403099515e-05, "loss": 0.8939, "step": 20345 }, { "epoch": 2.45, "grad_norm": 0.27196264266967773, "learning_rate": 2.832888423221611e-05, "loss": 0.803, "step": 20350 }, { "epoch": 2.45, "grad_norm": 0.2805080711841583, "learning_rate": 2.826872932380034e-05, "loss": 0.8347, "step": 20355 }, { "epoch": 2.45, 
"grad_norm": 0.27300190925598145, "learning_rate": 2.820863170616312e-05, "loss": 0.7662, "step": 20360 }, { "epoch": 2.45, "grad_norm": 0.2929331660270691, "learning_rate": 2.81485914075884e-05, "loss": 0.8332, "step": 20365 }, { "epoch": 2.45, "grad_norm": 0.3110920190811157, "learning_rate": 2.808860845633312e-05, "loss": 0.7865, "step": 20370 }, { "epoch": 2.45, "grad_norm": 0.2958769202232361, "learning_rate": 2.8028682880627255e-05, "loss": 0.7955, "step": 20375 }, { "epoch": 2.46, "grad_norm": 0.2582022547721863, "learning_rate": 2.7968814708673758e-05, "loss": 0.8322, "step": 20380 }, { "epoch": 2.46, "grad_norm": 0.27582964301109314, "learning_rate": 2.7909003968648557e-05, "loss": 0.8729, "step": 20385 }, { "epoch": 2.46, "grad_norm": 0.29320433735847473, "learning_rate": 2.7849250688700637e-05, "loss": 0.7828, "step": 20390 }, { "epoch": 2.46, "grad_norm": 0.2722160220146179, "learning_rate": 2.7789554896951865e-05, "loss": 0.7481, "step": 20395 }, { "epoch": 2.46, "grad_norm": 0.3223639130592346, "learning_rate": 2.7729916621497022e-05, "loss": 0.9109, "step": 20400 }, { "epoch": 2.46, "grad_norm": 0.2686358690261841, "learning_rate": 2.7670335890403837e-05, "loss": 0.7711, "step": 20405 }, { "epoch": 2.46, "grad_norm": 0.2783448398113251, "learning_rate": 2.7610812731713084e-05, "loss": 0.8384, "step": 20410 }, { "epoch": 2.46, "grad_norm": 0.3094173073768616, "learning_rate": 2.7551347173438267e-05, "loss": 0.7424, "step": 20415 }, { "epoch": 2.46, "grad_norm": 0.29645413160324097, "learning_rate": 2.7491939243565862e-05, "loss": 0.813, "step": 20420 }, { "epoch": 2.46, "grad_norm": 0.31017208099365234, "learning_rate": 2.7432588970055182e-05, "loss": 0.8065, "step": 20425 }, { "epoch": 2.46, "grad_norm": 0.29746440052986145, "learning_rate": 2.7373296380838505e-05, "loss": 0.7532, "step": 20430 }, { "epoch": 2.46, "grad_norm": 0.2688598930835724, "learning_rate": 2.7314061503820865e-05, "loss": 0.7811, "step": 20435 }, { "epoch": 2.46, "grad_norm": 
0.30386409163475037, "learning_rate": 2.7254884366880146e-05, "loss": 0.7961, "step": 20440 }, { "epoch": 2.46, "grad_norm": 0.28410372138023376, "learning_rate": 2.7195764997867085e-05, "loss": 0.9115, "step": 20445 }, { "epoch": 2.46, "grad_norm": 0.2933676242828369, "learning_rate": 2.7136703424605228e-05, "loss": 0.8635, "step": 20450 }, { "epoch": 2.46, "grad_norm": 0.3018134832382202, "learning_rate": 2.7077699674890903e-05, "loss": 0.7459, "step": 20455 }, { "epoch": 2.47, "grad_norm": 0.2924741208553314, "learning_rate": 2.7018753776493213e-05, "loss": 0.8325, "step": 20460 }, { "epoch": 2.47, "grad_norm": 0.2926197946071625, "learning_rate": 2.6959865757154054e-05, "loss": 0.722, "step": 20465 }, { "epoch": 2.47, "grad_norm": 0.285312294960022, "learning_rate": 2.6901035644588154e-05, "loss": 0.8077, "step": 20470 }, { "epoch": 2.47, "grad_norm": 0.2824037969112396, "learning_rate": 2.684226346648285e-05, "loss": 0.6966, "step": 20475 }, { "epoch": 2.47, "grad_norm": 0.29435649514198303, "learning_rate": 2.6783549250498315e-05, "loss": 0.6975, "step": 20480 }, { "epoch": 2.47, "grad_norm": 0.26435035467147827, "learning_rate": 2.6724893024267348e-05, "loss": 0.8694, "step": 20485 }, { "epoch": 2.47, "grad_norm": 0.29340675473213196, "learning_rate": 2.6666294815395612e-05, "loss": 0.7117, "step": 20490 }, { "epoch": 2.47, "grad_norm": 0.30014726519584656, "learning_rate": 2.6607754651461317e-05, "loss": 0.7437, "step": 20495 }, { "epoch": 2.47, "grad_norm": 0.31490999460220337, "learning_rate": 2.6549272560015406e-05, "loss": 0.7764, "step": 20500 }, { "epoch": 2.47, "grad_norm": 0.26503077149391174, "learning_rate": 2.6490848568581462e-05, "loss": 0.7501, "step": 20505 }, { "epoch": 2.47, "grad_norm": 0.29323646426200867, "learning_rate": 2.6432482704655845e-05, "loss": 0.8158, "step": 20510 }, { "epoch": 2.47, "grad_norm": 0.2801787555217743, "learning_rate": 2.637417499570741e-05, "loss": 0.7489, "step": 20515 }, { "epoch": 2.47, "grad_norm": 
0.2741035223007202, "learning_rate": 2.631592546917774e-05, "loss": 0.8378, "step": 20520 }, { "epoch": 2.47, "grad_norm": 0.300493448972702, "learning_rate": 2.6257734152480893e-05, "loss": 0.7904, "step": 20525 }, { "epoch": 2.47, "grad_norm": 0.2564448118209839, "learning_rate": 2.619960107300374e-05, "loss": 0.7423, "step": 20530 }, { "epoch": 2.47, "grad_norm": 0.26825183629989624, "learning_rate": 2.6141526258105615e-05, "loss": 0.7046, "step": 20535 }, { "epoch": 2.47, "grad_norm": 0.2867426872253418, "learning_rate": 2.608350973511844e-05, "loss": 0.8206, "step": 20540 }, { "epoch": 2.48, "grad_norm": 0.2587963938713074, "learning_rate": 2.6025551531346688e-05, "loss": 0.7434, "step": 20545 }, { "epoch": 2.48, "grad_norm": 0.3124673664569855, "learning_rate": 2.5967651674067493e-05, "loss": 0.7841, "step": 20550 }, { "epoch": 2.48, "grad_norm": 0.28737810254096985, "learning_rate": 2.59098101905304e-05, "loss": 0.7924, "step": 20555 }, { "epoch": 2.48, "grad_norm": 0.2854047417640686, "learning_rate": 2.585202710795754e-05, "loss": 0.7533, "step": 20560 }, { "epoch": 2.48, "grad_norm": 0.24524690210819244, "learning_rate": 2.5794302453543524e-05, "loss": 0.8201, "step": 20565 }, { "epoch": 2.48, "grad_norm": 0.30324557423591614, "learning_rate": 2.573663625445554e-05, "loss": 0.8067, "step": 20570 }, { "epoch": 2.48, "grad_norm": 0.29187995195388794, "learning_rate": 2.567902853783319e-05, "loss": 0.8313, "step": 20575 }, { "epoch": 2.48, "grad_norm": 0.26502034068107605, "learning_rate": 2.562147933078859e-05, "loss": 0.8097, "step": 20580 }, { "epoch": 2.48, "grad_norm": 0.32918286323547363, "learning_rate": 2.556398866040631e-05, "loss": 0.8703, "step": 20585 }, { "epoch": 2.48, "grad_norm": 0.3019461929798126, "learning_rate": 2.5506556553743334e-05, "loss": 0.8317, "step": 20590 }, { "epoch": 2.48, "grad_norm": 0.28017765283584595, "learning_rate": 2.5449183037829163e-05, "loss": 0.8315, "step": 20595 }, { "epoch": 2.48, "grad_norm": 
0.31728842854499817, "learning_rate": 2.5391868139665612e-05, "loss": 0.8539, "step": 20600 }, { "epoch": 2.48, "grad_norm": 0.26865053176879883, "learning_rate": 2.533461188622707e-05, "loss": 0.7489, "step": 20605 }, { "epoch": 2.48, "grad_norm": 0.24447381496429443, "learning_rate": 2.527741430446018e-05, "loss": 0.7674, "step": 20610 }, { "epoch": 2.48, "grad_norm": 0.3278535306453705, "learning_rate": 2.5220275421284015e-05, "loss": 0.7342, "step": 20615 }, { "epoch": 2.48, "grad_norm": 0.30096837878227234, "learning_rate": 2.5163195263590026e-05, "loss": 0.7506, "step": 20620 }, { "epoch": 2.49, "grad_norm": 0.28471359610557556, "learning_rate": 2.5106173858242073e-05, "loss": 0.7999, "step": 20625 }, { "epoch": 2.49, "grad_norm": 0.2582080364227295, "learning_rate": 2.5049211232076295e-05, "loss": 0.8253, "step": 20630 }, { "epoch": 2.49, "grad_norm": 0.2530338168144226, "learning_rate": 2.499230741190121e-05, "loss": 0.8581, "step": 20635 }, { "epoch": 2.49, "grad_norm": 0.25447461009025574, "learning_rate": 2.4935462424497576e-05, "loss": 0.7793, "step": 20640 }, { "epoch": 2.49, "grad_norm": 0.24925333261489868, "learning_rate": 2.487867629661865e-05, "loss": 0.7587, "step": 20645 }, { "epoch": 2.49, "grad_norm": 0.2856590151786804, "learning_rate": 2.4821949054989797e-05, "loss": 0.7551, "step": 20650 }, { "epoch": 2.49, "grad_norm": 0.2937513589859009, "learning_rate": 2.476528072630875e-05, "loss": 0.7448, "step": 20655 }, { "epoch": 2.49, "grad_norm": 0.2708790600299835, "learning_rate": 2.470867133724552e-05, "loss": 0.7374, "step": 20660 }, { "epoch": 2.49, "grad_norm": 0.2769574820995331, "learning_rate": 2.4652120914442352e-05, "loss": 0.8323, "step": 20665 }, { "epoch": 2.49, "grad_norm": 0.2852955162525177, "learning_rate": 2.459562948451375e-05, "loss": 0.7135, "step": 20670 }, { "epoch": 2.49, "grad_norm": 0.2564711272716522, "learning_rate": 2.4539197074046467e-05, "loss": 0.7752, "step": 20675 }, { "epoch": 2.49, "grad_norm": 
0.2757887840270996, "learning_rate": 2.448282370959944e-05, "loss": 0.7853, "step": 20680 }, { "epoch": 2.49, "grad_norm": 0.2913254201412201, "learning_rate": 2.44265094177039e-05, "loss": 0.8027, "step": 20685 }, { "epoch": 2.49, "grad_norm": 0.26618868112564087, "learning_rate": 2.437025422486319e-05, "loss": 0.7059, "step": 20690 }, { "epoch": 2.49, "grad_norm": 0.24177007377147675, "learning_rate": 2.4314058157552885e-05, "loss": 0.7975, "step": 20695 }, { "epoch": 2.49, "grad_norm": 0.26704952120780945, "learning_rate": 2.4257921242220663e-05, "loss": 0.751, "step": 20700 }, { "epoch": 2.49, "grad_norm": 0.28091442584991455, "learning_rate": 2.4201843505286507e-05, "loss": 0.8622, "step": 20705 }, { "epoch": 2.5, "grad_norm": 0.2535339891910553, "learning_rate": 2.4145824973142435e-05, "loss": 0.6663, "step": 20710 }, { "epoch": 2.5, "grad_norm": 0.26714247465133667, "learning_rate": 2.4089865672152613e-05, "loss": 0.759, "step": 20715 }, { "epoch": 2.5, "grad_norm": 0.2989547550678253, "learning_rate": 2.4033965628653323e-05, "loss": 0.8327, "step": 20720 }, { "epoch": 2.5, "grad_norm": 0.270155131816864, "learning_rate": 2.3978124868953037e-05, "loss": 0.7935, "step": 20725 }, { "epoch": 2.5, "grad_norm": 0.29163941740989685, "learning_rate": 2.3922343419332247e-05, "loss": 0.7799, "step": 20730 }, { "epoch": 2.5, "grad_norm": 0.27277231216430664, "learning_rate": 2.386662130604359e-05, "loss": 0.7378, "step": 20735 }, { "epoch": 2.5, "grad_norm": 0.308982253074646, "learning_rate": 2.3810958555311647e-05, "loss": 0.7067, "step": 20740 }, { "epoch": 2.5, "grad_norm": 0.24645307660102844, "learning_rate": 2.375535519333324e-05, "loss": 0.6404, "step": 20745 }, { "epoch": 2.5, "grad_norm": 0.30211979150772095, "learning_rate": 2.3699811246277133e-05, "loss": 0.7035, "step": 20750 }, { "epoch": 2.5, "grad_norm": 0.27554258704185486, "learning_rate": 2.3644326740284147e-05, "loss": 0.8621, "step": 20755 }, { "epoch": 2.5, "grad_norm": 0.24029062688350677, 
"learning_rate": 2.358890170146711e-05, "loss": 0.7073, "step": 20760 }, { "epoch": 2.5, "grad_norm": 0.2912255525588989, "learning_rate": 2.353353615591096e-05, "loss": 0.7371, "step": 20765 }, { "epoch": 2.5, "grad_norm": 0.29591822624206543, "learning_rate": 2.3478230129672498e-05, "loss": 0.6795, "step": 20770 }, { "epoch": 2.5, "grad_norm": 0.274454802274704, "learning_rate": 2.3422983648780606e-05, "loss": 0.8204, "step": 20775 }, { "epoch": 2.5, "grad_norm": 0.26619449257850647, "learning_rate": 2.336779673923607e-05, "loss": 0.8263, "step": 20780 }, { "epoch": 2.5, "grad_norm": 0.2655404806137085, "learning_rate": 2.3312669427011748e-05, "loss": 0.8028, "step": 20785 }, { "epoch": 2.5, "grad_norm": 0.2552018165588379, "learning_rate": 2.3257601738052352e-05, "loss": 0.8012, "step": 20790 }, { "epoch": 2.51, "grad_norm": 0.2989104390144348, "learning_rate": 2.3202593698274546e-05, "loss": 0.8466, "step": 20795 }, { "epoch": 2.51, "grad_norm": 0.25715914368629456, "learning_rate": 2.3147645333566977e-05, "loss": 0.7009, "step": 20800 }, { "epoch": 2.51, "grad_norm": 0.29836973547935486, "learning_rate": 2.309275666979014e-05, "loss": 0.7897, "step": 20805 }, { "epoch": 2.51, "grad_norm": 0.28271016478538513, "learning_rate": 2.3037927732776472e-05, "loss": 0.7738, "step": 20810 }, { "epoch": 2.51, "grad_norm": 0.31232964992523193, "learning_rate": 2.29831585483303e-05, "loss": 0.7341, "step": 20815 }, { "epoch": 2.51, "grad_norm": 0.29633277654647827, "learning_rate": 2.292844914222777e-05, "loss": 0.8116, "step": 20820 }, { "epoch": 2.51, "grad_norm": 0.2850092649459839, "learning_rate": 2.287379954021703e-05, "loss": 0.7608, "step": 20825 }, { "epoch": 2.51, "grad_norm": 0.2637297809123993, "learning_rate": 2.2819209768017965e-05, "loss": 0.7451, "step": 20830 }, { "epoch": 2.51, "grad_norm": 0.239261195063591, "learning_rate": 2.276467985132232e-05, "loss": 0.8302, "step": 20835 }, { "epoch": 2.51, "grad_norm": 0.3218871057033539, "learning_rate": 
2.271020981579365e-05, "loss": 0.7533, "step": 20840 }, { "epoch": 2.51, "grad_norm": 0.26522281765937805, "learning_rate": 2.265579968706745e-05, "loss": 0.6176, "step": 20845 }, { "epoch": 2.51, "grad_norm": 0.3309444189071655, "learning_rate": 2.2601449490750867e-05, "loss": 0.68, "step": 20850 }, { "epoch": 2.51, "grad_norm": 0.295600026845932, "learning_rate": 2.254715925242294e-05, "loss": 0.7778, "step": 20855 }, { "epoch": 2.51, "grad_norm": 0.27975109219551086, "learning_rate": 2.249292899763442e-05, "loss": 0.8367, "step": 20860 }, { "epoch": 2.51, "grad_norm": 0.301899790763855, "learning_rate": 2.2438758751907933e-05, "loss": 0.7383, "step": 20865 }, { "epoch": 2.51, "grad_norm": 0.2855704724788666, "learning_rate": 2.2384648540737736e-05, "loss": 0.8029, "step": 20870 }, { "epoch": 2.52, "grad_norm": 0.31557753682136536, "learning_rate": 2.2330598389589915e-05, "loss": 0.7673, "step": 20875 }, { "epoch": 2.52, "grad_norm": 0.2771788239479065, "learning_rate": 2.2276608323902266e-05, "loss": 0.7175, "step": 20880 }, { "epoch": 2.52, "grad_norm": 0.305113285779953, "learning_rate": 2.22226783690843e-05, "loss": 0.7742, "step": 20885 }, { "epoch": 2.52, "grad_norm": 0.24871449172496796, "learning_rate": 2.216880855051725e-05, "loss": 0.678, "step": 20890 }, { "epoch": 2.52, "grad_norm": 0.2921499013900757, "learning_rate": 2.2114998893554042e-05, "loss": 0.7413, "step": 20895 }, { "epoch": 2.52, "grad_norm": 0.3036426603794098, "learning_rate": 2.2061249423519244e-05, "loss": 0.7398, "step": 20900 }, { "epoch": 2.52, "grad_norm": 0.27135762572288513, "learning_rate": 2.200756016570922e-05, "loss": 0.7812, "step": 20905 }, { "epoch": 2.52, "grad_norm": 0.2812870442867279, "learning_rate": 2.1953931145391872e-05, "loss": 0.7961, "step": 20910 }, { "epoch": 2.52, "grad_norm": 0.3006354570388794, "learning_rate": 2.1900362387806812e-05, "loss": 0.7816, "step": 20915 }, { "epoch": 2.52, "grad_norm": 0.27853453159332275, "learning_rate": 2.1846853918165223e-05, 
"loss": 0.7039, "step": 20920 }, { "epoch": 2.52, "grad_norm": 0.29205799102783203, "learning_rate": 2.1793405761650062e-05, "loss": 0.8397, "step": 20925 }, { "epoch": 2.52, "grad_norm": 0.27953484654426575, "learning_rate": 2.1740017943415754e-05, "loss": 0.8072, "step": 20930 }, { "epoch": 2.52, "grad_norm": 0.3323310315608978, "learning_rate": 2.1686690488588387e-05, "loss": 0.7661, "step": 20935 }, { "epoch": 2.52, "grad_norm": 0.33003172278404236, "learning_rate": 2.16334234222656e-05, "loss": 0.7778, "step": 20940 }, { "epoch": 2.52, "grad_norm": 0.24858631193637848, "learning_rate": 2.1580216769516738e-05, "loss": 0.914, "step": 20945 }, { "epoch": 2.52, "grad_norm": 0.2743765115737915, "learning_rate": 2.152707055538251e-05, "loss": 0.9042, "step": 20950 }, { "epoch": 2.52, "grad_norm": 0.283892959356308, "learning_rate": 2.1473984804875332e-05, "loss": 0.8793, "step": 20955 }, { "epoch": 2.53, "grad_norm": 0.28925827145576477, "learning_rate": 2.1420959542979095e-05, "loss": 0.7409, "step": 20960 }, { "epoch": 2.53, "grad_norm": 0.24598172307014465, "learning_rate": 2.1367994794649312e-05, "loss": 0.7843, "step": 20965 }, { "epoch": 2.53, "grad_norm": 0.2719762623310089, "learning_rate": 2.1315090584812917e-05, "loss": 0.9156, "step": 20970 }, { "epoch": 2.53, "grad_norm": 0.29581862688064575, "learning_rate": 2.126224693836837e-05, "loss": 0.7598, "step": 20975 }, { "epoch": 2.53, "grad_norm": 0.27417975664138794, "learning_rate": 2.120946388018565e-05, "loss": 0.8218, "step": 20980 }, { "epoch": 2.53, "grad_norm": 0.27194756269454956, "learning_rate": 2.115674143510625e-05, "loss": 0.8978, "step": 20985 }, { "epoch": 2.53, "grad_norm": 0.27011337876319885, "learning_rate": 2.1104079627943087e-05, "loss": 0.8051, "step": 20990 }, { "epoch": 2.53, "grad_norm": 0.26955413818359375, "learning_rate": 2.1051478483480555e-05, "loss": 0.7435, "step": 20995 }, { "epoch": 2.53, "grad_norm": 0.26142939925193787, "learning_rate": 2.099893802647445e-05, "loss": 
0.7034, "step": 21000 }, { "epoch": 2.53, "grad_norm": 0.27008065581321716, "learning_rate": 2.0946458281652155e-05, "loss": 0.7948, "step": 21005 }, { "epoch": 2.53, "grad_norm": 0.2838139235973358, "learning_rate": 2.089403927371232e-05, "loss": 0.7694, "step": 21010 }, { "epoch": 2.53, "grad_norm": 0.2932431399822235, "learning_rate": 2.0841681027325075e-05, "loss": 0.8528, "step": 21015 }, { "epoch": 2.53, "grad_norm": 0.28293853998184204, "learning_rate": 2.0789383567131955e-05, "loss": 0.7757, "step": 21020 }, { "epoch": 2.53, "grad_norm": 0.27947354316711426, "learning_rate": 2.0737146917745878e-05, "loss": 0.7972, "step": 21025 }, { "epoch": 2.53, "grad_norm": 0.2797410190105438, "learning_rate": 2.0684971103751158e-05, "loss": 0.8079, "step": 21030 }, { "epoch": 2.53, "grad_norm": 0.2733669877052307, "learning_rate": 2.0632856149703454e-05, "loss": 0.7684, "step": 21035 }, { "epoch": 2.54, "grad_norm": 0.2936958074569702, "learning_rate": 2.0580802080129744e-05, "loss": 0.7623, "step": 21040 }, { "epoch": 2.54, "grad_norm": 0.2874435782432556, "learning_rate": 2.05288089195285e-05, "loss": 0.752, "step": 21045 }, { "epoch": 2.54, "grad_norm": 0.2876211404800415, "learning_rate": 2.0476876692369398e-05, "loss": 0.7191, "step": 21050 }, { "epoch": 2.54, "grad_norm": 0.3623277544975281, "learning_rate": 2.0425005423093437e-05, "loss": 0.7588, "step": 21055 }, { "epoch": 2.54, "grad_norm": 0.30501341819763184, "learning_rate": 2.037319513611294e-05, "loss": 0.723, "step": 21060 }, { "epoch": 2.54, "grad_norm": 0.29336845874786377, "learning_rate": 2.0321445855811635e-05, "loss": 0.8807, "step": 21065 }, { "epoch": 2.54, "grad_norm": 0.285491019487381, "learning_rate": 2.026975760654439e-05, "loss": 0.8104, "step": 21070 }, { "epoch": 2.54, "grad_norm": 0.27054548263549805, "learning_rate": 2.0218130412637397e-05, "loss": 0.7187, "step": 21075 }, { "epoch": 2.54, "grad_norm": 0.28753402829170227, "learning_rate": 2.0166564298388182e-05, "loss": 0.8717, "step": 
21080 }, { "epoch": 2.54, "grad_norm": 0.25017327070236206, "learning_rate": 2.0115059288065437e-05, "loss": 0.7553, "step": 21085 }, { "epoch": 2.54, "grad_norm": 0.2623300850391388, "learning_rate": 2.006361540590914e-05, "loss": 0.8769, "step": 21090 }, { "epoch": 2.54, "grad_norm": 0.2736659348011017, "learning_rate": 2.0012232676130503e-05, "loss": 0.8399, "step": 21095 }, { "epoch": 2.54, "grad_norm": 0.3095528483390808, "learning_rate": 1.9960911122911915e-05, "loss": 0.6941, "step": 21100 }, { "epoch": 2.54, "grad_norm": 0.2801484763622284, "learning_rate": 1.990965077040701e-05, "loss": 0.8403, "step": 21105 }, { "epoch": 2.54, "grad_norm": 0.3449302017688751, "learning_rate": 1.9858451642740637e-05, "loss": 0.7571, "step": 21110 }, { "epoch": 2.54, "grad_norm": 0.27275073528289795, "learning_rate": 1.9807313764008738e-05, "loss": 0.8058, "step": 21115 }, { "epoch": 2.54, "grad_norm": 0.3031749129295349, "learning_rate": 1.9756237158278593e-05, "loss": 0.7587, "step": 21120 }, { "epoch": 2.55, "grad_norm": 0.27661368250846863, "learning_rate": 1.9705221849588493e-05, "loss": 0.8447, "step": 21125 }, { "epoch": 2.55, "grad_norm": 0.3269336223602295, "learning_rate": 1.9654267861947946e-05, "loss": 0.8247, "step": 21130 }, { "epoch": 2.55, "grad_norm": 0.2593838572502136, "learning_rate": 1.9603375219337573e-05, "loss": 0.8053, "step": 21135 }, { "epoch": 2.55, "grad_norm": 0.2844938039779663, "learning_rate": 1.9552543945709183e-05, "loss": 0.8455, "step": 21140 }, { "epoch": 2.55, "grad_norm": 0.28493261337280273, "learning_rate": 1.950177406498563e-05, "loss": 0.8471, "step": 21145 }, { "epoch": 2.55, "grad_norm": 0.2901061773300171, "learning_rate": 1.9451065601060927e-05, "loss": 0.6899, "step": 21150 }, { "epoch": 2.55, "grad_norm": 0.27742087841033936, "learning_rate": 1.9400418577800148e-05, "loss": 0.7318, "step": 21155 }, { "epoch": 2.55, "grad_norm": 0.2615971565246582, "learning_rate": 1.9349833019039457e-05, "loss": 0.8078, "step": 21160 }, { 
"epoch": 2.55, "grad_norm": 0.3021378219127655, "learning_rate": 1.929930894858611e-05, "loss": 0.812, "step": 21165 }, { "epoch": 2.55, "grad_norm": 0.29150012135505676, "learning_rate": 1.924884639021841e-05, "loss": 0.9014, "step": 21170 }, { "epoch": 2.55, "grad_norm": 0.2782303988933563, "learning_rate": 1.9198445367685668e-05, "loss": 0.7307, "step": 21175 }, { "epoch": 2.55, "grad_norm": 0.26657816767692566, "learning_rate": 1.914810590470836e-05, "loss": 0.8146, "step": 21180 }, { "epoch": 2.55, "grad_norm": 0.27062734961509705, "learning_rate": 1.909782802497786e-05, "loss": 0.8376, "step": 21185 }, { "epoch": 2.55, "grad_norm": 0.3097752630710602, "learning_rate": 1.9047611752156628e-05, "loss": 0.7168, "step": 21190 }, { "epoch": 2.55, "grad_norm": 0.2954844534397125, "learning_rate": 1.8997457109878067e-05, "loss": 0.6942, "step": 21195 }, { "epoch": 2.55, "grad_norm": 0.29877805709838867, "learning_rate": 1.8947364121746677e-05, "loss": 0.7444, "step": 21200 }, { "epoch": 2.55, "grad_norm": 0.303915798664093, "learning_rate": 1.8897332811337868e-05, "loss": 0.7517, "step": 21205 }, { "epoch": 2.56, "grad_norm": 0.2891033887863159, "learning_rate": 1.8847363202198017e-05, "loss": 0.7324, "step": 21210 }, { "epoch": 2.56, "grad_norm": 0.27175503969192505, "learning_rate": 1.8797455317844452e-05, "loss": 0.7523, "step": 21215 }, { "epoch": 2.56, "grad_norm": 0.32238414883613586, "learning_rate": 1.8747609181765543e-05, "loss": 0.7668, "step": 21220 }, { "epoch": 2.56, "grad_norm": 0.28903838992118835, "learning_rate": 1.8697824817420514e-05, "loss": 0.796, "step": 21225 }, { "epoch": 2.56, "grad_norm": 0.25564032793045044, "learning_rate": 1.8648102248239537e-05, "loss": 0.8034, "step": 21230 }, { "epoch": 2.56, "grad_norm": 0.30182021856307983, "learning_rate": 1.859844149762371e-05, "loss": 0.7384, "step": 21235 }, { "epoch": 2.56, "grad_norm": 0.33114859461784363, "learning_rate": 1.8548842588945023e-05, "loss": 0.8908, "step": 21240 }, { "epoch": 
2.56, "grad_norm": 0.2683655917644501, "learning_rate": 1.8499305545546367e-05, "loss": 0.7694, "step": 21245 }, { "epoch": 2.56, "grad_norm": 0.30291497707366943, "learning_rate": 1.8449830390741526e-05, "loss": 0.8817, "step": 21250 }, { "epoch": 2.56, "grad_norm": 0.2633529007434845, "learning_rate": 1.840041714781511e-05, "loss": 0.737, "step": 21255 }, { "epoch": 2.56, "grad_norm": 0.27262598276138306, "learning_rate": 1.8351065840022715e-05, "loss": 0.7905, "step": 21260 }, { "epoch": 2.56, "grad_norm": 0.28199708461761475, "learning_rate": 1.8301776490590664e-05, "loss": 0.7542, "step": 21265 }, { "epoch": 2.56, "grad_norm": 0.3084707260131836, "learning_rate": 1.8252549122716147e-05, "loss": 0.6572, "step": 21270 }, { "epoch": 2.56, "grad_norm": 0.27886369824409485, "learning_rate": 1.8203383759567186e-05, "loss": 0.7748, "step": 21275 }, { "epoch": 2.56, "grad_norm": 0.2864547669887543, "learning_rate": 1.8154280424282685e-05, "loss": 0.8352, "step": 21280 }, { "epoch": 2.56, "grad_norm": 0.26778092980384827, "learning_rate": 1.8105239139972277e-05, "loss": 0.7049, "step": 21285 }, { "epoch": 2.57, "grad_norm": 0.28581252694129944, "learning_rate": 1.8056259929716417e-05, "loss": 0.7679, "step": 21290 }, { "epoch": 2.57, "grad_norm": 0.30422863364219666, "learning_rate": 1.800734281656633e-05, "loss": 0.7548, "step": 21295 }, { "epoch": 2.57, "grad_norm": 0.2808743715286255, "learning_rate": 1.7958487823544082e-05, "loss": 0.7986, "step": 21300 }, { "epoch": 2.57, "grad_norm": 0.3032390773296356, "learning_rate": 1.790969497364242e-05, "loss": 0.8185, "step": 21305 }, { "epoch": 2.57, "grad_norm": 0.2701072096824646, "learning_rate": 1.7860964289824885e-05, "loss": 0.7967, "step": 21310 }, { "epoch": 2.57, "grad_norm": 0.28802967071533203, "learning_rate": 1.7812295795025773e-05, "loss": 0.7764, "step": 21315 }, { "epoch": 2.57, "grad_norm": 0.2924157381057739, "learning_rate": 1.776368951215007e-05, "loss": 0.7171, "step": 21320 }, { "epoch": 2.57, 
"grad_norm": 0.2745770812034607, "learning_rate": 1.7715145464073498e-05, "loss": 0.806, "step": 21325 }, { "epoch": 2.57, "grad_norm": 0.2635575830936432, "learning_rate": 1.7666663673642535e-05, "loss": 0.7911, "step": 21330 }, { "epoch": 2.57, "grad_norm": 0.2602815330028534, "learning_rate": 1.761824416367427e-05, "loss": 0.8123, "step": 21335 }, { "epoch": 2.57, "grad_norm": 0.30612412095069885, "learning_rate": 1.7569886956956585e-05, "loss": 0.8126, "step": 21340 }, { "epoch": 2.57, "grad_norm": 0.2764699161052704, "learning_rate": 1.752159207624797e-05, "loss": 0.8279, "step": 21345 }, { "epoch": 2.57, "grad_norm": 0.2635042667388916, "learning_rate": 1.7473359544277594e-05, "loss": 0.6854, "step": 21350 }, { "epoch": 2.57, "grad_norm": 0.25210273265838623, "learning_rate": 1.7425189383745265e-05, "loss": 0.8533, "step": 21355 }, { "epoch": 2.57, "grad_norm": 0.26929739117622375, "learning_rate": 1.7377081617321513e-05, "loss": 0.7162, "step": 21360 }, { "epoch": 2.57, "grad_norm": 0.27640727162361145, "learning_rate": 1.7329036267647417e-05, "loss": 0.7429, "step": 21365 }, { "epoch": 2.57, "grad_norm": 0.2647760510444641, "learning_rate": 1.7281053357334717e-05, "loss": 0.8501, "step": 21370 }, { "epoch": 2.58, "grad_norm": 0.28919151425361633, "learning_rate": 1.7233132908965784e-05, "loss": 0.7655, "step": 21375 }, { "epoch": 2.58, "grad_norm": 0.295827180147171, "learning_rate": 1.7185274945093547e-05, "loss": 0.8074, "step": 21380 }, { "epoch": 2.58, "grad_norm": 0.2644157409667969, "learning_rate": 1.713747948824155e-05, "loss": 0.8458, "step": 21385 }, { "epoch": 2.58, "grad_norm": 0.2902376055717468, "learning_rate": 1.7089746560903956e-05, "loss": 0.8744, "step": 21390 }, { "epoch": 2.58, "grad_norm": 0.25895097851753235, "learning_rate": 1.7042076185545388e-05, "loss": 0.7331, "step": 21395 }, { "epoch": 2.58, "grad_norm": 0.2989761233329773, "learning_rate": 1.6994468384601206e-05, "loss": 0.7485, "step": 21400 }, { "epoch": 2.58, "grad_norm": 
0.26505047082901, "learning_rate": 1.6946923180477183e-05, "loss": 0.7844, "step": 21405 }, { "epoch": 2.58, "grad_norm": 0.26280462741851807, "learning_rate": 1.6899440595549674e-05, "loss": 0.7854, "step": 21410 }, { "epoch": 2.58, "grad_norm": 0.2874717712402344, "learning_rate": 1.6852020652165515e-05, "loss": 0.7819, "step": 21415 }, { "epoch": 2.58, "grad_norm": 0.28164345026016235, "learning_rate": 1.6804663372642175e-05, "loss": 0.7315, "step": 21420 }, { "epoch": 2.58, "grad_norm": 0.2647458016872406, "learning_rate": 1.6757368779267522e-05, "loss": 0.8445, "step": 21425 }, { "epoch": 2.58, "grad_norm": 0.26932641863822937, "learning_rate": 1.6710136894299987e-05, "loss": 0.763, "step": 21430 }, { "epoch": 2.58, "grad_norm": 0.2817295491695404, "learning_rate": 1.6662967739968402e-05, "loss": 0.8031, "step": 21435 }, { "epoch": 2.58, "grad_norm": 0.303753137588501, "learning_rate": 1.661586133847223e-05, "loss": 0.6969, "step": 21440 }, { "epoch": 2.58, "grad_norm": 0.27712610363960266, "learning_rate": 1.656881771198125e-05, "loss": 0.8331, "step": 21445 }, { "epoch": 2.58, "grad_norm": 0.28993546962738037, "learning_rate": 1.6521836882635775e-05, "loss": 0.7937, "step": 21450 }, { "epoch": 2.59, "grad_norm": 0.2543039619922638, "learning_rate": 1.6474918872546545e-05, "loss": 0.7681, "step": 21455 }, { "epoch": 2.59, "grad_norm": 0.3048137426376343, "learning_rate": 1.6428063703794714e-05, "loss": 0.8169, "step": 21460 }, { "epoch": 2.59, "grad_norm": 0.30479833483695984, "learning_rate": 1.6381271398431894e-05, "loss": 0.7523, "step": 21465 }, { "epoch": 2.59, "grad_norm": 0.311480849981308, "learning_rate": 1.6334541978480087e-05, "loss": 0.7312, "step": 21470 }, { "epoch": 2.59, "grad_norm": 0.2890738844871521, "learning_rate": 1.6287875465931698e-05, "loss": 0.746, "step": 21475 }, { "epoch": 2.59, "grad_norm": 0.2721187472343445, "learning_rate": 1.624127188274958e-05, "loss": 0.827, "step": 21480 }, { "epoch": 2.59, "grad_norm": 0.2833477556705475, 
"learning_rate": 1.61947312508669e-05, "loss": 0.7932, "step": 21485 }, { "epoch": 2.59, "grad_norm": 0.28032922744750977, "learning_rate": 1.614825359218724e-05, "loss": 0.7861, "step": 21490 }, { "epoch": 2.59, "grad_norm": 0.2863568067550659, "learning_rate": 1.6101838928584483e-05, "loss": 0.6808, "step": 21495 }, { "epoch": 2.59, "grad_norm": 0.31328871846199036, "learning_rate": 1.6055487281902973e-05, "loss": 0.8056, "step": 21500 }, { "epoch": 2.59, "grad_norm": 0.28002050518989563, "learning_rate": 1.6009198673957313e-05, "loss": 0.84, "step": 21505 }, { "epoch": 2.59, "grad_norm": 0.28880271315574646, "learning_rate": 1.5962973126532457e-05, "loss": 0.8734, "step": 21510 }, { "epoch": 2.59, "grad_norm": 0.2959395945072174, "learning_rate": 1.5916810661383638e-05, "loss": 0.7527, "step": 21515 }, { "epoch": 2.59, "grad_norm": 0.28988808393478394, "learning_rate": 1.5870711300236527e-05, "loss": 0.6993, "step": 21520 }, { "epoch": 2.59, "grad_norm": 0.3125680983066559, "learning_rate": 1.5824675064786968e-05, "loss": 0.7884, "step": 21525 }, { "epoch": 2.59, "grad_norm": 0.31437787413597107, "learning_rate": 1.577870197670118e-05, "loss": 0.6855, "step": 21530 }, { "epoch": 2.59, "grad_norm": 0.2919071316719055, "learning_rate": 1.573279205761554e-05, "loss": 0.8805, "step": 21535 }, { "epoch": 2.6, "grad_norm": 0.288947194814682, "learning_rate": 1.5686945329136865e-05, "loss": 0.7189, "step": 21540 }, { "epoch": 2.6, "grad_norm": 0.28742465376853943, "learning_rate": 1.5641161812842105e-05, "loss": 0.8336, "step": 21545 }, { "epoch": 2.6, "grad_norm": 0.2684331238269806, "learning_rate": 1.5595441530278517e-05, "loss": 0.7574, "step": 21550 }, { "epoch": 2.6, "grad_norm": 0.29591086506843567, "learning_rate": 1.5549784502963554e-05, "loss": 0.7995, "step": 21555 }, { "epoch": 2.6, "grad_norm": 0.25457319617271423, "learning_rate": 1.5504190752384987e-05, "loss": 0.7078, "step": 21560 }, { "epoch": 2.6, "grad_norm": 0.28611642122268677, "learning_rate": 
1.5458660300000725e-05, "loss": 0.7801, "step": 21565 }, { "epoch": 2.6, "grad_norm": 0.2762448489665985, "learning_rate": 1.5413193167238908e-05, "loss": 0.7721, "step": 21570 }, { "epoch": 2.6, "grad_norm": 0.29619264602661133, "learning_rate": 1.5367789375497836e-05, "loss": 0.8263, "step": 21575 }, { "epoch": 2.6, "grad_norm": 0.27710941433906555, "learning_rate": 1.5322448946146114e-05, "loss": 0.9136, "step": 21580 }, { "epoch": 2.6, "grad_norm": 0.2905004322528839, "learning_rate": 1.5277171900522428e-05, "loss": 0.7735, "step": 21585 }, { "epoch": 2.6, "grad_norm": 0.26848629117012024, "learning_rate": 1.5231958259935646e-05, "loss": 0.7816, "step": 21590 }, { "epoch": 2.6, "grad_norm": 0.28664660453796387, "learning_rate": 1.5186808045664812e-05, "loss": 0.7792, "step": 21595 }, { "epoch": 2.6, "grad_norm": 0.3380061984062195, "learning_rate": 1.5141721278959112e-05, "loss": 0.7895, "step": 21600 }, { "epoch": 2.6, "grad_norm": 0.26987069845199585, "learning_rate": 1.5096697981037909e-05, "loss": 0.7762, "step": 21605 }, { "epoch": 2.6, "grad_norm": 0.2945181131362915, "learning_rate": 1.5051738173090582e-05, "loss": 0.7818, "step": 21610 }, { "epoch": 2.6, "grad_norm": 0.30163273215293884, "learning_rate": 1.500684187627681e-05, "loss": 0.7005, "step": 21615 }, { "epoch": 2.6, "grad_norm": 0.28872594237327576, "learning_rate": 1.496200911172622e-05, "loss": 0.7485, "step": 21620 }, { "epoch": 2.61, "grad_norm": 0.2600472569465637, "learning_rate": 1.491723990053862e-05, "loss": 0.7653, "step": 21625 }, { "epoch": 2.61, "grad_norm": 0.2789260745048523, "learning_rate": 1.487253426378387e-05, "loss": 0.6609, "step": 21630 }, { "epoch": 2.61, "grad_norm": 0.23983633518218994, "learning_rate": 1.482789222250194e-05, "loss": 0.7225, "step": 21635 }, { "epoch": 2.61, "grad_norm": 0.27560335397720337, "learning_rate": 1.4783313797702878e-05, "loss": 0.8625, "step": 21640 }, { "epoch": 2.61, "grad_norm": 0.2681236267089844, "learning_rate": 
1.4738799010366747e-05, "loss": 0.7163, "step": 21645 }, { "epoch": 2.61, "grad_norm": 0.2975192070007324, "learning_rate": 1.4694347881443659e-05, "loss": 0.7637, "step": 21650 }, { "epoch": 2.61, "grad_norm": 0.3074178397655487, "learning_rate": 1.4649960431853842e-05, "loss": 0.7499, "step": 21655 }, { "epoch": 2.61, "grad_norm": 0.28553348779678345, "learning_rate": 1.4605636682487486e-05, "loss": 0.8284, "step": 21660 }, { "epoch": 2.61, "grad_norm": 0.29183608293533325, "learning_rate": 1.456137665420481e-05, "loss": 0.7162, "step": 21665 }, { "epoch": 2.61, "grad_norm": 0.2632047235965729, "learning_rate": 1.4517180367836062e-05, "loss": 0.8519, "step": 21670 }, { "epoch": 2.61, "grad_norm": 0.28933554887771606, "learning_rate": 1.4473047844181474e-05, "loss": 0.7592, "step": 21675 }, { "epoch": 2.61, "grad_norm": 0.30451327562332153, "learning_rate": 1.4428979104011295e-05, "loss": 0.7692, "step": 21680 }, { "epoch": 2.61, "grad_norm": 0.27428263425827026, "learning_rate": 1.4384974168065705e-05, "loss": 0.7196, "step": 21685 }, { "epoch": 2.61, "grad_norm": 0.2668101191520691, "learning_rate": 1.4341033057054885e-05, "loss": 0.7442, "step": 21690 }, { "epoch": 2.61, "grad_norm": 0.27039745450019836, "learning_rate": 1.4297155791659044e-05, "loss": 0.7293, "step": 21695 }, { "epoch": 2.61, "grad_norm": 0.2836028039455414, "learning_rate": 1.4253342392528227e-05, "loss": 0.8122, "step": 21700 }, { "epoch": 2.62, "grad_norm": 0.26885733008384705, "learning_rate": 1.4209592880282494e-05, "loss": 0.6921, "step": 21705 }, { "epoch": 2.62, "grad_norm": 0.274857759475708, "learning_rate": 1.4165907275511773e-05, "loss": 0.7754, "step": 21710 }, { "epoch": 2.62, "grad_norm": 0.29404956102371216, "learning_rate": 1.4122285598776035e-05, "loss": 0.7997, "step": 21715 }, { "epoch": 2.62, "grad_norm": 0.29239895939826965, "learning_rate": 1.4078727870605056e-05, "loss": 0.6922, "step": 21720 }, { "epoch": 2.62, "grad_norm": 0.2807917892932892, "learning_rate": 
1.4035234111498539e-05, "loss": 0.8521, "step": 21725 }, { "epoch": 2.62, "grad_norm": 0.26643988490104675, "learning_rate": 1.3991804341926077e-05, "loss": 0.8503, "step": 21730 }, { "epoch": 2.62, "grad_norm": 0.2644132375717163, "learning_rate": 1.394843858232722e-05, "loss": 0.7678, "step": 21735 }, { "epoch": 2.62, "grad_norm": 0.2644047141075134, "learning_rate": 1.390513685311131e-05, "loss": 0.8423, "step": 21740 }, { "epoch": 2.62, "grad_norm": 0.2851656675338745, "learning_rate": 1.3861899174657542e-05, "loss": 0.7707, "step": 21745 }, { "epoch": 2.62, "grad_norm": 0.2632659673690796, "learning_rate": 1.381872556731501e-05, "loss": 0.8024, "step": 21750 }, { "epoch": 2.62, "grad_norm": 0.2766389846801758, "learning_rate": 1.3775616051402689e-05, "loss": 0.7274, "step": 21755 }, { "epoch": 2.62, "grad_norm": 0.3009210228919983, "learning_rate": 1.3732570647209334e-05, "loss": 0.6989, "step": 21760 }, { "epoch": 2.62, "grad_norm": 0.2986798584461212, "learning_rate": 1.3689589374993526e-05, "loss": 0.7043, "step": 21765 }, { "epoch": 2.62, "grad_norm": 0.2332366406917572, "learning_rate": 1.3646672254983649e-05, "loss": 0.7085, "step": 21770 }, { "epoch": 2.62, "grad_norm": 0.2506479322910309, "learning_rate": 1.3603819307378011e-05, "loss": 0.8323, "step": 21775 }, { "epoch": 2.62, "grad_norm": 0.2667887806892395, "learning_rate": 1.3561030552344566e-05, "loss": 0.8099, "step": 21780 }, { "epoch": 2.62, "grad_norm": 0.26609328389167786, "learning_rate": 1.3518306010021152e-05, "loss": 0.805, "step": 21785 }, { "epoch": 2.63, "grad_norm": 0.2911878824234009, "learning_rate": 1.3475645700515319e-05, "loss": 0.8367, "step": 21790 }, { "epoch": 2.63, "grad_norm": 0.2831841707229614, "learning_rate": 1.3433049643904476e-05, "loss": 0.8377, "step": 21795 }, { "epoch": 2.63, "grad_norm": 0.2804235517978668, "learning_rate": 1.3390517860235717e-05, "loss": 0.8026, "step": 21800 }, { "epoch": 2.63, "grad_norm": 0.28793367743492126, "learning_rate": 
1.3348050369525931e-05, "loss": 0.7157, "step": 21805 }, { "epoch": 2.63, "grad_norm": 0.28661012649536133, "learning_rate": 1.3305647191761699e-05, "loss": 0.729, "step": 21810 }, { "epoch": 2.63, "grad_norm": 0.2898370027542114, "learning_rate": 1.3263308346899371e-05, "loss": 0.7045, "step": 21815 }, { "epoch": 2.63, "grad_norm": 0.26269808411598206, "learning_rate": 1.3221033854865027e-05, "loss": 0.7042, "step": 21820 }, { "epoch": 2.63, "grad_norm": 0.29441672563552856, "learning_rate": 1.3178823735554434e-05, "loss": 0.7476, "step": 21825 }, { "epoch": 2.63, "grad_norm": 0.25631454586982727, "learning_rate": 1.3136678008833069e-05, "loss": 0.8577, "step": 21830 }, { "epoch": 2.63, "grad_norm": 0.32068613171577454, "learning_rate": 1.309459669453613e-05, "loss": 0.8105, "step": 21835 }, { "epoch": 2.63, "grad_norm": 0.23884128034114838, "learning_rate": 1.3052579812468494e-05, "loss": 0.7782, "step": 21840 }, { "epoch": 2.63, "grad_norm": 0.3259485363960266, "learning_rate": 1.3010627382404675e-05, "loss": 0.8539, "step": 21845 }, { "epoch": 2.63, "grad_norm": 0.30076032876968384, "learning_rate": 1.2968739424088848e-05, "loss": 0.818, "step": 21850 }, { "epoch": 2.63, "grad_norm": 0.26317545771598816, "learning_rate": 1.2926915957234957e-05, "loss": 0.766, "step": 21855 }, { "epoch": 2.63, "grad_norm": 0.23999077081680298, "learning_rate": 1.2885157001526459e-05, "loss": 0.7286, "step": 21860 }, { "epoch": 2.63, "grad_norm": 0.23871669173240662, "learning_rate": 1.2843462576616531e-05, "loss": 0.7398, "step": 21865 }, { "epoch": 2.64, "grad_norm": 0.25987398624420166, "learning_rate": 1.2801832702127912e-05, "loss": 0.7984, "step": 21870 }, { "epoch": 2.64, "grad_norm": 0.25374579429626465, "learning_rate": 1.2760267397653063e-05, "loss": 0.7778, "step": 21875 }, { "epoch": 2.64, "grad_norm": 0.3066677749156952, "learning_rate": 1.2718766682753966e-05, "loss": 0.8205, "step": 21880 }, { "epoch": 2.64, "grad_norm": 0.2812236547470093, "learning_rate": 
1.2677330576962235e-05, "loss": 0.8151, "step": 21885 }, { "epoch": 2.64, "grad_norm": 0.2993893027305603, "learning_rate": 1.263595909977907e-05, "loss": 0.7551, "step": 21890 }, { "epoch": 2.64, "grad_norm": 0.2752331495285034, "learning_rate": 1.2594652270675293e-05, "loss": 0.8551, "step": 21895 }, { "epoch": 2.64, "grad_norm": 0.29263609647750854, "learning_rate": 1.2553410109091221e-05, "loss": 0.846, "step": 21900 }, { "epoch": 2.64, "grad_norm": 0.28177276253700256, "learning_rate": 1.251223263443683e-05, "loss": 0.7788, "step": 21905 }, { "epoch": 2.64, "grad_norm": 0.3226982355117798, "learning_rate": 1.2479337242326714e-05, "loss": 0.7536, "step": 21910 }, { "epoch": 2.64, "grad_norm": 0.29417189955711365, "learning_rate": 1.2438276252961555e-05, "loss": 0.7601, "step": 21915 }, { "epoch": 2.64, "grad_norm": 0.26856791973114014, "learning_rate": 1.2397280004711845e-05, "loss": 0.719, "step": 21920 }, { "epoch": 2.64, "grad_norm": 0.2941911518573761, "learning_rate": 1.2356348516871839e-05, "loss": 0.8129, "step": 21925 }, { "epoch": 2.64, "grad_norm": 0.3045237064361572, "learning_rate": 1.2315481808705224e-05, "loss": 0.8177, "step": 21930 }, { "epoch": 2.64, "grad_norm": 0.26458317041397095, "learning_rate": 1.2274679899445234e-05, "loss": 0.8406, "step": 21935 }, { "epoch": 2.64, "grad_norm": 0.3065710663795471, "learning_rate": 1.2233942808294573e-05, "loss": 0.7369, "step": 21940 }, { "epoch": 2.64, "grad_norm": 0.28398415446281433, "learning_rate": 1.2193270554425521e-05, "loss": 0.8903, "step": 21945 }, { "epoch": 2.64, "grad_norm": 0.29608169198036194, "learning_rate": 1.215266315697977e-05, "loss": 0.7931, "step": 21950 }, { "epoch": 2.65, "grad_norm": 0.2909848093986511, "learning_rate": 1.2112120635068495e-05, "loss": 0.8049, "step": 21955 }, { "epoch": 2.65, "grad_norm": 0.28937968611717224, "learning_rate": 1.2071643007772353e-05, "loss": 0.7373, "step": 21960 }, { "epoch": 2.65, "grad_norm": 0.2677673399448395, "learning_rate": 
1.2031230294141486e-05, "loss": 0.7057, "step": 21965 }, { "epoch": 2.65, "grad_norm": 0.27190762758255005, "learning_rate": 1.199088251319545e-05, "loss": 0.7522, "step": 21970 }, { "epoch": 2.65, "grad_norm": 0.2613365650177002, "learning_rate": 1.1950599683923234e-05, "loss": 0.8177, "step": 21975 }, { "epoch": 2.65, "grad_norm": 0.27314963936805725, "learning_rate": 1.1910381825283294e-05, "loss": 0.7326, "step": 21980 }, { "epoch": 2.65, "grad_norm": 0.23859882354736328, "learning_rate": 1.1870228956203487e-05, "loss": 0.7208, "step": 21985 }, { "epoch": 2.65, "grad_norm": 0.2868862450122833, "learning_rate": 1.183014109558108e-05, "loss": 0.7957, "step": 21990 }, { "epoch": 2.65, "grad_norm": 0.2559562027454376, "learning_rate": 1.1790118262282715e-05, "loss": 0.7123, "step": 21995 }, { "epoch": 2.65, "grad_norm": 0.30129274725914, "learning_rate": 1.1750160475144543e-05, "loss": 0.7947, "step": 22000 }, { "epoch": 2.65, "grad_norm": 0.26090654730796814, "learning_rate": 1.171026775297197e-05, "loss": 0.6892, "step": 22005 }, { "epoch": 2.65, "grad_norm": 0.28930899500846863, "learning_rate": 1.1670440114539858e-05, "loss": 0.6845, "step": 22010 }, { "epoch": 2.65, "grad_norm": 0.3468032777309418, "learning_rate": 1.1630677578592401e-05, "loss": 0.7648, "step": 22015 }, { "epoch": 2.65, "grad_norm": 0.2959340810775757, "learning_rate": 1.1590980163843194e-05, "loss": 0.8748, "step": 22020 }, { "epoch": 2.65, "grad_norm": 0.28836220502853394, "learning_rate": 1.1551347888975126e-05, "loss": 0.7935, "step": 22025 }, { "epoch": 2.65, "grad_norm": 0.29400843381881714, "learning_rate": 1.1511780772640494e-05, "loss": 0.8172, "step": 22030 }, { "epoch": 2.65, "grad_norm": 0.28731659054756165, "learning_rate": 1.1472278833460886e-05, "loss": 0.7621, "step": 22035 }, { "epoch": 2.66, "grad_norm": 0.26251596212387085, "learning_rate": 1.1432842090027227e-05, "loss": 0.7288, "step": 22040 }, { "epoch": 2.66, "grad_norm": 0.2831023037433624, "learning_rate": 
1.1393470560899742e-05, "loss": 0.799, "step": 22045 }, { "epoch": 2.66, "grad_norm": 0.28475290536880493, "learning_rate": 1.135416426460799e-05, "loss": 0.8506, "step": 22050 }, { "epoch": 2.66, "grad_norm": 0.31778624653816223, "learning_rate": 1.1314923219650807e-05, "loss": 0.7712, "step": 22055 }, { "epoch": 2.66, "grad_norm": 0.28575217723846436, "learning_rate": 1.1275747444496353e-05, "loss": 0.783, "step": 22060 }, { "epoch": 2.66, "grad_norm": 0.3009990155696869, "learning_rate": 1.1236636957582062e-05, "loss": 0.7318, "step": 22065 }, { "epoch": 2.66, "grad_norm": 0.27844470739364624, "learning_rate": 1.1197591777314597e-05, "loss": 0.8126, "step": 22070 }, { "epoch": 2.66, "grad_norm": 0.2680925130844116, "learning_rate": 1.1158611922069904e-05, "loss": 0.7173, "step": 22075 }, { "epoch": 2.66, "grad_norm": 0.2805362343788147, "learning_rate": 1.1119697410193246e-05, "loss": 0.8169, "step": 22080 }, { "epoch": 2.66, "grad_norm": 0.2496698796749115, "learning_rate": 1.1080848259999054e-05, "loss": 0.7369, "step": 22085 }, { "epoch": 2.66, "grad_norm": 0.2835550606250763, "learning_rate": 1.1042064489771035e-05, "loss": 0.7064, "step": 22090 }, { "epoch": 2.66, "grad_norm": 0.29828065633773804, "learning_rate": 1.100334611776209e-05, "loss": 0.7352, "step": 22095 }, { "epoch": 2.66, "grad_norm": 0.28538069128990173, "learning_rate": 1.0964693162194427e-05, "loss": 0.6496, "step": 22100 }, { "epoch": 2.66, "grad_norm": 0.2641838490962982, "learning_rate": 1.0926105641259392e-05, "loss": 0.7463, "step": 22105 }, { "epoch": 2.66, "grad_norm": 0.2766837775707245, "learning_rate": 1.0887583573117526e-05, "loss": 0.806, "step": 22110 }, { "epoch": 2.66, "grad_norm": 0.28771060705184937, "learning_rate": 1.0849126975898626e-05, "loss": 0.9518, "step": 22115 }, { "epoch": 2.67, "grad_norm": 0.2707420587539673, "learning_rate": 1.0810735867701614e-05, "loss": 0.7166, "step": 22120 }, { "epoch": 2.67, "grad_norm": 0.2899439036846161, "learning_rate": 
1.077241026659464e-05, "loss": 0.7041, "step": 22125 }, { "epoch": 2.67, "grad_norm": 0.25429069995880127, "learning_rate": 1.0734150190615005e-05, "loss": 0.7543, "step": 22130 }, { "epoch": 2.67, "grad_norm": 0.29701635241508484, "learning_rate": 1.069595565776914e-05, "loss": 0.7908, "step": 22135 }, { "epoch": 2.67, "grad_norm": 0.27902600169181824, "learning_rate": 1.06578266860327e-05, "loss": 0.8859, "step": 22140 }, { "epoch": 2.67, "grad_norm": 0.29597392678260803, "learning_rate": 1.0619763293350447e-05, "loss": 0.865, "step": 22145 }, { "epoch": 2.67, "grad_norm": 0.24774394929409027, "learning_rate": 1.0581765497636253e-05, "loss": 0.767, "step": 22150 }, { "epoch": 2.67, "grad_norm": 0.2774536907672882, "learning_rate": 1.0543833316773127e-05, "loss": 0.8292, "step": 22155 }, { "epoch": 2.67, "grad_norm": 0.2544390857219696, "learning_rate": 1.0505966768613273e-05, "loss": 0.8584, "step": 22160 }, { "epoch": 2.67, "grad_norm": 0.2902020215988159, "learning_rate": 1.0468165870977901e-05, "loss": 0.6859, "step": 22165 }, { "epoch": 2.67, "grad_norm": 0.292767196893692, "learning_rate": 1.0430430641657383e-05, "loss": 0.8134, "step": 22170 }, { "epoch": 2.67, "grad_norm": 0.2644735872745514, "learning_rate": 1.0392761098411146e-05, "loss": 0.6928, "step": 22175 }, { "epoch": 2.67, "grad_norm": 0.26982855796813965, "learning_rate": 1.0355157258967772e-05, "loss": 0.7652, "step": 22180 }, { "epoch": 2.67, "grad_norm": 0.2815554141998291, "learning_rate": 1.0317619141024858e-05, "loss": 0.8082, "step": 22185 }, { "epoch": 2.67, "grad_norm": 0.25484582781791687, "learning_rate": 1.028014676224907e-05, "loss": 0.7836, "step": 22190 }, { "epoch": 2.67, "grad_norm": 0.26840999722480774, "learning_rate": 1.0242740140276185e-05, "loss": 0.7332, "step": 22195 }, { "epoch": 2.67, "grad_norm": 0.27440059185028076, "learning_rate": 1.0205399292710969e-05, "loss": 0.7333, "step": 22200 }, { "epoch": 2.68, "grad_norm": 0.28358691930770874, "learning_rate": 
1.0168124237127301e-05, "loss": 0.7612, "step": 22205 }, { "epoch": 2.68, "grad_norm": 0.2975500226020813, "learning_rate": 1.0130914991068028e-05, "loss": 0.7863, "step": 22210 }, { "epoch": 2.68, "grad_norm": 0.3028092682361603, "learning_rate": 1.0093771572045045e-05, "loss": 0.6993, "step": 22215 }, { "epoch": 2.68, "grad_norm": 0.28230762481689453, "learning_rate": 1.0056693997539317e-05, "loss": 0.7698, "step": 22220 }, { "epoch": 2.68, "grad_norm": 0.2743557393550873, "learning_rate": 1.0019682285000785e-05, "loss": 0.7347, "step": 22225 }, { "epoch": 2.68, "grad_norm": 0.25398826599121094, "learning_rate": 9.982736451848367e-06, "loss": 0.7027, "step": 22230 }, { "epoch": 2.68, "grad_norm": 0.2597667872905731, "learning_rate": 9.94585651546997e-06, "loss": 0.7767, "step": 22235 }, { "epoch": 2.68, "grad_norm": 0.2996768355369568, "learning_rate": 9.90904249322259e-06, "loss": 0.8047, "step": 22240 }, { "epoch": 2.68, "grad_norm": 0.3190852105617523, "learning_rate": 9.872294402432074e-06, "loss": 0.7218, "step": 22245 }, { "epoch": 2.68, "grad_norm": 0.27263718843460083, "learning_rate": 9.835612260393316e-06, "loss": 0.8415, "step": 22250 }, { "epoch": 2.68, "grad_norm": 0.29362040758132935, "learning_rate": 9.798996084370143e-06, "loss": 0.8255, "step": 22255 }, { "epoch": 2.68, "grad_norm": 0.30126357078552246, "learning_rate": 9.76244589159535e-06, "loss": 0.726, "step": 22260 }, { "epoch": 2.68, "grad_norm": 0.28834068775177, "learning_rate": 9.725961699270662e-06, "loss": 0.8232, "step": 22265 }, { "epoch": 2.68, "grad_norm": 0.2588998079299927, "learning_rate": 9.689543524566746e-06, "loss": 0.8129, "step": 22270 }, { "epoch": 2.68, "grad_norm": 0.24092987179756165, "learning_rate": 9.653191384623204e-06, "loss": 0.7258, "step": 22275 }, { "epoch": 2.68, "grad_norm": 0.2920580506324768, "learning_rate": 9.616905296548588e-06, "loss": 0.863, "step": 22280 }, { "epoch": 2.69, "grad_norm": 0.2646152079105377, "learning_rate": 9.58068527742032e-06, 
"loss": 0.7227, "step": 22285 }, { "epoch": 2.69, "grad_norm": 0.26156240701675415, "learning_rate": 9.544531344284745e-06, "loss": 0.7387, "step": 22290 }, { "epoch": 2.69, "grad_norm": 0.259880930185318, "learning_rate": 9.50844351415707e-06, "loss": 0.8297, "step": 22295 }, { "epoch": 2.69, "grad_norm": 0.2808079421520233, "learning_rate": 9.47242180402148e-06, "loss": 0.8567, "step": 22300 }, { "epoch": 2.69, "grad_norm": 0.2962208688259125, "learning_rate": 9.436466230830958e-06, "loss": 0.6969, "step": 22305 }, { "epoch": 2.69, "grad_norm": 0.30381715297698975, "learning_rate": 9.40057681150742e-06, "loss": 0.6078, "step": 22310 }, { "epoch": 2.69, "grad_norm": 0.29196277260780334, "learning_rate": 9.364753562941556e-06, "loss": 0.7343, "step": 22315 }, { "epoch": 2.69, "grad_norm": 0.2667737603187561, "learning_rate": 9.328996501993047e-06, "loss": 0.8018, "step": 22320 }, { "epoch": 2.69, "grad_norm": 0.27238935232162476, "learning_rate": 9.29330564549033e-06, "loss": 0.7812, "step": 22325 }, { "epoch": 2.69, "grad_norm": 0.28509876132011414, "learning_rate": 9.257681010230683e-06, "loss": 0.8695, "step": 22330 }, { "epoch": 2.69, "grad_norm": 0.2799687683582306, "learning_rate": 9.222122612980281e-06, "loss": 0.8006, "step": 22335 }, { "epoch": 2.69, "grad_norm": 0.2903033494949341, "learning_rate": 9.186630470474054e-06, "loss": 0.8644, "step": 22340 }, { "epoch": 2.69, "grad_norm": 0.2779753506183624, "learning_rate": 9.15120459941579e-06, "loss": 0.7324, "step": 22345 }, { "epoch": 2.69, "grad_norm": 0.29276126623153687, "learning_rate": 9.1158450164781e-06, "loss": 0.7755, "step": 22350 }, { "epoch": 2.69, "grad_norm": 0.2905157804489136, "learning_rate": 9.080551738302328e-06, "loss": 0.7065, "step": 22355 }, { "epoch": 2.69, "grad_norm": 0.2716805636882782, "learning_rate": 9.04532478149873e-06, "loss": 0.8302, "step": 22360 }, { "epoch": 2.69, "grad_norm": 0.2544827461242676, "learning_rate": 9.010164162646249e-06, "loss": 0.8856, "step": 22365 }, { 
"epoch": 2.7, "grad_norm": 0.30418241024017334, "learning_rate": 8.975069898292647e-06, "loss": 0.8106, "step": 22370 }, { "epoch": 2.7, "grad_norm": 0.2704138457775116, "learning_rate": 8.940042004954412e-06, "loss": 0.8389, "step": 22375 }, { "epoch": 2.7, "grad_norm": 0.28807222843170166, "learning_rate": 8.90508049911689e-06, "loss": 0.7696, "step": 22380 }, { "epoch": 2.7, "grad_norm": 0.28206029534339905, "learning_rate": 8.870185397234086e-06, "loss": 0.7857, "step": 22385 }, { "epoch": 2.7, "grad_norm": 0.26948267221450806, "learning_rate": 8.83535671572883e-06, "loss": 0.8041, "step": 22390 }, { "epoch": 2.7, "grad_norm": 0.2844690680503845, "learning_rate": 8.800594470992611e-06, "loss": 0.8143, "step": 22395 }, { "epoch": 2.7, "grad_norm": 0.28157171607017517, "learning_rate": 8.765898679385742e-06, "loss": 0.7778, "step": 22400 }, { "epoch": 2.7, "grad_norm": 0.28640446066856384, "learning_rate": 8.731269357237192e-06, "loss": 0.8495, "step": 22405 }, { "epoch": 2.7, "grad_norm": 0.28528541326522827, "learning_rate": 8.696706520844693e-06, "loss": 0.8586, "step": 22410 }, { "epoch": 2.7, "grad_norm": 0.2665455937385559, "learning_rate": 8.6622101864746e-06, "loss": 0.759, "step": 22415 }, { "epoch": 2.7, "grad_norm": 0.2542749047279358, "learning_rate": 8.627780370362108e-06, "loss": 0.7185, "step": 22420 }, { "epoch": 2.7, "grad_norm": 0.2605261504650116, "learning_rate": 8.593417088710992e-06, "loss": 0.8301, "step": 22425 }, { "epoch": 2.7, "grad_norm": 0.27719512581825256, "learning_rate": 8.55912035769376e-06, "loss": 0.8109, "step": 22430 }, { "epoch": 2.7, "grad_norm": 0.27308791875839233, "learning_rate": 8.524890193451573e-06, "loss": 0.722, "step": 22435 }, { "epoch": 2.7, "grad_norm": 0.26642659306526184, "learning_rate": 8.490726612094323e-06, "loss": 0.7022, "step": 22440 }, { "epoch": 2.7, "grad_norm": 0.2815400958061218, "learning_rate": 8.456629629700518e-06, "loss": 0.887, "step": 22445 }, { "epoch": 2.7, "grad_norm": 
0.27426496148109436, "learning_rate": 8.422599262317303e-06, "loss": 0.7021, "step": 22450 }, { "epoch": 2.71, "grad_norm": 0.27988290786743164, "learning_rate": 8.388635525960503e-06, "loss": 0.7661, "step": 22455 }, { "epoch": 2.71, "grad_norm": 0.31152406334877014, "learning_rate": 8.35473843661461e-06, "loss": 0.7652, "step": 22460 }, { "epoch": 2.71, "grad_norm": 0.2869008481502533, "learning_rate": 8.320908010232702e-06, "loss": 0.7633, "step": 22465 }, { "epoch": 2.71, "grad_norm": 0.28940802812576294, "learning_rate": 8.287144262736506e-06, "loss": 0.749, "step": 22470 }, { "epoch": 2.71, "grad_norm": 0.27460524439811707, "learning_rate": 8.253447210016363e-06, "loss": 0.7667, "step": 22475 }, { "epoch": 2.71, "grad_norm": 0.2848700284957886, "learning_rate": 8.219816867931218e-06, "loss": 0.7629, "step": 22480 }, { "epoch": 2.71, "grad_norm": 0.2719096541404724, "learning_rate": 8.18625325230861e-06, "loss": 0.7007, "step": 22485 }, { "epoch": 2.71, "grad_norm": 0.2773694396018982, "learning_rate": 8.152756378944708e-06, "loss": 0.7708, "step": 22490 }, { "epoch": 2.71, "grad_norm": 0.2669026851654053, "learning_rate": 8.119326263604281e-06, "loss": 0.8003, "step": 22495 }, { "epoch": 2.71, "grad_norm": 0.27180686593055725, "learning_rate": 8.085962922020611e-06, "loss": 0.8165, "step": 22500 }, { "epoch": 2.71, "grad_norm": 0.2707700729370117, "learning_rate": 8.052666369895622e-06, "loss": 0.7817, "step": 22505 }, { "epoch": 2.71, "grad_norm": 0.2820728123188019, "learning_rate": 8.019436622899727e-06, "loss": 0.7616, "step": 22510 }, { "epoch": 2.71, "grad_norm": 0.27186763286590576, "learning_rate": 7.986273696672019e-06, "loss": 0.7928, "step": 22515 }, { "epoch": 2.71, "grad_norm": 0.29205748438835144, "learning_rate": 7.953177606820044e-06, "loss": 0.8722, "step": 22520 }, { "epoch": 2.71, "grad_norm": 0.2753778398036957, "learning_rate": 7.92014836891991e-06, "loss": 0.6978, "step": 22525 }, { "epoch": 2.71, "grad_norm": 0.2816515564918518, 
"learning_rate": 7.887185998516266e-06, "loss": 0.7766, "step": 22530 }, { "epoch": 2.72, "grad_norm": 0.2690742313861847, "learning_rate": 7.85429051112232e-06, "loss": 0.7389, "step": 22535 }, { "epoch": 2.72, "grad_norm": 0.26442471146583557, "learning_rate": 7.821461922219769e-06, "loss": 0.8625, "step": 22540 }, { "epoch": 2.72, "grad_norm": 0.2712923288345337, "learning_rate": 7.788700247258855e-06, "loss": 0.734, "step": 22545 }, { "epoch": 2.72, "grad_norm": 0.2940865159034729, "learning_rate": 7.756005501658297e-06, "loss": 0.7485, "step": 22550 }, { "epoch": 2.72, "grad_norm": 0.25925499200820923, "learning_rate": 7.723377700805316e-06, "loss": 0.7913, "step": 22555 }, { "epoch": 2.72, "grad_norm": 0.23880963027477264, "learning_rate": 7.690816860055648e-06, "loss": 0.7763, "step": 22560 }, { "epoch": 2.72, "grad_norm": 0.2328793704509735, "learning_rate": 7.658322994733517e-06, "loss": 0.7291, "step": 22565 }, { "epoch": 2.72, "grad_norm": 0.23459170758724213, "learning_rate": 7.625896120131575e-06, "loss": 0.8022, "step": 22570 }, { "epoch": 2.72, "grad_norm": 0.26686781644821167, "learning_rate": 7.593536251511045e-06, "loss": 0.751, "step": 22575 }, { "epoch": 2.72, "grad_norm": 0.2697281837463379, "learning_rate": 7.5612434041015305e-06, "loss": 0.737, "step": 22580 }, { "epoch": 2.72, "grad_norm": 0.30781927704811096, "learning_rate": 7.529017593101105e-06, "loss": 0.8416, "step": 22585 }, { "epoch": 2.72, "grad_norm": 0.2810058891773224, "learning_rate": 7.496858833676306e-06, "loss": 0.7729, "step": 22590 }, { "epoch": 2.72, "grad_norm": 0.24234548211097717, "learning_rate": 7.464767140962124e-06, "loss": 0.7394, "step": 22595 }, { "epoch": 2.72, "grad_norm": 0.263785719871521, "learning_rate": 7.432742530061997e-06, "loss": 0.8007, "step": 22600 }, { "epoch": 2.72, "grad_norm": 0.27168524265289307, "learning_rate": 7.400785016047733e-06, "loss": 0.8045, "step": 22605 }, { "epoch": 2.72, "grad_norm": 0.2800976634025574, "learning_rate": 
7.3688946139596025e-06, "loss": 0.8114, "step": 22610 }, { "epoch": 2.72, "grad_norm": 0.29521191120147705, "learning_rate": 7.3370713388063165e-06, "loss": 0.7672, "step": 22615 }, { "epoch": 2.73, "grad_norm": 0.27929726243019104, "learning_rate": 7.30531520556496e-06, "loss": 0.7258, "step": 22620 }, { "epoch": 2.73, "grad_norm": 0.25764504075050354, "learning_rate": 7.273626229180995e-06, "loss": 0.8177, "step": 22625 }, { "epoch": 2.73, "grad_norm": 0.26366397738456726, "learning_rate": 7.242004424568309e-06, "loss": 0.7124, "step": 22630 }, { "epoch": 2.73, "grad_norm": 0.30640193819999695, "learning_rate": 7.210449806609197e-06, "loss": 0.8062, "step": 22635 }, { "epoch": 2.73, "grad_norm": 0.2728465795516968, "learning_rate": 7.178962390154314e-06, "loss": 0.8824, "step": 22640 }, { "epoch": 2.73, "grad_norm": 0.3235665559768677, "learning_rate": 7.1475421900226705e-06, "loss": 0.6664, "step": 22645 }, { "epoch": 2.73, "grad_norm": 0.3118913471698761, "learning_rate": 7.116189221001622e-06, "loss": 0.7975, "step": 22650 }, { "epoch": 2.73, "grad_norm": 0.2510865032672882, "learning_rate": 7.084903497846983e-06, "loss": 0.7969, "step": 22655 }, { "epoch": 2.73, "grad_norm": 0.28625842928886414, "learning_rate": 7.053685035282808e-06, "loss": 0.7862, "step": 22660 }, { "epoch": 2.73, "grad_norm": 0.30692657828330994, "learning_rate": 7.02253384800156e-06, "loss": 0.6927, "step": 22665 }, { "epoch": 2.73, "grad_norm": 0.30167925357818604, "learning_rate": 6.9914499506640135e-06, "loss": 0.7441, "step": 22670 }, { "epoch": 2.73, "grad_norm": 0.30318981409072876, "learning_rate": 6.960433357899281e-06, "loss": 0.8208, "step": 22675 }, { "epoch": 2.73, "grad_norm": 0.268376886844635, "learning_rate": 6.929484084304837e-06, "loss": 0.732, "step": 22680 }, { "epoch": 2.73, "grad_norm": 0.26787877082824707, "learning_rate": 6.89860214444638e-06, "loss": 0.7454, "step": 22685 }, { "epoch": 2.73, "grad_norm": 0.29671844840049744, "learning_rate": 
6.8677875528580176e-06, "loss": 0.767, "step": 22690 }, { "epoch": 2.73, "grad_norm": 0.27219846844673157, "learning_rate": 6.8370403240421146e-06, "loss": 0.7894, "step": 22695 }, { "epoch": 2.74, "grad_norm": 0.3147854804992676, "learning_rate": 6.806360472469313e-06, "loss": 0.8088, "step": 22700 }, { "epoch": 2.74, "grad_norm": 0.27097561955451965, "learning_rate": 6.775748012578597e-06, "loss": 0.756, "step": 22705 }, { "epoch": 2.74, "grad_norm": 0.25546738505363464, "learning_rate": 6.745202958777174e-06, "loss": 0.8471, "step": 22710 }, { "epoch": 2.74, "grad_norm": 0.27763062715530396, "learning_rate": 6.714725325440595e-06, "loss": 0.7465, "step": 22715 }, { "epoch": 2.74, "grad_norm": 0.265898197889328, "learning_rate": 6.684315126912654e-06, "loss": 0.7873, "step": 22720 }, { "epoch": 2.74, "grad_norm": 0.292896032333374, "learning_rate": 6.653972377505368e-06, "loss": 0.7363, "step": 22725 }, { "epoch": 2.74, "grad_norm": 0.2958504557609558, "learning_rate": 6.623697091499031e-06, "loss": 0.7821, "step": 22730 }, { "epoch": 2.74, "grad_norm": 0.28079283237457275, "learning_rate": 6.5934892831422616e-06, "loss": 0.7395, "step": 22735 }, { "epoch": 2.74, "grad_norm": 0.26290813088417053, "learning_rate": 6.563348966651805e-06, "loss": 0.7934, "step": 22740 }, { "epoch": 2.74, "grad_norm": 0.3071194291114807, "learning_rate": 6.533276156212697e-06, "loss": 0.8421, "step": 22745 }, { "epoch": 2.74, "grad_norm": 0.2782292366027832, "learning_rate": 6.503270865978216e-06, "loss": 0.749, "step": 22750 }, { "epoch": 2.74, "grad_norm": 0.2784491181373596, "learning_rate": 6.47333311006985e-06, "loss": 0.7861, "step": 22755 }, { "epoch": 2.74, "grad_norm": 0.2715974450111389, "learning_rate": 6.443462902577296e-06, "loss": 0.8529, "step": 22760 }, { "epoch": 2.74, "grad_norm": 0.2865915894508362, "learning_rate": 6.413660257558473e-06, "loss": 0.8501, "step": 22765 }, { "epoch": 2.74, "grad_norm": 0.30358344316482544, "learning_rate": 6.383925189039479e-06, 
"loss": 0.7397, "step": 22770 }, { "epoch": 2.74, "grad_norm": 0.2950074076652527, "learning_rate": 6.3542577110146375e-06, "loss": 0.781, "step": 22775 }, { "epoch": 2.74, "grad_norm": 0.2862589955329895, "learning_rate": 6.324657837446445e-06, "loss": 0.7687, "step": 22780 }, { "epoch": 2.75, "grad_norm": 0.2698042392730713, "learning_rate": 6.295125582265575e-06, "loss": 0.8329, "step": 22785 }, { "epoch": 2.75, "grad_norm": 0.2783750295639038, "learning_rate": 6.265660959370894e-06, "loss": 0.7668, "step": 22790 }, { "epoch": 2.75, "grad_norm": 0.2662694752216339, "learning_rate": 6.236263982629441e-06, "loss": 0.7561, "step": 22795 }, { "epoch": 2.75, "grad_norm": 0.35889652371406555, "learning_rate": 6.206934665876417e-06, "loss": 0.8031, "step": 22800 }, { "epoch": 2.75, "grad_norm": 0.2936451733112335, "learning_rate": 6.177673022915147e-06, "loss": 0.7609, "step": 22805 }, { "epoch": 2.75, "grad_norm": 0.299941748380661, "learning_rate": 6.1484790675171315e-06, "loss": 0.6878, "step": 22810 }, { "epoch": 2.75, "grad_norm": 0.30326518416404724, "learning_rate": 6.119352813422046e-06, "loss": 0.8334, "step": 22815 }, { "epoch": 2.75, "grad_norm": 0.2659267783164978, "learning_rate": 6.09029427433766e-06, "loss": 0.7561, "step": 22820 }, { "epoch": 2.75, "grad_norm": 0.2844538986682892, "learning_rate": 6.061303463939882e-06, "loss": 0.8201, "step": 22825 }, { "epoch": 2.75, "grad_norm": 0.274046391248703, "learning_rate": 6.032380395872732e-06, "loss": 0.7778, "step": 22830 }, { "epoch": 2.75, "grad_norm": 0.25698041915893555, "learning_rate": 6.003525083748406e-06, "loss": 0.7376, "step": 22835 }, { "epoch": 2.75, "grad_norm": 0.2826564908027649, "learning_rate": 5.97473754114714e-06, "loss": 0.8031, "step": 22840 }, { "epoch": 2.75, "grad_norm": 0.2923043370246887, "learning_rate": 5.946017781617329e-06, "loss": 0.7715, "step": 22845 }, { "epoch": 2.75, "grad_norm": 0.2728569507598877, "learning_rate": 5.917365818675396e-06, "loss": 0.8772, "step": 22850 
}, { "epoch": 2.75, "grad_norm": 0.2695605754852295, "learning_rate": 5.888781665805986e-06, "loss": 0.9072, "step": 22855 }, { "epoch": 2.75, "grad_norm": 0.25165680050849915, "learning_rate": 5.860265336461689e-06, "loss": 0.6898, "step": 22860 }, { "epoch": 2.75, "grad_norm": 0.2680535614490509, "learning_rate": 5.8318168440632695e-06, "loss": 0.8437, "step": 22865 }, { "epoch": 2.76, "grad_norm": 0.28424060344696045, "learning_rate": 5.8034362019995e-06, "loss": 0.7131, "step": 22870 }, { "epoch": 2.76, "grad_norm": 0.25301888585090637, "learning_rate": 5.775123423627298e-06, "loss": 0.7976, "step": 22875 }, { "epoch": 2.76, "grad_norm": 0.27347350120544434, "learning_rate": 5.746878522271553e-06, "loss": 0.711, "step": 22880 }, { "epoch": 2.76, "grad_norm": 0.26733747124671936, "learning_rate": 5.718701511225299e-06, "loss": 0.6827, "step": 22885 }, { "epoch": 2.76, "grad_norm": 0.2814013361930847, "learning_rate": 5.690592403749511e-06, "loss": 0.7927, "step": 22890 }, { "epoch": 2.76, "grad_norm": 0.264848530292511, "learning_rate": 5.662551213073324e-06, "loss": 0.9032, "step": 22895 }, { "epoch": 2.76, "grad_norm": 0.28370270133018494, "learning_rate": 5.634577952393848e-06, "loss": 0.7843, "step": 22900 }, { "epoch": 2.76, "grad_norm": 0.2731534242630005, "learning_rate": 5.606672634876203e-06, "loss": 0.8433, "step": 22905 }, { "epoch": 2.76, "grad_norm": 0.24351301789283752, "learning_rate": 5.578835273653581e-06, "loss": 0.7116, "step": 22910 }, { "epoch": 2.76, "grad_norm": 0.29447489976882935, "learning_rate": 5.551065881827138e-06, "loss": 0.7504, "step": 22915 }, { "epoch": 2.76, "grad_norm": 0.2870446443557739, "learning_rate": 5.523364472466118e-06, "loss": 0.7319, "step": 22920 }, { "epoch": 2.76, "grad_norm": 0.2608989179134369, "learning_rate": 5.495731058607677e-06, "loss": 0.8248, "step": 22925 }, { "epoch": 2.76, "grad_norm": 0.2600563168525696, "learning_rate": 5.468165653257028e-06, "loss": 0.8477, "step": 22930 }, { "epoch": 2.76, 
"grad_norm": 0.2777158319950104, "learning_rate": 5.440668269387394e-06, "loss": 0.7868, "step": 22935 }, { "epoch": 2.76, "grad_norm": 0.2984352707862854, "learning_rate": 5.4132389199399384e-06, "loss": 0.7796, "step": 22940 }, { "epoch": 2.76, "grad_norm": 0.3199765682220459, "learning_rate": 5.385877617823819e-06, "loss": 0.8571, "step": 22945 }, { "epoch": 2.77, "grad_norm": 0.2830100953578949, "learning_rate": 5.35858437591617e-06, "loss": 0.775, "step": 22950 }, { "epoch": 2.77, "grad_norm": 0.23226149380207062, "learning_rate": 5.33135920706213e-06, "loss": 0.706, "step": 22955 }, { "epoch": 2.77, "grad_norm": 0.28286078572273254, "learning_rate": 5.304202124074736e-06, "loss": 0.7191, "step": 22960 }, { "epoch": 2.77, "grad_norm": 0.3073217570781708, "learning_rate": 5.277113139735012e-06, "loss": 0.7106, "step": 22965 }, { "epoch": 2.77, "grad_norm": 0.2731364667415619, "learning_rate": 5.250092266791944e-06, "loss": 0.7427, "step": 22970 }, { "epoch": 2.77, "grad_norm": 0.2613232135772705, "learning_rate": 5.22313951796246e-06, "loss": 0.7188, "step": 22975 }, { "epoch": 2.77, "grad_norm": 0.25765460729599, "learning_rate": 5.196254905931413e-06, "loss": 0.9263, "step": 22980 }, { "epoch": 2.77, "grad_norm": 0.29852619767189026, "learning_rate": 5.169438443351581e-06, "loss": 0.6889, "step": 22985 }, { "epoch": 2.77, "grad_norm": 0.2636108100414276, "learning_rate": 5.142690142843703e-06, "loss": 0.6934, "step": 22990 }, { "epoch": 2.77, "grad_norm": 0.2629612684249878, "learning_rate": 5.116010016996392e-06, "loss": 0.8041, "step": 22995 }, { "epoch": 2.77, "grad_norm": 0.2794385552406311, "learning_rate": 5.089398078366219e-06, "loss": 0.8116, "step": 23000 }, { "epoch": 2.77, "grad_norm": 0.2664497494697571, "learning_rate": 5.062854339477634e-06, "loss": 0.8767, "step": 23005 }, { "epoch": 2.77, "grad_norm": 0.30518075823783875, "learning_rate": 5.036378812823028e-06, "loss": 0.7819, "step": 23010 }, { "epoch": 2.77, "grad_norm": 0.2723018229007721, 
"learning_rate": 5.0099715108626485e-06, "loss": 0.82, "step": 23015 }, { "epoch": 2.77, "grad_norm": 0.27372339367866516, "learning_rate": 4.983632446024638e-06, "loss": 0.8301, "step": 23020 }, { "epoch": 2.77, "grad_norm": 0.29022863507270813, "learning_rate": 4.957361630705031e-06, "loss": 0.7167, "step": 23025 }, { "epoch": 2.77, "grad_norm": 0.2727348804473877, "learning_rate": 4.931159077267771e-06, "loss": 0.8002, "step": 23030 }, { "epoch": 2.78, "grad_norm": 0.2919345200061798, "learning_rate": 4.905024798044627e-06, "loss": 0.7175, "step": 23035 }, { "epoch": 2.78, "grad_norm": 0.25965866446495056, "learning_rate": 4.878958805335276e-06, "loss": 0.7755, "step": 23040 }, { "epoch": 2.78, "grad_norm": 0.4287550151348114, "learning_rate": 4.852961111407239e-06, "loss": 0.787, "step": 23045 }, { "epoch": 2.78, "grad_norm": 0.30621278285980225, "learning_rate": 4.827031728495878e-06, "loss": 0.8296, "step": 23050 }, { "epoch": 2.78, "grad_norm": 0.29937830567359924, "learning_rate": 4.801170668804433e-06, "loss": 0.7147, "step": 23055 }, { "epoch": 2.78, "grad_norm": 0.2686162292957306, "learning_rate": 4.775377944503983e-06, "loss": 0.8084, "step": 23060 }, { "epoch": 2.78, "grad_norm": 0.2906171381473541, "learning_rate": 4.749653567733402e-06, "loss": 0.7937, "step": 23065 }, { "epoch": 2.78, "grad_norm": 0.3064448833465576, "learning_rate": 4.72399755059949e-06, "loss": 0.758, "step": 23070 }, { "epoch": 2.78, "grad_norm": 0.3103048801422119, "learning_rate": 4.698409905176803e-06, "loss": 0.7798, "step": 23075 }, { "epoch": 2.78, "grad_norm": 0.3002449870109558, "learning_rate": 4.672890643507727e-06, "loss": 0.8077, "step": 23080 }, { "epoch": 2.78, "grad_norm": 0.26185083389282227, "learning_rate": 4.647439777602469e-06, "loss": 0.6943, "step": 23085 }, { "epoch": 2.78, "grad_norm": 0.2598731517791748, "learning_rate": 4.6220573194390655e-06, "loss": 0.7912, "step": 23090 }, { "epoch": 2.78, "grad_norm": 0.283272385597229, "learning_rate": 
4.596743280963344e-06, "loss": 0.7841, "step": 23095 }, { "epoch": 2.78, "grad_norm": 0.29078394174575806, "learning_rate": 4.571497674088925e-06, "loss": 0.8184, "step": 23100 }, { "epoch": 2.78, "grad_norm": 0.26592543721199036, "learning_rate": 4.546320510697221e-06, "loss": 0.7237, "step": 23105 }, { "epoch": 2.78, "grad_norm": 0.2742617130279541, "learning_rate": 4.52121180263747e-06, "loss": 0.7937, "step": 23110 }, { "epoch": 2.79, "grad_norm": 0.2708317041397095, "learning_rate": 4.496171561726636e-06, "loss": 0.8146, "step": 23115 }, { "epoch": 2.79, "grad_norm": 0.2601563036441803, "learning_rate": 4.471199799749508e-06, "loss": 0.719, "step": 23120 }, { "epoch": 2.79, "grad_norm": 0.2519415318965912, "learning_rate": 4.446296528458604e-06, "loss": 0.8397, "step": 23125 }, { "epoch": 2.79, "grad_norm": 0.2795381247997284, "learning_rate": 4.421461759574247e-06, "loss": 0.708, "step": 23130 }, { "epoch": 2.79, "grad_norm": 0.2789604067802429, "learning_rate": 4.396695504784503e-06, "loss": 0.7077, "step": 23135 }, { "epoch": 2.79, "grad_norm": 0.30929040908813477, "learning_rate": 4.371997775745184e-06, "loss": 0.6999, "step": 23140 }, { "epoch": 2.79, "grad_norm": 0.2771584689617157, "learning_rate": 4.347368584079858e-06, "loss": 0.8967, "step": 23145 }, { "epoch": 2.79, "grad_norm": 0.290021687746048, "learning_rate": 4.322807941379869e-06, "loss": 0.7981, "step": 23150 }, { "epoch": 2.79, "grad_norm": 0.2715446352958679, "learning_rate": 4.298315859204254e-06, "loss": 0.7353, "step": 23155 }, { "epoch": 2.79, "grad_norm": 0.27552729845046997, "learning_rate": 4.273892349079794e-06, "loss": 0.8716, "step": 23160 }, { "epoch": 2.79, "grad_norm": 0.2995644509792328, "learning_rate": 4.249537422500992e-06, "loss": 0.7564, "step": 23165 }, { "epoch": 2.79, "grad_norm": 0.3228721022605896, "learning_rate": 4.225251090930132e-06, "loss": 0.7422, "step": 23170 }, { "epoch": 2.79, "grad_norm": 0.2784271538257599, "learning_rate": 4.201033365797119e-06, "loss": 
0.7439, "step": 23175 }, { "epoch": 2.79, "grad_norm": 0.2869255840778351, "learning_rate": 4.176884258499652e-06, "loss": 0.8764, "step": 23180 }, { "epoch": 2.79, "grad_norm": 0.2846572697162628, "learning_rate": 4.152803780403058e-06, "loss": 0.8168, "step": 23185 }, { "epoch": 2.79, "grad_norm": 0.2854968011379242, "learning_rate": 4.12879194284047e-06, "loss": 0.84, "step": 23190 }, { "epoch": 2.79, "grad_norm": 0.2596077620983124, "learning_rate": 4.104848757112616e-06, "loss": 0.7741, "step": 23195 }, { "epoch": 2.8, "grad_norm": 0.2705884575843811, "learning_rate": 4.080974234487966e-06, "loss": 0.6468, "step": 23200 }, { "epoch": 2.8, "grad_norm": 0.2820831835269928, "learning_rate": 4.057168386202681e-06, "loss": 0.7343, "step": 23205 }, { "epoch": 2.8, "grad_norm": 0.2777920663356781, "learning_rate": 4.033431223460548e-06, "loss": 0.825, "step": 23210 }, { "epoch": 2.8, "grad_norm": 0.25113433599472046, "learning_rate": 4.0097627574330825e-06, "loss": 0.8486, "step": 23215 }, { "epoch": 2.8, "grad_norm": 0.26045823097229004, "learning_rate": 3.9861629992594405e-06, "loss": 0.8892, "step": 23220 }, { "epoch": 2.8, "grad_norm": 0.2702331840991974, "learning_rate": 3.962631960046453e-06, "loss": 0.8179, "step": 23225 }, { "epoch": 2.8, "grad_norm": 0.2872564494609833, "learning_rate": 3.939169650868645e-06, "loss": 0.7651, "step": 23230 }, { "epoch": 2.8, "grad_norm": 0.28898364305496216, "learning_rate": 3.915776082768118e-06, "loss": 0.7968, "step": 23235 }, { "epoch": 2.8, "grad_norm": 0.2918056845664978, "learning_rate": 3.8924512667546645e-06, "loss": 0.7513, "step": 23240 }, { "epoch": 2.8, "grad_norm": 0.28438860177993774, "learning_rate": 3.86919521380572e-06, "loss": 0.8384, "step": 23245 }, { "epoch": 2.8, "grad_norm": 0.29474344849586487, "learning_rate": 3.8460079348663795e-06, "loss": 0.8156, "step": 23250 }, { "epoch": 2.8, "grad_norm": 0.28143933415412903, "learning_rate": 3.82288944084933e-06, "loss": 0.7344, "step": 23255 }, { "epoch": 
2.8, "grad_norm": 0.28403791785240173, "learning_rate": 3.7998397426349024e-06, "loss": 0.7604, "step": 23260 }, { "epoch": 2.8, "grad_norm": 0.29978764057159424, "learning_rate": 3.7768588510710353e-06, "loss": 0.7076, "step": 23265 }, { "epoch": 2.8, "grad_norm": 0.2770501375198364, "learning_rate": 3.753946776973327e-06, "loss": 0.8365, "step": 23270 }, { "epoch": 2.8, "grad_norm": 0.27618712186813354, "learning_rate": 3.7311035311249348e-06, "loss": 0.7706, "step": 23275 }, { "epoch": 2.8, "grad_norm": 0.2640831768512726, "learning_rate": 3.7083291242766764e-06, "loss": 0.7948, "step": 23280 }, { "epoch": 2.81, "grad_norm": 0.2883791923522949, "learning_rate": 3.685623567146895e-06, "loss": 0.8043, "step": 23285 }, { "epoch": 2.81, "grad_norm": 0.280841201543808, "learning_rate": 3.6629868704216415e-06, "loss": 0.9078, "step": 23290 }, { "epoch": 2.81, "grad_norm": 0.25826311111450195, "learning_rate": 3.640419044754461e-06, "loss": 0.8755, "step": 23295 }, { "epoch": 2.81, "grad_norm": 0.2830309271812439, "learning_rate": 3.6179201007665413e-06, "loss": 0.8145, "step": 23300 }, { "epoch": 2.81, "grad_norm": 0.290726900100708, "learning_rate": 3.5954900490465956e-06, "loss": 0.8466, "step": 23305 }, { "epoch": 2.81, "grad_norm": 0.28319695591926575, "learning_rate": 3.5731289001509954e-06, "loss": 0.7302, "step": 23310 }, { "epoch": 2.81, "grad_norm": 0.27678796648979187, "learning_rate": 3.5508366646036236e-06, "loss": 0.8587, "step": 23315 }, { "epoch": 2.81, "grad_norm": 0.26364395022392273, "learning_rate": 3.528613352895937e-06, "loss": 0.858, "step": 23320 }, { "epoch": 2.81, "grad_norm": 0.2955693006515503, "learning_rate": 3.506458975486953e-06, "loss": 0.6728, "step": 23325 }, { "epoch": 2.81, "grad_norm": 0.272214412689209, "learning_rate": 3.484373542803298e-06, "loss": 0.794, "step": 23330 }, { "epoch": 2.81, "grad_norm": 0.2752835154533386, "learning_rate": 3.4623570652390743e-06, "loss": 0.6453, "step": 23335 }, { "epoch": 2.81, "grad_norm": 
0.28152868151664734, "learning_rate": 3.440409553155993e-06, "loss": 0.7392, "step": 23340 }, { "epoch": 2.81, "grad_norm": 0.25148865580558777, "learning_rate": 3.4185310168832748e-06, "loss": 0.777, "step": 23345 }, { "epoch": 2.81, "grad_norm": 0.2435861974954605, "learning_rate": 3.3967214667176666e-06, "loss": 0.7149, "step": 23350 }, { "epoch": 2.81, "grad_norm": 0.33531370759010315, "learning_rate": 3.3749809129234752e-06, "loss": 0.7284, "step": 23355 }, { "epoch": 2.81, "grad_norm": 0.30802983045578003, "learning_rate": 3.353309365732548e-06, "loss": 0.714, "step": 23360 }, { "epoch": 2.82, "grad_norm": 0.2919432818889618, "learning_rate": 3.3317068353441945e-06, "loss": 0.8152, "step": 23365 }, { "epoch": 2.82, "grad_norm": 0.2593708634376526, "learning_rate": 3.310173331925331e-06, "loss": 0.6821, "step": 23370 }, { "epoch": 2.82, "grad_norm": 0.27010101079940796, "learning_rate": 3.288708865610318e-06, "loss": 0.753, "step": 23375 }, { "epoch": 2.82, "grad_norm": 0.28530481457710266, "learning_rate": 3.267313446501041e-06, "loss": 0.7806, "step": 23380 }, { "epoch": 2.82, "grad_norm": 0.27056190371513367, "learning_rate": 3.245987084666879e-06, "loss": 0.6671, "step": 23385 }, { "epoch": 2.82, "grad_norm": 0.33442938327789307, "learning_rate": 3.2247297901447534e-06, "loss": 0.7138, "step": 23390 }, { "epoch": 2.82, "grad_norm": 0.2769376039505005, "learning_rate": 3.2035415729390613e-06, "loss": 0.838, "step": 23395 }, { "epoch": 2.82, "grad_norm": 0.29188603162765503, "learning_rate": 3.1824224430216425e-06, "loss": 0.7758, "step": 23400 }, { "epoch": 2.82, "grad_norm": 0.27181747555732727, "learning_rate": 3.161372410331897e-06, "loss": 0.8393, "step": 23405 }, { "epoch": 2.82, "grad_norm": 0.2749415338039398, "learning_rate": 3.1403914847766497e-06, "loss": 0.8015, "step": 23410 }, { "epoch": 2.82, "grad_norm": 0.28852447867393494, "learning_rate": 3.1194796762302353e-06, "loss": 0.8061, "step": 23415 }, { "epoch": 2.82, "grad_norm": 
0.2778397798538208, "learning_rate": 3.0986369945344312e-06, "loss": 0.829, "step": 23420 }, { "epoch": 2.82, "grad_norm": 0.3023031949996948, "learning_rate": 3.0778634494984912e-06, "loss": 0.7389, "step": 23425 }, { "epoch": 2.82, "grad_norm": 0.3010731339454651, "learning_rate": 3.0571590508991607e-06, "loss": 0.8619, "step": 23430 }, { "epoch": 2.82, "grad_norm": 0.28016921877861023, "learning_rate": 3.0365238084805955e-06, "loss": 0.7831, "step": 23435 }, { "epoch": 2.82, "grad_norm": 0.2581021189689636, "learning_rate": 3.015957731954427e-06, "loss": 0.7122, "step": 23440 }, { "epoch": 2.82, "grad_norm": 0.2601596713066101, "learning_rate": 2.9954608309997296e-06, "loss": 0.784, "step": 23445 }, { "epoch": 2.83, "grad_norm": 0.2719314992427826, "learning_rate": 2.9750331152630535e-06, "loss": 0.7701, "step": 23450 }, { "epoch": 2.83, "grad_norm": 0.25035297870635986, "learning_rate": 2.9546745943583418e-06, "loss": 0.785, "step": 23455 }, { "epoch": 2.83, "grad_norm": 0.2835271656513214, "learning_rate": 2.93438527786698e-06, "loss": 0.7819, "step": 23460 }, { "epoch": 2.83, "grad_norm": 0.2592954635620117, "learning_rate": 2.914165175337796e-06, "loss": 0.7281, "step": 23465 }, { "epoch": 2.83, "grad_norm": 0.29438310861587524, "learning_rate": 2.8940142962870784e-06, "loss": 0.7564, "step": 23470 }, { "epoch": 2.83, "grad_norm": 0.2590184807777405, "learning_rate": 2.873932650198457e-06, "loss": 0.862, "step": 23475 }, { "epoch": 2.83, "grad_norm": 0.2566207945346832, "learning_rate": 2.853920246523023e-06, "loss": 0.7873, "step": 23480 }, { "epoch": 2.83, "grad_norm": 0.2697911560535431, "learning_rate": 2.8339770946793073e-06, "loss": 0.8254, "step": 23485 }, { "epoch": 2.83, "grad_norm": 0.2645513117313385, "learning_rate": 2.814103204053203e-06, "loss": 0.7198, "step": 23490 }, { "epoch": 2.83, "grad_norm": 0.294327974319458, "learning_rate": 2.794298583998028e-06, "loss": 0.8079, "step": 23495 }, { "epoch": 2.83, "grad_norm": 0.26449063420295715, 
"learning_rate": 2.77456324383446e-06, "loss": 0.6838, "step": 23500 }, { "epoch": 2.83, "grad_norm": 0.27684205770492554, "learning_rate": 2.7548971928506693e-06, "loss": 0.7777, "step": 23505 }, { "epoch": 2.83, "grad_norm": 0.29637137055397034, "learning_rate": 2.7353004403021017e-06, "loss": 0.8073, "step": 23510 }, { "epoch": 2.83, "grad_norm": 0.297203004360199, "learning_rate": 2.7157729954116634e-06, "loss": 0.671, "step": 23515 }, { "epoch": 2.83, "grad_norm": 0.27864888310432434, "learning_rate": 2.6963148673696034e-06, "loss": 0.7304, "step": 23520 }, { "epoch": 2.83, "grad_norm": 0.30311015248298645, "learning_rate": 2.676926065333562e-06, "loss": 0.7151, "step": 23525 }, { "epoch": 2.84, "grad_norm": 0.2594775855541229, "learning_rate": 2.657606598428591e-06, "loss": 0.8637, "step": 23530 }, { "epoch": 2.84, "grad_norm": 0.2853897213935852, "learning_rate": 2.6383564757470168e-06, "loss": 0.7382, "step": 23535 }, { "epoch": 2.84, "grad_norm": 0.27156689763069153, "learning_rate": 2.6191757063486252e-06, "loss": 0.7312, "step": 23540 }, { "epoch": 2.84, "grad_norm": 0.26867052912712097, "learning_rate": 2.6000642992605127e-06, "loss": 0.7705, "step": 23545 }, { "epoch": 2.84, "grad_norm": 0.27880826592445374, "learning_rate": 2.581022263477134e-06, "loss": 0.703, "step": 23550 }, { "epoch": 2.84, "grad_norm": 0.26525866985321045, "learning_rate": 2.5620496079603205e-06, "loss": 0.7129, "step": 23555 }, { "epoch": 2.84, "grad_norm": 0.36034145951271057, "learning_rate": 2.5431463416392296e-06, "loss": 0.7513, "step": 23560 }, { "epoch": 2.84, "grad_norm": 0.28856807947158813, "learning_rate": 2.5243124734103616e-06, "loss": 0.7044, "step": 23565 }, { "epoch": 2.84, "grad_norm": 0.27370092272758484, "learning_rate": 2.5055480121375426e-06, "loss": 0.7817, "step": 23570 }, { "epoch": 2.84, "grad_norm": 0.28846797347068787, "learning_rate": 2.486852966651992e-06, "loss": 0.7372, "step": 23575 }, { "epoch": 2.84, "grad_norm": 0.26241225004196167, 
"learning_rate": 2.468227345752155e-06, "loss": 0.7602, "step": 23580 }, { "epoch": 2.84, "grad_norm": 0.30139365792274475, "learning_rate": 2.4496711582039365e-06, "loss": 0.7023, "step": 23585 }, { "epoch": 2.84, "grad_norm": 0.29070574045181274, "learning_rate": 2.4311844127404668e-06, "loss": 0.7737, "step": 23590 }, { "epoch": 2.84, "grad_norm": 0.3289770781993866, "learning_rate": 2.41276711806222e-06, "loss": 0.8992, "step": 23595 }, { "epoch": 2.84, "grad_norm": 0.2710113525390625, "learning_rate": 2.394419282836979e-06, "loss": 0.7763, "step": 23600 }, { "epoch": 2.84, "grad_norm": 0.31186643242836, "learning_rate": 2.3761409156998532e-06, "loss": 0.7533, "step": 23605 }, { "epoch": 2.84, "grad_norm": 0.2570800185203552, "learning_rate": 2.357932025253262e-06, "loss": 0.7039, "step": 23610 }, { "epoch": 2.85, "grad_norm": 0.2939087450504303, "learning_rate": 2.3397926200668994e-06, "loss": 0.8794, "step": 23615 }, { "epoch": 2.85, "grad_norm": 0.26302385330200195, "learning_rate": 2.3217227086777533e-06, "loss": 0.6978, "step": 23620 }, { "epoch": 2.85, "grad_norm": 0.27785399556159973, "learning_rate": 2.3037222995901716e-06, "loss": 0.7772, "step": 23625 }, { "epoch": 2.85, "grad_norm": 0.2980126738548279, "learning_rate": 2.2857914012757107e-06, "loss": 0.7843, "step": 23630 }, { "epoch": 2.85, "grad_norm": 0.30063748359680176, "learning_rate": 2.267930022173253e-06, "loss": 0.7381, "step": 23635 }, { "epoch": 2.85, "grad_norm": 0.2854771316051483, "learning_rate": 2.250138170688942e-06, "loss": 0.7182, "step": 23640 }, { "epoch": 2.85, "grad_norm": 0.2724713683128357, "learning_rate": 2.2324158551962457e-06, "loss": 0.8062, "step": 23645 }, { "epoch": 2.85, "grad_norm": 0.2653485834598541, "learning_rate": 2.214763084035842e-06, "loss": 0.6749, "step": 23650 }, { "epoch": 2.85, "grad_norm": 0.27311810851097107, "learning_rate": 2.197179865515736e-06, "loss": 0.6745, "step": 23655 }, { "epoch": 2.85, "grad_norm": 0.3039811849594116, "learning_rate": 
2.179666207911157e-06, "loss": 0.7771, "step": 23660 }, { "epoch": 2.85, "grad_norm": 0.30827796459198, "learning_rate": 2.1622221194646294e-06, "loss": 0.7983, "step": 23665 }, { "epoch": 2.85, "grad_norm": 0.27435705065727234, "learning_rate": 2.144847608385919e-06, "loss": 0.9009, "step": 23670 }, { "epoch": 2.85, "grad_norm": 0.2926265299320221, "learning_rate": 2.1275426828520347e-06, "loss": 0.7032, "step": 23675 }, { "epoch": 2.85, "grad_norm": 0.30964577198028564, "learning_rate": 2.110307351007262e-06, "loss": 0.7401, "step": 23680 }, { "epoch": 2.85, "grad_norm": 0.2811949849128723, "learning_rate": 2.0931416209631126e-06, "loss": 0.6413, "step": 23685 }, { "epoch": 2.85, "grad_norm": 0.2701903283596039, "learning_rate": 2.07604550079834e-06, "loss": 0.8361, "step": 23690 }, { "epoch": 2.85, "grad_norm": 0.26354488730430603, "learning_rate": 2.0590189985589755e-06, "loss": 0.8271, "step": 23695 }, { "epoch": 2.86, "grad_norm": 0.2724680006504059, "learning_rate": 2.0420621222582255e-06, "loss": 0.7632, "step": 23700 }, { "epoch": 2.86, "grad_norm": 0.28979775309562683, "learning_rate": 2.02517487987659e-06, "loss": 0.7434, "step": 23705 }, { "epoch": 2.86, "grad_norm": 0.2960839867591858, "learning_rate": 2.0083572793617274e-06, "loss": 0.717, "step": 23710 }, { "epoch": 2.86, "grad_norm": 0.25892457365989685, "learning_rate": 1.9916093286285904e-06, "loss": 0.8393, "step": 23715 }, { "epoch": 2.86, "grad_norm": 0.2716827690601349, "learning_rate": 1.9749310355592907e-06, "loss": 0.8344, "step": 23720 }, { "epoch": 2.86, "grad_norm": 0.25879597663879395, "learning_rate": 1.958322408003232e-06, "loss": 0.7689, "step": 23725 }, { "epoch": 2.86, "grad_norm": 0.2955610752105713, "learning_rate": 1.9417834537769463e-06, "loss": 0.7773, "step": 23730 }, { "epoch": 2.86, "grad_norm": 0.2859145402908325, "learning_rate": 1.925314180664239e-06, "loss": 0.7632, "step": 23735 }, { "epoch": 2.86, "grad_norm": 0.28816118836402893, "learning_rate": 
1.9089145964160614e-06, "loss": 0.8108, "step": 23740 }, { "epoch": 2.86, "grad_norm": 0.2830142080783844, "learning_rate": 1.892584708750655e-06, "loss": 0.7739, "step": 23745 }, { "epoch": 2.86, "grad_norm": 0.28230804204940796, "learning_rate": 1.876324525353373e-06, "loss": 0.8458, "step": 23750 }, { "epoch": 2.86, "grad_norm": 0.28245803713798523, "learning_rate": 1.8601340538767938e-06, "loss": 0.7592, "step": 23755 }, { "epoch": 2.86, "grad_norm": 0.29182082414627075, "learning_rate": 1.8440133019407056e-06, "loss": 0.701, "step": 23760 }, { "epoch": 2.86, "grad_norm": 0.2928227186203003, "learning_rate": 1.8279622771320723e-06, "loss": 0.7389, "step": 23765 }, { "epoch": 2.86, "grad_norm": 0.2605980932712555, "learning_rate": 1.8119809870050351e-06, "loss": 0.8737, "step": 23770 }, { "epoch": 2.86, "grad_norm": 0.27855974435806274, "learning_rate": 1.7960694390809105e-06, "loss": 0.8722, "step": 23775 }, { "epoch": 2.87, "grad_norm": 0.2521946430206299, "learning_rate": 1.7802276408482086e-06, "loss": 0.8727, "step": 23780 }, { "epoch": 2.87, "grad_norm": 0.2708735167980194, "learning_rate": 1.7644555997626153e-06, "loss": 0.844, "step": 23785 }, { "epoch": 2.87, "grad_norm": 0.26479655504226685, "learning_rate": 1.7487533232469597e-06, "loss": 0.801, "step": 23790 }, { "epoch": 2.87, "grad_norm": 0.32824069261550903, "learning_rate": 1.7331208186912472e-06, "loss": 0.7282, "step": 23795 }, { "epoch": 2.87, "grad_norm": 0.2727472186088562, "learning_rate": 1.7175580934526756e-06, "loss": 0.7332, "step": 23800 }, { "epoch": 2.87, "grad_norm": 0.2697487771511078, "learning_rate": 1.7020651548555863e-06, "loss": 0.8685, "step": 23805 }, { "epoch": 2.87, "grad_norm": 0.3158542513847351, "learning_rate": 1.6866420101914468e-06, "loss": 0.7077, "step": 23810 }, { "epoch": 2.87, "grad_norm": 0.2666032612323761, "learning_rate": 1.6712886667189173e-06, "loss": 0.784, "step": 23815 }, { "epoch": 2.87, "grad_norm": 0.28167471289634705, "learning_rate": 
1.6560051316637678e-06, "loss": 0.6687, "step": 23820 }, { "epoch": 2.87, "grad_norm": 0.2855972945690155, "learning_rate": 1.640791412218978e-06, "loss": 0.8986, "step": 23825 }, { "epoch": 2.87, "grad_norm": 0.28779542446136475, "learning_rate": 1.6256475155446037e-06, "loss": 0.7325, "step": 23830 }, { "epoch": 2.87, "grad_norm": 0.27318522334098816, "learning_rate": 1.61057344876786e-06, "loss": 0.783, "step": 23835 }, { "epoch": 2.87, "grad_norm": 0.29986461997032166, "learning_rate": 1.595569218983106e-06, "loss": 0.8758, "step": 23840 }, { "epoch": 2.87, "grad_norm": 0.2557889223098755, "learning_rate": 1.5806348332518593e-06, "loss": 0.78, "step": 23845 }, { "epoch": 2.87, "grad_norm": 0.26995155215263367, "learning_rate": 1.5657702986026976e-06, "loss": 0.8988, "step": 23850 }, { "epoch": 2.87, "grad_norm": 0.2958948612213135, "learning_rate": 1.550975622031375e-06, "loss": 0.7705, "step": 23855 }, { "epoch": 2.87, "grad_norm": 0.27343350648880005, "learning_rate": 1.5362508105007721e-06, "loss": 0.8335, "step": 23860 }, { "epoch": 2.88, "grad_norm": 0.34859418869018555, "learning_rate": 1.5215958709408783e-06, "loss": 0.8144, "step": 23865 }, { "epoch": 2.88, "grad_norm": 0.26644057035446167, "learning_rate": 1.50701081024876e-06, "loss": 0.7872, "step": 23870 }, { "epoch": 2.88, "grad_norm": 0.26625731587409973, "learning_rate": 1.4924956352886763e-06, "loss": 0.769, "step": 23875 }, { "epoch": 2.88, "grad_norm": 0.28103673458099365, "learning_rate": 1.478050352891913e-06, "loss": 0.8232, "step": 23880 }, { "epoch": 2.88, "grad_norm": 0.3763957619667053, "learning_rate": 1.463674969856915e-06, "loss": 0.7883, "step": 23885 }, { "epoch": 2.88, "grad_norm": 0.28736910223960876, "learning_rate": 1.4493694929492206e-06, "loss": 0.722, "step": 23890 }, { "epoch": 2.88, "grad_norm": 0.2688366770744324, "learning_rate": 1.4351339289014608e-06, "loss": 0.9024, "step": 23895 }, { "epoch": 2.88, "grad_norm": 0.27338284254074097, "learning_rate": 
1.420968284413343e-06, "loss": 0.8657, "step": 23900 }, { "epoch": 2.88, "grad_norm": 0.2942239046096802, "learning_rate": 1.4068725661517343e-06, "loss": 0.7344, "step": 23905 }, { "epoch": 2.88, "grad_norm": 0.26615390181541443, "learning_rate": 1.392846780750495e-06, "loss": 0.7641, "step": 23910 }, { "epoch": 2.88, "grad_norm": 0.27061372995376587, "learning_rate": 1.378890934810678e-06, "loss": 0.7982, "step": 23915 }, { "epoch": 2.88, "grad_norm": 0.2560684084892273, "learning_rate": 1.3650050349003294e-06, "loss": 0.7434, "step": 23920 }, { "epoch": 2.88, "grad_norm": 0.2993808388710022, "learning_rate": 1.3511890875546217e-06, "loss": 0.7741, "step": 23925 }, { "epoch": 2.88, "grad_norm": 0.2594783902168274, "learning_rate": 1.3374430992758033e-06, "loss": 0.827, "step": 23930 }, { "epoch": 2.88, "grad_norm": 0.31000006198883057, "learning_rate": 1.3237670765331998e-06, "loss": 0.7506, "step": 23935 }, { "epoch": 2.88, "grad_norm": 0.3279038071632385, "learning_rate": 1.310161025763179e-06, "loss": 0.7283, "step": 23940 }, { "epoch": 2.89, "grad_norm": 0.2829456925392151, "learning_rate": 1.2966249533692352e-06, "loss": 0.7266, "step": 23945 }, { "epoch": 2.89, "grad_norm": 0.2958797812461853, "learning_rate": 1.2831588657218728e-06, "loss": 0.6592, "step": 23950 }, { "epoch": 2.89, "grad_norm": 0.2799309194087982, "learning_rate": 1.2697627691586887e-06, "loss": 0.7966, "step": 23955 }, { "epoch": 2.89, "grad_norm": 0.3030032217502594, "learning_rate": 1.2564366699842899e-06, "loss": 0.8914, "step": 23960 }, { "epoch": 2.89, "grad_norm": 0.32187244296073914, "learning_rate": 1.2431805744704426e-06, "loss": 0.8033, "step": 23965 }, { "epoch": 2.89, "grad_norm": 0.2817818820476532, "learning_rate": 1.2299944888558732e-06, "loss": 0.781, "step": 23970 }, { "epoch": 2.89, "grad_norm": 0.27116790413856506, "learning_rate": 1.2194960316631197e-06, "loss": 0.7533, "step": 23975 }, { "epoch": 2.89, "grad_norm": 0.3104703426361084, "learning_rate": 
1.2064359794837553e-06, "loss": 0.8843, "step": 23980 }, { "epoch": 2.89, "grad_norm": 0.27435311675071716, "learning_rate": 1.1934459544969023e-06, "loss": 0.8259, "step": 23985 }, { "epoch": 2.89, "grad_norm": 0.29726967215538025, "learning_rate": 1.1805259628160867e-06, "loss": 0.7628, "step": 23990 }, { "epoch": 2.89, "grad_norm": 0.2689824104309082, "learning_rate": 1.167676010521912e-06, "loss": 0.8803, "step": 23995 }, { "epoch": 2.89, "grad_norm": 0.27188640832901, "learning_rate": 1.154896103661973e-06, "loss": 0.8005, "step": 24000 }, { "epoch": 2.89, "grad_norm": 0.2570241689682007, "learning_rate": 1.1421862482509093e-06, "loss": 0.7015, "step": 24005 }, { "epoch": 2.89, "grad_norm": 0.27499955892562866, "learning_rate": 1.1295464502704187e-06, "loss": 0.8028, "step": 24010 }, { "epoch": 2.89, "grad_norm": 0.30428576469421387, "learning_rate": 1.11697671566921e-06, "loss": 0.8024, "step": 24015 }, { "epoch": 2.89, "grad_norm": 0.3098774254322052, "learning_rate": 1.1044770503630008e-06, "loss": 0.7861, "step": 24020 }, { "epoch": 2.89, "grad_norm": 0.2888442575931549, "learning_rate": 1.092047460234552e-06, "loss": 0.7146, "step": 24025 }, { "epoch": 2.9, "grad_norm": 0.2686418294906616, "learning_rate": 1.0796879511336676e-06, "loss": 0.6743, "step": 24030 }, { "epoch": 2.9, "grad_norm": 0.26199212670326233, "learning_rate": 1.0673985288771114e-06, "loss": 0.7769, "step": 24035 }, { "epoch": 2.9, "grad_norm": 0.27240923047065735, "learning_rate": 1.0551791992487068e-06, "loss": 0.8166, "step": 24040 }, { "epoch": 2.9, "grad_norm": 0.2600533366203308, "learning_rate": 1.0430299679992704e-06, "loss": 0.7604, "step": 24045 }, { "epoch": 2.9, "grad_norm": 0.2539452612400055, "learning_rate": 1.0309508408466617e-06, "loss": 0.8341, "step": 24050 }, { "epoch": 2.9, "grad_norm": 0.23374611139297485, "learning_rate": 1.018941823475683e-06, "loss": 0.8019, "step": 24055 }, { "epoch": 2.9, "grad_norm": 0.2858569025993347, "learning_rate": 1.0070029215381803e-06, 
"loss": 0.8227, "step": 24060 }, { "epoch": 2.9, "grad_norm": 0.3056763708591461, "learning_rate": 9.951341406530088e-07, "loss": 0.7465, "step": 24065 }, { "epoch": 2.9, "grad_norm": 0.2973862290382385, "learning_rate": 9.833354864060171e-07, "loss": 0.6763, "step": 24070 }, { "epoch": 2.9, "grad_norm": 0.30157533288002014, "learning_rate": 9.716069643500467e-07, "loss": 0.8366, "step": 24075 }, { "epoch": 2.9, "grad_norm": 0.2873646318912506, "learning_rate": 9.59948580004899e-07, "loss": 0.6718, "step": 24080 }, { "epoch": 2.9, "grad_norm": 0.2701966464519501, "learning_rate": 9.483603388574345e-07, "loss": 0.8791, "step": 24085 }, { "epoch": 2.9, "grad_norm": 0.283039927482605, "learning_rate": 9.368422463614411e-07, "loss": 0.7677, "step": 24090 }, { "epoch": 2.9, "grad_norm": 0.3029240071773529, "learning_rate": 9.253943079377157e-07, "loss": 0.8087, "step": 24095 }, { "epoch": 2.9, "grad_norm": 0.2594771683216095, "learning_rate": 9.14016528974032e-07, "loss": 0.7987, "step": 24100 }, { "epoch": 2.9, "grad_norm": 0.2657313048839569, "learning_rate": 9.027089148251731e-07, "loss": 0.7931, "step": 24105 }, { "epoch": 2.9, "grad_norm": 0.26724809408187866, "learning_rate": 8.914714708128657e-07, "loss": 0.7604, "step": 24110 }, { "epoch": 2.91, "grad_norm": 0.2993227541446686, "learning_rate": 8.803042022258289e-07, "loss": 0.769, "step": 24115 }, { "epoch": 2.91, "grad_norm": 0.27348792552948, "learning_rate": 8.692071143197588e-07, "loss": 0.7721, "step": 24120 }, { "epoch": 2.91, "grad_norm": 0.2768844664096832, "learning_rate": 8.581802123172776e-07, "loss": 0.7656, "step": 24125 }, { "epoch": 2.91, "grad_norm": 0.2716030478477478, "learning_rate": 8.472235014080508e-07, "loss": 0.8586, "step": 24130 }, { "epoch": 2.91, "grad_norm": 0.2838304340839386, "learning_rate": 8.363369867486369e-07, "loss": 0.6991, "step": 24135 }, { "epoch": 2.91, "grad_norm": 0.2540549039840698, "learning_rate": 8.255206734626207e-07, "loss": 0.8098, "step": 24140 }, { "epoch": 
2.91, "grad_norm": 0.30254021286964417, "learning_rate": 8.147745666405137e-07, "loss": 0.8074, "step": 24145 }, { "epoch": 2.91, "grad_norm": 0.33351510763168335, "learning_rate": 8.040986713397867e-07, "loss": 0.756, "step": 24150 }, { "epoch": 2.91, "grad_norm": 0.28228092193603516, "learning_rate": 7.934929925848543e-07, "loss": 0.724, "step": 24155 }, { "epoch": 2.91, "grad_norm": 0.290088951587677, "learning_rate": 7.829575353671235e-07, "loss": 0.7698, "step": 24160 }, { "epoch": 2.91, "grad_norm": 0.29970574378967285, "learning_rate": 7.724923046449117e-07, "loss": 0.7149, "step": 24165 }, { "epoch": 2.91, "grad_norm": 0.2955015301704407, "learning_rate": 7.62097305343512e-07, "loss": 0.8158, "step": 24170 }, { "epoch": 2.91, "grad_norm": 0.31175607442855835, "learning_rate": 7.517725423551613e-07, "loss": 0.6608, "step": 24175 }, { "epoch": 2.91, "grad_norm": 0.3113124966621399, "learning_rate": 7.415180205390392e-07, "loss": 0.7003, "step": 24180 }, { "epoch": 2.91, "grad_norm": 0.25491559505462646, "learning_rate": 7.313337447212519e-07, "loss": 0.777, "step": 24185 }, { "epoch": 2.91, "grad_norm": 0.3029264509677887, "learning_rate": 7.212197196948655e-07, "loss": 0.7743, "step": 24190 }, { "epoch": 2.92, "grad_norm": 0.25185316801071167, "learning_rate": 7.111759502198721e-07, "loss": 0.7702, "step": 24195 }, { "epoch": 2.92, "grad_norm": 0.29424992203712463, "learning_rate": 7.012024410232076e-07, "loss": 0.7636, "step": 24200 }, { "epoch": 2.92, "grad_norm": 0.25971463322639465, "learning_rate": 6.912991967987169e-07, "loss": 0.792, "step": 24205 }, { "epoch": 2.92, "grad_norm": 0.2720402479171753, "learning_rate": 6.814662222072387e-07, "loss": 0.7822, "step": 24210 }, { "epoch": 2.92, "grad_norm": 0.2848743498325348, "learning_rate": 6.717035218764543e-07, "loss": 0.7481, "step": 24215 }, { "epoch": 2.92, "grad_norm": 0.31447309255599976, "learning_rate": 6.620111004010387e-07, "loss": 0.7175, "step": 24220 }, { "epoch": 2.92, "grad_norm": 
0.2868044376373291, "learning_rate": 6.52388962342576e-07, "loss": 0.7447, "step": 24225 }, { "epoch": 2.92, "grad_norm": 0.281565397977829, "learning_rate": 6.428371122295273e-07, "loss": 0.739, "step": 24230 }, { "epoch": 2.92, "grad_norm": 0.27926504611968994, "learning_rate": 6.333555545573299e-07, "loss": 0.7863, "step": 24235 }, { "epoch": 2.92, "grad_norm": 0.2796558439731598, "learning_rate": 6.23944293788331e-07, "loss": 0.7584, "step": 24240 }, { "epoch": 2.92, "grad_norm": 0.27483847737312317, "learning_rate": 6.146033343517709e-07, "loss": 0.777, "step": 24245 }, { "epoch": 2.92, "grad_norm": 0.27021974325180054, "learning_rate": 6.053326806438163e-07, "loss": 0.8146, "step": 24250 }, { "epoch": 2.92, "grad_norm": 0.28549179434776306, "learning_rate": 5.961323370275439e-07, "loss": 0.8211, "step": 24255 }, { "epoch": 2.92, "grad_norm": 0.2960338592529297, "learning_rate": 5.870023078329567e-07, "loss": 0.73, "step": 24260 }, { "epoch": 2.92, "grad_norm": 0.2839416563510895, "learning_rate": 5.779425973569174e-07, "loss": 0.791, "step": 24265 }, { "epoch": 2.92, "grad_norm": 0.28548988699913025, "learning_rate": 5.689532098632487e-07, "loss": 0.7879, "step": 24270 }, { "epoch": 2.92, "grad_norm": 0.289668470621109, "learning_rate": 5.6003414958265e-07, "loss": 0.7007, "step": 24275 }, { "epoch": 2.93, "grad_norm": 0.2637322247028351, "learning_rate": 5.511854207127298e-07, "loss": 0.8707, "step": 24280 }, { "epoch": 2.93, "grad_norm": 0.27071821689605713, "learning_rate": 5.424070274179904e-07, "loss": 0.7963, "step": 24285 }, { "epoch": 2.93, "grad_norm": 0.2783946692943573, "learning_rate": 5.336989738298436e-07, "loss": 0.7763, "step": 24290 }, { "epoch": 2.93, "grad_norm": 0.24435043334960938, "learning_rate": 5.250612640465779e-07, "loss": 0.7297, "step": 24295 }, { "epoch": 2.93, "grad_norm": 0.26858675479888916, "learning_rate": 5.164939021334081e-07, "loss": 0.7109, "step": 24300 }, { "epoch": 2.93, "grad_norm": 0.28042492270469666, 
"learning_rate": 5.079968921223754e-07, "loss": 0.7229, "step": 24305 }, { "epoch": 2.93, "grad_norm": 0.26023703813552856, "learning_rate": 4.995702380124977e-07, "loss": 0.695, "step": 24310 }, { "epoch": 2.93, "grad_norm": 0.27599677443504333, "learning_rate": 4.912139437696195e-07, "loss": 0.7659, "step": 24315 }, { "epoch": 2.93, "grad_norm": 0.2630544602870941, "learning_rate": 4.829280133264779e-07, "loss": 0.7636, "step": 24320 }, { "epoch": 2.93, "grad_norm": 0.30303630232810974, "learning_rate": 4.747124505827371e-07, "loss": 0.6761, "step": 24325 }, { "epoch": 2.93, "grad_norm": 0.26069948077201843, "learning_rate": 4.665672594048875e-07, "loss": 0.8025, "step": 24330 }, { "epoch": 2.93, "grad_norm": 0.2887049615383148, "learning_rate": 4.5849244362634597e-07, "loss": 0.8476, "step": 24335 }, { "epoch": 2.93, "grad_norm": 0.2869950532913208, "learning_rate": 4.504880070473726e-07, "loss": 0.8227, "step": 24340 }, { "epoch": 2.93, "grad_norm": 0.2907213866710663, "learning_rate": 4.4255395343513744e-07, "loss": 0.8843, "step": 24345 }, { "epoch": 2.93, "grad_norm": 0.27622485160827637, "learning_rate": 4.3469028652365346e-07, "loss": 0.7809, "step": 24350 }, { "epoch": 2.93, "grad_norm": 0.3061695098876953, "learning_rate": 4.268970100138269e-07, "loss": 0.6801, "step": 24355 }, { "epoch": 2.94, "grad_norm": 0.26443079113960266, "learning_rate": 4.1917412757345724e-07, "loss": 0.7058, "step": 24360 }, { "epoch": 2.94, "grad_norm": 0.30546635389328003, "learning_rate": 4.1152164283715373e-07, "loss": 0.8242, "step": 24365 }, { "epoch": 2.94, "grad_norm": 0.265575647354126, "learning_rate": 4.039395594064521e-07, "loss": 0.7344, "step": 24370 }, { "epoch": 2.94, "grad_norm": 0.29234451055526733, "learning_rate": 3.9642788084971454e-07, "loss": 0.7067, "step": 24375 }, { "epoch": 2.94, "grad_norm": 0.25120893120765686, "learning_rate": 3.889866107021966e-07, "loss": 0.7673, "step": 24380 }, { "epoch": 2.94, "grad_norm": 0.32437393069267273, "learning_rate": 
3.816157524660135e-07, "loss": 0.6869, "step": 24385 }, { "epoch": 2.94, "grad_norm": 0.2962343096733093, "learning_rate": 3.7431530961014034e-07, "loss": 0.7815, "step": 24390 }, { "epoch": 2.94, "grad_norm": 0.2750551700592041, "learning_rate": 3.6708528557037873e-07, "loss": 0.8175, "step": 24395 }, { "epoch": 2.94, "grad_norm": 0.2620263993740082, "learning_rate": 3.5992568374945685e-07, "loss": 0.7162, "step": 24400 }, { "epoch": 2.94, "grad_norm": 0.26457253098487854, "learning_rate": 3.528365075168793e-07, "loss": 0.6064, "step": 24405 }, { "epoch": 2.94, "grad_norm": 0.29997846484184265, "learning_rate": 3.4581776020907725e-07, "loss": 0.8372, "step": 24410 }, { "epoch": 2.94, "grad_norm": 0.2950671315193176, "learning_rate": 3.388694451293084e-07, "loss": 0.7008, "step": 24415 }, { "epoch": 2.94, "grad_norm": 0.28241240978240967, "learning_rate": 3.3199156554765684e-07, "loss": 0.7895, "step": 24420 }, { "epoch": 2.94, "grad_norm": 0.2898365557193756, "learning_rate": 3.251841247011167e-07, "loss": 0.6871, "step": 24425 }, { "epoch": 2.94, "grad_norm": 0.2402508705854416, "learning_rate": 3.1844712579345843e-07, "loss": 0.8315, "step": 24430 }, { "epoch": 2.94, "grad_norm": 0.2538807988166809, "learning_rate": 3.1178057199536256e-07, "loss": 0.7951, "step": 24435 }, { "epoch": 2.94, "grad_norm": 0.24774649739265442, "learning_rate": 3.051844664443026e-07, "loss": 0.7521, "step": 24440 }, { "epoch": 2.95, "grad_norm": 0.29270678758621216, "learning_rate": 2.9865881224466206e-07, "loss": 0.744, "step": 24445 }, { "epoch": 2.95, "grad_norm": 0.2553872764110565, "learning_rate": 2.9220361246761746e-07, "loss": 0.8013, "step": 24450 }, { "epoch": 2.95, "grad_norm": 0.24886707961559296, "learning_rate": 2.858188701511721e-07, "loss": 0.7174, "step": 24455 }, { "epoch": 2.95, "grad_norm": 0.23313623666763306, "learning_rate": 2.7950458830023893e-07, "loss": 0.7931, "step": 24460 }, { "epoch": 2.95, "grad_norm": 0.25668811798095703, "learning_rate": 
2.7326076988650747e-07, "loss": 0.7746, "step": 24465 }, { "epoch": 2.95, "grad_norm": 0.2648046016693115, "learning_rate": 2.6708741784852716e-07, "loss": 0.8661, "step": 24470 }, { "epoch": 2.95, "grad_norm": 0.2649308741092682, "learning_rate": 2.6098453509167394e-07, "loss": 0.8434, "step": 24475 }, { "epoch": 2.95, "grad_norm": 0.29732733964920044, "learning_rate": 2.549521244881836e-07, "loss": 0.8796, "step": 24480 }, { "epoch": 2.95, "grad_norm": 0.3012838065624237, "learning_rate": 2.489901888771184e-07, "loss": 0.7311, "step": 24485 }, { "epoch": 2.95, "grad_norm": 0.3150104880332947, "learning_rate": 2.4309873106433395e-07, "loss": 0.75, "step": 24490 }, { "epoch": 2.95, "grad_norm": 0.260047048330307, "learning_rate": 2.3727775382256232e-07, "loss": 0.7938, "step": 24495 }, { "epoch": 2.95, "grad_norm": 0.26424214243888855, "learning_rate": 2.3152725989132869e-07, "loss": 0.8173, "step": 24500 }, { "epoch": 2.95, "grad_norm": 0.29962679743766785, "learning_rate": 2.2584725197705156e-07, "loss": 0.7686, "step": 24505 }, { "epoch": 2.95, "grad_norm": 0.2528585195541382, "learning_rate": 2.202377327528926e-07, "loss": 0.7052, "step": 24510 }, { "epoch": 2.95, "grad_norm": 0.31817159056663513, "learning_rate": 2.1469870485888995e-07, "loss": 0.8064, "step": 24515 }, { "epoch": 2.95, "grad_norm": 0.26599112153053284, "learning_rate": 2.092301709018751e-07, "loss": 0.8348, "step": 24520 }, { "epoch": 2.95, "grad_norm": 0.26814529299736023, "learning_rate": 2.0383213345555595e-07, "loss": 0.8049, "step": 24525 }, { "epoch": 2.96, "grad_norm": 0.2638113796710968, "learning_rate": 1.9850459506041693e-07, "loss": 0.8862, "step": 24530 }, { "epoch": 2.96, "grad_norm": 0.2369757890701294, "learning_rate": 1.9324755822375248e-07, "loss": 0.8741, "step": 24535 }, { "epoch": 2.96, "grad_norm": 0.28597578406333923, "learning_rate": 1.8806102541973346e-07, "loss": 0.6238, "step": 24540 }, { "epoch": 2.96, "grad_norm": 0.30425626039505005, "learning_rate": 
1.829449990892906e-07, "loss": 0.788, "step": 24545 }, { "epoch": 2.96, "grad_norm": 0.29849773645401, "learning_rate": 1.7789948164019795e-07, "loss": 0.769, "step": 24550 }, { "epoch": 2.96, "grad_norm": 0.27328890562057495, "learning_rate": 1.7292447544703937e-07, "loss": 0.7794, "step": 24555 }, { "epoch": 2.96, "grad_norm": 0.26762855052948, "learning_rate": 1.6801998285124186e-07, "loss": 0.7316, "step": 24560 }, { "epoch": 2.96, "grad_norm": 0.2702507972717285, "learning_rate": 1.6318600616099241e-07, "loss": 0.8332, "step": 24565 }, { "epoch": 2.96, "grad_norm": 0.2567251920700073, "learning_rate": 1.5842254765135453e-07, "loss": 0.7298, "step": 24570 }, { "epoch": 2.96, "grad_norm": 0.2939104437828064, "learning_rate": 1.5372960956413493e-07, "loss": 0.688, "step": 24575 }, { "epoch": 2.96, "grad_norm": 0.28854793310165405, "learning_rate": 1.491071941080002e-07, "loss": 0.7518, "step": 24580 }, { "epoch": 2.96, "grad_norm": 0.2733674645423889, "learning_rate": 1.445553034584268e-07, "loss": 0.7832, "step": 24585 }, { "epoch": 2.96, "grad_norm": 0.27166128158569336, "learning_rate": 1.4007393975766777e-07, "loss": 0.7416, "step": 24590 }, { "epoch": 2.96, "grad_norm": 0.260300874710083, "learning_rate": 1.3566310511480272e-07, "loss": 0.7861, "step": 24595 }, { "epoch": 2.96, "grad_norm": 0.22208818793296814, "learning_rate": 1.3132280160572105e-07, "loss": 0.7546, "step": 24600 }, { "epoch": 2.96, "grad_norm": 0.2763347029685974, "learning_rate": 1.270530312731055e-07, "loss": 0.7988, "step": 24605 }, { "epoch": 2.97, "grad_norm": 0.2827538847923279, "learning_rate": 1.2285379612648195e-07, "loss": 0.672, "step": 24610 }, { "epoch": 2.97, "grad_norm": 0.27377375960350037, "learning_rate": 1.187250981421195e-07, "loss": 0.7166, "step": 24615 }, { "epoch": 2.97, "grad_norm": 0.26959091424942017, "learning_rate": 1.1466693926311388e-07, "loss": 0.8364, "step": 24620 }, { "epoch": 2.97, "grad_norm": 0.29500630497932434, "learning_rate": 
1.1067932139940394e-07, "loss": 0.7982, "step": 24625 }, { "epoch": 2.97, "grad_norm": 0.24503514170646667, "learning_rate": 1.0676224642767184e-07, "loss": 0.884, "step": 24630 }, { "epoch": 2.97, "grad_norm": 0.30973246693611145, "learning_rate": 1.0291571619142624e-07, "loss": 0.7196, "step": 24635 }, { "epoch": 2.97, "grad_norm": 0.30464237928390503, "learning_rate": 9.913973250096907e-08, "loss": 0.7846, "step": 24640 }, { "epoch": 2.97, "grad_norm": 0.2849758565425873, "learning_rate": 9.543429713339546e-08, "loss": 0.8509, "step": 24645 }, { "epoch": 2.97, "grad_norm": 0.2636762261390686, "learning_rate": 9.179941183264372e-08, "loss": 0.7391, "step": 24650 }, { "epoch": 2.97, "grad_norm": 0.29409074783325195, "learning_rate": 8.823507830936216e-08, "loss": 0.8165, "step": 24655 }, { "epoch": 2.97, "grad_norm": 0.2603693902492523, "learning_rate": 8.474129824109222e-08, "loss": 0.8494, "step": 24660 }, { "epoch": 2.97, "grad_norm": 0.27702707052230835, "learning_rate": 8.131807327208529e-08, "loss": 0.7467, "step": 24665 }, { "epoch": 2.97, "grad_norm": 0.2720186710357666, "learning_rate": 7.796540501343595e-08, "loss": 0.8084, "step": 24670 }, { "epoch": 2.97, "grad_norm": 0.27940112352371216, "learning_rate": 7.468329504303206e-08, "loss": 0.8123, "step": 24675 }, { "epoch": 2.97, "grad_norm": 0.2809736132621765, "learning_rate": 7.147174490552132e-08, "loss": 0.7652, "step": 24680 }, { "epoch": 2.97, "grad_norm": 0.29643842577934265, "learning_rate": 6.833075611239469e-08, "loss": 0.8162, "step": 24685 }, { "epoch": 2.97, "grad_norm": 0.329057514667511, "learning_rate": 6.526033014188637e-08, "loss": 0.883, "step": 24690 }, { "epoch": 2.98, "grad_norm": 0.2862626612186432, "learning_rate": 6.226046843904042e-08, "loss": 0.7266, "step": 24695 }, { "epoch": 2.98, "grad_norm": 0.28131476044654846, "learning_rate": 5.933117241571084e-08, "loss": 0.8039, "step": 24700 }, { "epoch": 2.98, "grad_norm": 0.2740655839443207, "learning_rate": 5.647244345049484e-08, 
"loss": 0.7991, "step": 24705 }, { "epoch": 2.98, "grad_norm": 0.2963801324367523, "learning_rate": 5.368428288881621e-08, "loss": 0.7506, "step": 24710 }, { "epoch": 2.98, "grad_norm": 0.2843016982078552, "learning_rate": 5.096669204287529e-08, "loss": 0.7785, "step": 24715 }, { "epoch": 2.98, "grad_norm": 0.2702232897281647, "learning_rate": 4.831967219168231e-08, "loss": 0.7964, "step": 24720 }, { "epoch": 2.98, "grad_norm": 0.3016199767589569, "learning_rate": 4.574322458097413e-08, "loss": 0.7077, "step": 24725 }, { "epoch": 2.98, "grad_norm": 0.31466734409332275, "learning_rate": 4.3237350423330766e-08, "loss": 0.775, "step": 24730 }, { "epoch": 2.98, "grad_norm": 0.256837397813797, "learning_rate": 4.080205089810884e-08, "loss": 0.7682, "step": 24735 }, { "epoch": 2.98, "grad_norm": 0.28616052865982056, "learning_rate": 3.843732715142489e-08, "loss": 0.7311, "step": 24740 }, { "epoch": 2.98, "grad_norm": 0.24874311685562134, "learning_rate": 3.614318029620533e-08, "loss": 0.7582, "step": 24745 }, { "epoch": 2.98, "grad_norm": 0.27316519618034363, "learning_rate": 3.3919611412153156e-08, "loss": 0.8188, "step": 24750 }, { "epoch": 2.98, "grad_norm": 0.2963423728942871, "learning_rate": 3.1766621545764594e-08, "loss": 0.8122, "step": 24755 }, { "epoch": 2.98, "grad_norm": 0.27651023864746094, "learning_rate": 2.9684211710279128e-08, "loss": 0.774, "step": 24760 }, { "epoch": 2.98, "grad_norm": 0.28626778721809387, "learning_rate": 2.7672382885762788e-08, "loss": 0.8288, "step": 24765 }, { "epoch": 2.98, "grad_norm": 0.2841629683971405, "learning_rate": 2.5731136019058186e-08, "loss": 0.7956, "step": 24770 }, { "epoch": 2.99, "grad_norm": 0.27702802419662476, "learning_rate": 2.3860472023767842e-08, "loss": 0.7156, "step": 24775 }, { "epoch": 2.99, "grad_norm": 0.3124939799308777, "learning_rate": 2.206039178028751e-08, "loss": 0.7434, "step": 24780 }, { "epoch": 2.99, "grad_norm": 0.2639085650444031, "learning_rate": 2.0330896135806184e-08, "loss": 0.7925, 
"step": 24785 }, { "epoch": 2.99, "grad_norm": 0.28985705971717834, "learning_rate": 1.8671985904289422e-08, "loss": 0.798, "step": 24790 }, { "epoch": 2.99, "grad_norm": 0.29289746284484863, "learning_rate": 1.7083661866446053e-08, "loss": 0.8071, "step": 24795 }, { "epoch": 2.99, "grad_norm": 0.25807228684425354, "learning_rate": 1.5565924769811446e-08, "loss": 0.8328, "step": 24800 }, { "epoch": 2.99, "grad_norm": 0.2961677610874176, "learning_rate": 1.4118775328680886e-08, "loss": 0.7459, "step": 24805 }, { "epoch": 2.99, "grad_norm": 0.28014975786209106, "learning_rate": 1.2742214224126246e-08, "loss": 0.8192, "step": 24810 }, { "epoch": 2.99, "grad_norm": 0.26734206080436707, "learning_rate": 1.143624210402927e-08, "loss": 0.8212, "step": 24815 }, { "epoch": 2.99, "grad_norm": 0.2634017765522003, "learning_rate": 1.0200859582981669e-08, "loss": 0.7237, "step": 24820 }, { "epoch": 2.99, "grad_norm": 0.27349719405174255, "learning_rate": 9.036067242418343e-09, "loss": 0.8578, "step": 24825 }, { "epoch": 2.99, "grad_norm": 0.2613513171672821, "learning_rate": 7.941865630534116e-09, "loss": 0.7642, "step": 24830 }, { "epoch": 2.99, "grad_norm": 0.2503609359264374, "learning_rate": 6.918255262300387e-09, "loss": 0.7935, "step": 24835 }, { "epoch": 2.99, "grad_norm": 0.27115345001220703, "learning_rate": 5.9652366194318235e-09, "loss": 0.7417, "step": 24840 }, { "epoch": 2.99, "grad_norm": 0.29795727133750916, "learning_rate": 5.082810150486283e-09, "loss": 0.7581, "step": 24845 }, { "epoch": 2.99, "grad_norm": 0.2834166884422302, "learning_rate": 4.270976270731585e-09, "loss": 0.685, "step": 24850 }, { "epoch": 2.99, "grad_norm": 0.2606295347213745, "learning_rate": 3.5297353622787406e-09, "loss": 0.821, "step": 24855 }, { "epoch": 3.0, "grad_norm": 0.27347010374069214, "learning_rate": 2.8590877739487205e-09, "loss": 0.7438, "step": 24860 }, { "epoch": 3.0, "grad_norm": 0.2770770788192749, "learning_rate": 2.2590338213890334e-09, "loss": 0.8011, "step": 24865 }, 
{ "epoch": 3.0, "grad_norm": 0.27099478244781494, "learning_rate": 1.729573786990457e-09, "loss": 0.8102, "step": 24870 }, { "epoch": 3.0, "grad_norm": 0.31159690022468567, "learning_rate": 1.2707079199536507e-09, "loss": 0.8513, "step": 24875 }, { "epoch": 3.0, "grad_norm": 0.2697480618953705, "learning_rate": 8.824364362225445e-10, "loss": 0.6736, "step": 24880 }, { "epoch": 3.0, "grad_norm": 0.25522977113723755, "learning_rate": 5.647595185176435e-10, "loss": 0.7454, "step": 24885 }, { "epoch": 3.0, "grad_norm": 0.26916685700416565, "learning_rate": 3.1767731638598916e-10, "loss": 0.8128, "step": 24890 }, { "epoch": 3.0, "grad_norm": 0.27489572763442993, "learning_rate": 1.4118994608458556e-10, "loss": 0.8325, "step": 24895 }, { "epoch": 3.0, "step": 24897, "total_flos": 3.2538134341012685e+19, "train_loss": 0.8796718681596458, "train_runtime": 1082714.0985, "train_samples_per_second": 0.368, "train_steps_per_second": 0.023 } ], "logging_steps": 5, "max_steps": 24897, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 3.2538134341012685e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }