{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 50000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001, "grad_norm": 553.298583984375, "learning_rate": 9.999e-06, "loss": 2.5703, "step": 10 }, { "epoch": 0.002, "grad_norm": 178.6173858642578, "learning_rate": 9.997000000000001e-06, "loss": 2.8106, "step": 20 }, { "epoch": 0.003, "grad_norm": 146.4707489013672, "learning_rate": 9.995000000000002e-06, "loss": 1.6458, "step": 30 }, { "epoch": 0.004, "grad_norm": 64.28072357177734, "learning_rate": 9.993e-06, "loss": 1.2954, "step": 40 }, { "epoch": 0.005, "grad_norm": 68.40310668945312, "learning_rate": 9.991000000000001e-06, "loss": 1.809, "step": 50 }, { "epoch": 0.006, "grad_norm": 36.96562194824219, "learning_rate": 9.989e-06, "loss": 1.5231, "step": 60 }, { "epoch": 0.007, "grad_norm": 66.8483657836914, "learning_rate": 9.987000000000001e-06, "loss": 1.5638, "step": 70 }, { "epoch": 0.008, "grad_norm": 45.907325744628906, "learning_rate": 9.985000000000002e-06, "loss": 1.1657, "step": 80 }, { "epoch": 0.009, "grad_norm": 70.11509704589844, "learning_rate": 9.983e-06, "loss": 1.6563, "step": 90 }, { "epoch": 0.01, "grad_norm": 42.17732238769531, "learning_rate": 9.981000000000002e-06, "loss": 1.3248, "step": 100 }, { "epoch": 0.011, "grad_norm": 29.66499137878418, "learning_rate": 9.979e-06, "loss": 1.2286, "step": 110 }, { "epoch": 0.012, "grad_norm": 35.80244827270508, "learning_rate": 9.977000000000001e-06, "loss": 1.9074, "step": 120 }, { "epoch": 0.013, "grad_norm": 14.79271125793457, "learning_rate": 9.975000000000002e-06, "loss": 1.5907, "step": 130 }, { "epoch": 0.014, "grad_norm": 14.291900634765625, "learning_rate": 9.973000000000001e-06, "loss": 1.3962, "step": 140 }, { "epoch": 0.015, "grad_norm": 13.312259674072266, "learning_rate": 9.971e-06, "loss": 1.4318, "step": 150 }, { "epoch": 0.016, "grad_norm": 
15.379679679870605, "learning_rate": 9.969e-06, "loss": 1.3098, "step": 160 }, { "epoch": 0.017, "grad_norm": 22.36258316040039, "learning_rate": 9.967000000000001e-06, "loss": 1.2661, "step": 170 }, { "epoch": 0.018, "grad_norm": 20.435331344604492, "learning_rate": 9.965000000000002e-06, "loss": 1.3866, "step": 180 }, { "epoch": 0.019, "grad_norm": 35.4296989440918, "learning_rate": 9.963000000000001e-06, "loss": 1.5716, "step": 190 }, { "epoch": 0.02, "grad_norm": 70.40152740478516, "learning_rate": 9.961e-06, "loss": 1.4656, "step": 200 }, { "epoch": 0.021, "grad_norm": 108.4637451171875, "learning_rate": 9.959e-06, "loss": 1.5261, "step": 210 }, { "epoch": 0.022, "grad_norm": 26.985759735107422, "learning_rate": 9.957000000000001e-06, "loss": 1.5041, "step": 220 }, { "epoch": 0.023, "grad_norm": 23.619604110717773, "learning_rate": 9.955000000000002e-06, "loss": 1.34, "step": 230 }, { "epoch": 0.024, "grad_norm": 17.254131317138672, "learning_rate": 9.953000000000001e-06, "loss": 1.5387, "step": 240 }, { "epoch": 0.025, "grad_norm": 15.026030540466309, "learning_rate": 9.951e-06, "loss": 1.3294, "step": 250 }, { "epoch": 0.026, "grad_norm": 20.824031829833984, "learning_rate": 9.949e-06, "loss": 1.5163, "step": 260 }, { "epoch": 0.027, "grad_norm": 13.130329132080078, "learning_rate": 9.947000000000001e-06, "loss": 1.2808, "step": 270 }, { "epoch": 0.028, "grad_norm": 15.41263198852539, "learning_rate": 9.945e-06, "loss": 1.3727, "step": 280 }, { "epoch": 0.029, "grad_norm": 16.74553871154785, "learning_rate": 9.943000000000001e-06, "loss": 1.4336, "step": 290 }, { "epoch": 0.03, "grad_norm": 15.605561256408691, "learning_rate": 9.941e-06, "loss": 1.3934, "step": 300 }, { "epoch": 0.031, "grad_norm": 16.457656860351562, "learning_rate": 9.939000000000001e-06, "loss": 1.2274, "step": 310 }, { "epoch": 0.032, "grad_norm": 24.017627716064453, "learning_rate": 9.937000000000002e-06, "loss": 1.4911, "step": 320 }, { "epoch": 0.033, "grad_norm": 28.355009078979492, 
"learning_rate": 9.935e-06, "loss": 1.3001, "step": 330 }, { "epoch": 0.034, "grad_norm": 30.17832374572754, "learning_rate": 9.933e-06, "loss": 1.3518, "step": 340 }, { "epoch": 0.035, "grad_norm": 18.55721092224121, "learning_rate": 9.931e-06, "loss": 1.5084, "step": 350 }, { "epoch": 0.036, "grad_norm": 22.83087921142578, "learning_rate": 9.929000000000001e-06, "loss": 1.4036, "step": 360 }, { "epoch": 0.037, "grad_norm": 17.123695373535156, "learning_rate": 9.927000000000002e-06, "loss": 1.3755, "step": 370 }, { "epoch": 0.038, "grad_norm": 15.054317474365234, "learning_rate": 9.925e-06, "loss": 1.3215, "step": 380 }, { "epoch": 0.039, "grad_norm": 14.674361228942871, "learning_rate": 9.923e-06, "loss": 1.4566, "step": 390 }, { "epoch": 0.04, "grad_norm": 20.41136360168457, "learning_rate": 9.921e-06, "loss": 1.3833, "step": 400 }, { "epoch": 0.041, "grad_norm": 20.24753189086914, "learning_rate": 9.919000000000001e-06, "loss": 1.2975, "step": 410 }, { "epoch": 0.042, "grad_norm": 17.354625701904297, "learning_rate": 9.917000000000002e-06, "loss": 1.2448, "step": 420 }, { "epoch": 0.043, "grad_norm": 35.62627029418945, "learning_rate": 9.915e-06, "loss": 1.1399, "step": 430 }, { "epoch": 0.044, "grad_norm": 84.29058074951172, "learning_rate": 9.913e-06, "loss": 1.7178, "step": 440 }, { "epoch": 0.045, "grad_norm": 21.371997833251953, "learning_rate": 9.911e-06, "loss": 1.5645, "step": 450 }, { "epoch": 0.046, "grad_norm": 13.37043571472168, "learning_rate": 9.909000000000001e-06, "loss": 1.3828, "step": 460 }, { "epoch": 0.047, "grad_norm": 18.28638458251953, "learning_rate": 9.907000000000002e-06, "loss": 1.4066, "step": 470 }, { "epoch": 0.048, "grad_norm": 13.755346298217773, "learning_rate": 9.905000000000001e-06, "loss": 1.3278, "step": 480 }, { "epoch": 0.049, "grad_norm": 11.66639518737793, "learning_rate": 9.903e-06, "loss": 1.5005, "step": 490 }, { "epoch": 0.05, "grad_norm": 14.479344367980957, "learning_rate": 9.901e-06, "loss": 1.3727, "step": 500 
}, { "epoch": 0.051, "grad_norm": 12.67892837524414, "learning_rate": 9.899000000000001e-06, "loss": 1.4698, "step": 510 }, { "epoch": 0.052, "grad_norm": 11.318411827087402, "learning_rate": 9.897e-06, "loss": 1.3041, "step": 520 }, { "epoch": 0.053, "grad_norm": 14.132431030273438, "learning_rate": 9.895000000000001e-06, "loss": 1.4128, "step": 530 }, { "epoch": 0.054, "grad_norm": 12.292994499206543, "learning_rate": 9.893e-06, "loss": 1.4052, "step": 540 }, { "epoch": 0.055, "grad_norm": 36.542171478271484, "learning_rate": 9.891e-06, "loss": 1.4204, "step": 550 }, { "epoch": 0.056, "grad_norm": 12.63934326171875, "learning_rate": 9.889000000000001e-06, "loss": 1.1125, "step": 560 }, { "epoch": 0.057, "grad_norm": 48.24842071533203, "learning_rate": 9.887e-06, "loss": 1.7927, "step": 570 }, { "epoch": 0.058, "grad_norm": 13.541878700256348, "learning_rate": 9.885000000000001e-06, "loss": 1.4452, "step": 580 }, { "epoch": 0.059, "grad_norm": 13.018189430236816, "learning_rate": 9.883e-06, "loss": 1.4627, "step": 590 }, { "epoch": 0.06, "grad_norm": 10.311127662658691, "learning_rate": 9.881e-06, "loss": 1.4585, "step": 600 }, { "epoch": 0.061, "grad_norm": 9.261850357055664, "learning_rate": 9.879000000000001e-06, "loss": 1.387, "step": 610 }, { "epoch": 0.062, "grad_norm": 10.84254264831543, "learning_rate": 9.877e-06, "loss": 1.4871, "step": 620 }, { "epoch": 0.063, "grad_norm": 10.952869415283203, "learning_rate": 9.875000000000001e-06, "loss": 1.4979, "step": 630 }, { "epoch": 0.064, "grad_norm": 10.002272605895996, "learning_rate": 9.873e-06, "loss": 1.458, "step": 640 }, { "epoch": 0.065, "grad_norm": 10.011144638061523, "learning_rate": 9.871000000000001e-06, "loss": 1.3875, "step": 650 }, { "epoch": 0.066, "grad_norm": 9.889904975891113, "learning_rate": 9.869000000000002e-06, "loss": 1.3545, "step": 660 }, { "epoch": 0.067, "grad_norm": 12.218109130859375, "learning_rate": 9.867e-06, "loss": 1.2896, "step": 670 }, { "epoch": 0.068, "grad_norm": 
27.468809127807617, "learning_rate": 9.865000000000001e-06, "loss": 1.3319, "step": 680 }, { "epoch": 0.069, "grad_norm": 14.605216979980469, "learning_rate": 9.863e-06, "loss": 1.2975, "step": 690 }, { "epoch": 0.07, "grad_norm": 14.876215934753418, "learning_rate": 9.861000000000001e-06, "loss": 1.5312, "step": 700 }, { "epoch": 0.071, "grad_norm": 23.29885482788086, "learning_rate": 9.859e-06, "loss": 1.4344, "step": 710 }, { "epoch": 0.072, "grad_norm": 14.471511840820312, "learning_rate": 9.857e-06, "loss": 1.2612, "step": 720 }, { "epoch": 0.073, "grad_norm": 13.913259506225586, "learning_rate": 9.855000000000001e-06, "loss": 1.2979, "step": 730 }, { "epoch": 0.074, "grad_norm": 11.070694923400879, "learning_rate": 9.853e-06, "loss": 1.4822, "step": 740 }, { "epoch": 0.075, "grad_norm": 12.937481880187988, "learning_rate": 9.851000000000001e-06, "loss": 1.3308, "step": 750 }, { "epoch": 0.076, "grad_norm": 11.561639785766602, "learning_rate": 9.849e-06, "loss": 1.2722, "step": 760 }, { "epoch": 0.077, "grad_norm": 21.433521270751953, "learning_rate": 9.847e-06, "loss": 1.3904, "step": 770 }, { "epoch": 0.078, "grad_norm": 16.05402374267578, "learning_rate": 9.845000000000001e-06, "loss": 1.3266, "step": 780 }, { "epoch": 0.079, "grad_norm": 15.273530006408691, "learning_rate": 9.843e-06, "loss": 1.3803, "step": 790 }, { "epoch": 0.08, "grad_norm": 14.392065048217773, "learning_rate": 9.841000000000001e-06, "loss": 1.4635, "step": 800 }, { "epoch": 0.081, "grad_norm": 15.215195655822754, "learning_rate": 9.839e-06, "loss": 1.2314, "step": 810 }, { "epoch": 0.082, "grad_norm": 25.194108963012695, "learning_rate": 9.837000000000001e-06, "loss": 1.66, "step": 820 }, { "epoch": 0.083, "grad_norm": 17.175424575805664, "learning_rate": 9.835000000000002e-06, "loss": 1.255, "step": 830 }, { "epoch": 0.084, "grad_norm": 28.1933536529541, "learning_rate": 9.833e-06, "loss": 1.261, "step": 840 }, { "epoch": 0.085, "grad_norm": 12.571901321411133, "learning_rate": 
9.831000000000001e-06, "loss": 1.1889, "step": 850 }, { "epoch": 0.086, "grad_norm": 23.534793853759766, "learning_rate": 9.829e-06, "loss": 1.4708, "step": 860 }, { "epoch": 0.087, "grad_norm": 12.261663436889648, "learning_rate": 9.827000000000001e-06, "loss": 1.368, "step": 870 }, { "epoch": 0.088, "grad_norm": 17.82017707824707, "learning_rate": 9.825000000000002e-06, "loss": 1.3567, "step": 880 }, { "epoch": 0.089, "grad_norm": 29.98917007446289, "learning_rate": 9.823e-06, "loss": 1.276, "step": 890 }, { "epoch": 0.09, "grad_norm": 35.46406173706055, "learning_rate": 9.821000000000001e-06, "loss": 1.5261, "step": 900 }, { "epoch": 0.091, "grad_norm": 13.116817474365234, "learning_rate": 9.819e-06, "loss": 1.3631, "step": 910 }, { "epoch": 0.092, "grad_norm": 18.06022834777832, "learning_rate": 9.817000000000001e-06, "loss": 1.2091, "step": 920 }, { "epoch": 0.093, "grad_norm": 9.640213012695312, "learning_rate": 9.815000000000002e-06, "loss": 1.267, "step": 930 }, { "epoch": 0.094, "grad_norm": 21.18136978149414, "learning_rate": 9.813e-06, "loss": 1.6418, "step": 940 }, { "epoch": 0.095, "grad_norm": 22.034860610961914, "learning_rate": 9.811e-06, "loss": 1.1179, "step": 950 }, { "epoch": 0.096, "grad_norm": 18.680660247802734, "learning_rate": 9.809e-06, "loss": 1.1055, "step": 960 }, { "epoch": 0.097, "grad_norm": 57.178077697753906, "learning_rate": 9.807000000000001e-06, "loss": 1.2559, "step": 970 }, { "epoch": 0.098, "grad_norm": 58.18821716308594, "learning_rate": 9.805000000000002e-06, "loss": 1.4701, "step": 980 }, { "epoch": 0.099, "grad_norm": 18.12061882019043, "learning_rate": 9.803e-06, "loss": 1.1901, "step": 990 }, { "epoch": 0.1, "grad_norm": 16.00912857055664, "learning_rate": 9.801e-06, "loss": 1.5521, "step": 1000 }, { "epoch": 0.101, "grad_norm": 10.803945541381836, "learning_rate": 9.799e-06, "loss": 1.4065, "step": 1010 }, { "epoch": 0.102, "grad_norm": 32.719879150390625, "learning_rate": 9.797000000000001e-06, "loss": 1.5233, "step": 
1020 }, { "epoch": 0.103, "grad_norm": 12.73800277709961, "learning_rate": 9.795000000000002e-06, "loss": 1.3417, "step": 1030 }, { "epoch": 0.104, "grad_norm": 10.571123123168945, "learning_rate": 9.793000000000001e-06, "loss": 1.2145, "step": 1040 }, { "epoch": 0.105, "grad_norm": 12.390090942382812, "learning_rate": 9.791e-06, "loss": 1.6005, "step": 1050 }, { "epoch": 0.106, "grad_norm": 11.546483993530273, "learning_rate": 9.789e-06, "loss": 1.3421, "step": 1060 }, { "epoch": 0.107, "grad_norm": 9.59424877166748, "learning_rate": 9.787000000000001e-06, "loss": 1.4667, "step": 1070 }, { "epoch": 0.108, "grad_norm": 10.555761337280273, "learning_rate": 9.785e-06, "loss": 1.3217, "step": 1080 }, { "epoch": 0.109, "grad_norm": 18.523649215698242, "learning_rate": 9.783000000000001e-06, "loss": 1.1472, "step": 1090 }, { "epoch": 0.11, "grad_norm": 17.833881378173828, "learning_rate": 9.781e-06, "loss": 1.3665, "step": 1100 }, { "epoch": 0.111, "grad_norm": 11.07105541229248, "learning_rate": 9.779e-06, "loss": 1.4237, "step": 1110 }, { "epoch": 0.112, "grad_norm": 41.3753776550293, "learning_rate": 9.777000000000001e-06, "loss": 1.3564, "step": 1120 }, { "epoch": 0.113, "grad_norm": 22.180496215820312, "learning_rate": 9.775e-06, "loss": 1.5289, "step": 1130 }, { "epoch": 0.114, "grad_norm": 9.563224792480469, "learning_rate": 9.773e-06, "loss": 1.365, "step": 1140 }, { "epoch": 0.115, "grad_norm": 12.874878883361816, "learning_rate": 9.771e-06, "loss": 1.4286, "step": 1150 }, { "epoch": 0.116, "grad_norm": 7.953062057495117, "learning_rate": 9.769e-06, "loss": 1.1758, "step": 1160 }, { "epoch": 0.117, "grad_norm": 11.840394973754883, "learning_rate": 9.767000000000002e-06, "loss": 1.299, "step": 1170 }, { "epoch": 0.118, "grad_norm": 11.139777183532715, "learning_rate": 9.765e-06, "loss": 1.338, "step": 1180 }, { "epoch": 0.119, "grad_norm": 20.537607192993164, "learning_rate": 9.763e-06, "loss": 1.1772, "step": 1190 }, { "epoch": 0.12, "grad_norm": 
18.039188385009766, "learning_rate": 9.761e-06, "loss": 1.3318, "step": 1200 }, { "epoch": 0.121, "grad_norm": 13.58515739440918, "learning_rate": 9.759000000000001e-06, "loss": 1.1604, "step": 1210 }, { "epoch": 0.122, "grad_norm": 14.454570770263672, "learning_rate": 9.757000000000002e-06, "loss": 1.316, "step": 1220 }, { "epoch": 0.123, "grad_norm": 12.138327598571777, "learning_rate": 9.755e-06, "loss": 1.5007, "step": 1230 }, { "epoch": 0.124, "grad_norm": 16.977571487426758, "learning_rate": 9.753e-06, "loss": 1.6336, "step": 1240 }, { "epoch": 0.125, "grad_norm": 14.92613410949707, "learning_rate": 9.751e-06, "loss": 1.172, "step": 1250 }, { "epoch": 0.126, "grad_norm": 19.17780876159668, "learning_rate": 9.749000000000001e-06, "loss": 1.6124, "step": 1260 }, { "epoch": 0.127, "grad_norm": 15.29408073425293, "learning_rate": 9.747000000000002e-06, "loss": 1.3267, "step": 1270 }, { "epoch": 0.128, "grad_norm": 13.527634620666504, "learning_rate": 9.745e-06, "loss": 1.3724, "step": 1280 }, { "epoch": 0.129, "grad_norm": 22.792518615722656, "learning_rate": 9.743000000000001e-06, "loss": 1.4452, "step": 1290 }, { "epoch": 0.13, "grad_norm": 11.144407272338867, "learning_rate": 9.741e-06, "loss": 1.3702, "step": 1300 }, { "epoch": 0.131, "grad_norm": 14.562795639038086, "learning_rate": 9.739000000000001e-06, "loss": 1.4214, "step": 1310 }, { "epoch": 0.132, "grad_norm": 11.825474739074707, "learning_rate": 9.737e-06, "loss": 1.3714, "step": 1320 }, { "epoch": 0.133, "grad_norm": 13.623064994812012, "learning_rate": 9.735e-06, "loss": 1.4901, "step": 1330 }, { "epoch": 0.134, "grad_norm": 15.191678047180176, "learning_rate": 9.733000000000002e-06, "loss": 1.282, "step": 1340 }, { "epoch": 0.135, "grad_norm": 16.95834732055664, "learning_rate": 9.731e-06, "loss": 1.3128, "step": 1350 }, { "epoch": 0.136, "grad_norm": 17.236778259277344, "learning_rate": 9.729000000000001e-06, "loss": 1.5308, "step": 1360 }, { "epoch": 0.137, "grad_norm": 13.272063255310059, 
"learning_rate": 9.727e-06, "loss": 1.3998, "step": 1370 }, { "epoch": 0.138, "grad_norm": 12.335739135742188, "learning_rate": 9.725000000000001e-06, "loss": 1.3014, "step": 1380 }, { "epoch": 0.139, "grad_norm": 10.706031799316406, "learning_rate": 9.723000000000002e-06, "loss": 1.2868, "step": 1390 }, { "epoch": 0.14, "grad_norm": 17.583459854125977, "learning_rate": 9.721e-06, "loss": 1.449, "step": 1400 }, { "epoch": 0.141, "grad_norm": 12.920193672180176, "learning_rate": 9.719000000000001e-06, "loss": 1.1349, "step": 1410 }, { "epoch": 0.142, "grad_norm": 12.062737464904785, "learning_rate": 9.717e-06, "loss": 1.405, "step": 1420 }, { "epoch": 0.143, "grad_norm": 11.076716423034668, "learning_rate": 9.715000000000001e-06, "loss": 1.2565, "step": 1430 }, { "epoch": 0.144, "grad_norm": 16.540769577026367, "learning_rate": 9.713000000000002e-06, "loss": 1.2652, "step": 1440 }, { "epoch": 0.145, "grad_norm": 13.019174575805664, "learning_rate": 9.711e-06, "loss": 1.3722, "step": 1450 }, { "epoch": 0.146, "grad_norm": 19.650390625, "learning_rate": 9.709000000000001e-06, "loss": 1.2706, "step": 1460 }, { "epoch": 0.147, "grad_norm": 23.96675682067871, "learning_rate": 9.707e-06, "loss": 1.3947, "step": 1470 }, { "epoch": 0.148, "grad_norm": 7.801124572753906, "learning_rate": 9.705000000000001e-06, "loss": 1.4594, "step": 1480 }, { "epoch": 0.149, "grad_norm": 10.313019752502441, "learning_rate": 9.703000000000002e-06, "loss": 1.3196, "step": 1490 }, { "epoch": 0.15, "grad_norm": 12.60252571105957, "learning_rate": 9.701e-06, "loss": 1.1644, "step": 1500 }, { "epoch": 0.151, "grad_norm": 11.170662879943848, "learning_rate": 9.699e-06, "loss": 1.5397, "step": 1510 }, { "epoch": 0.152, "grad_norm": 20.48816680908203, "learning_rate": 9.697e-06, "loss": 1.3486, "step": 1520 }, { "epoch": 0.153, "grad_norm": 13.319032669067383, "learning_rate": 9.695000000000001e-06, "loss": 1.3661, "step": 1530 }, { "epoch": 0.154, "grad_norm": 11.840545654296875, "learning_rate": 
9.693000000000002e-06, "loss": 1.2802, "step": 1540 }, { "epoch": 0.155, "grad_norm": 9.547907829284668, "learning_rate": 9.691000000000001e-06, "loss": 1.3302, "step": 1550 }, { "epoch": 0.156, "grad_norm": 15.77173137664795, "learning_rate": 9.689e-06, "loss": 1.3501, "step": 1560 }, { "epoch": 0.157, "grad_norm": 10.96810245513916, "learning_rate": 9.687e-06, "loss": 1.421, "step": 1570 }, { "epoch": 0.158, "grad_norm": 14.247650146484375, "learning_rate": 9.685000000000001e-06, "loss": 1.2498, "step": 1580 }, { "epoch": 0.159, "grad_norm": 11.022327423095703, "learning_rate": 9.683000000000002e-06, "loss": 1.2387, "step": 1590 }, { "epoch": 0.16, "grad_norm": 17.94396209716797, "learning_rate": 9.681000000000001e-06, "loss": 1.3944, "step": 1600 }, { "epoch": 0.161, "grad_norm": 8.992880821228027, "learning_rate": 9.679e-06, "loss": 1.3319, "step": 1610 }, { "epoch": 0.162, "grad_norm": 8.144800186157227, "learning_rate": 9.677e-06, "loss": 1.4857, "step": 1620 }, { "epoch": 0.163, "grad_norm": 14.298980712890625, "learning_rate": 9.675000000000001e-06, "loss": 1.4057, "step": 1630 }, { "epoch": 0.164, "grad_norm": 9.995141983032227, "learning_rate": 9.673000000000002e-06, "loss": 1.3126, "step": 1640 }, { "epoch": 0.165, "grad_norm": 14.224298477172852, "learning_rate": 9.671000000000001e-06, "loss": 1.2886, "step": 1650 }, { "epoch": 0.166, "grad_norm": 13.646617889404297, "learning_rate": 9.669e-06, "loss": 1.4572, "step": 1660 }, { "epoch": 0.167, "grad_norm": 13.845555305480957, "learning_rate": 9.667e-06, "loss": 1.2082, "step": 1670 }, { "epoch": 0.168, "grad_norm": 11.388080596923828, "learning_rate": 9.665000000000001e-06, "loss": 1.3184, "step": 1680 }, { "epoch": 0.169, "grad_norm": 34.45281219482422, "learning_rate": 9.663e-06, "loss": 1.3161, "step": 1690 }, { "epoch": 0.17, "grad_norm": 13.287130355834961, "learning_rate": 9.661000000000001e-06, "loss": 1.4591, "step": 1700 }, { "epoch": 0.171, "grad_norm": 11.078805923461914, "learning_rate": 
9.659e-06, "loss": 1.6159, "step": 1710 }, { "epoch": 0.172, "grad_norm": 10.86491584777832, "learning_rate": 9.657000000000001e-06, "loss": 1.3147, "step": 1720 }, { "epoch": 0.173, "grad_norm": 12.116174697875977, "learning_rate": 9.655000000000002e-06, "loss": 1.2181, "step": 1730 }, { "epoch": 0.174, "grad_norm": 24.011011123657227, "learning_rate": 9.653e-06, "loss": 1.2426, "step": 1740 }, { "epoch": 0.175, "grad_norm": 18.672365188598633, "learning_rate": 9.651e-06, "loss": 1.4219, "step": 1750 }, { "epoch": 0.176, "grad_norm": 15.598608016967773, "learning_rate": 9.649e-06, "loss": 1.2288, "step": 1760 }, { "epoch": 0.177, "grad_norm": 28.17891502380371, "learning_rate": 9.647000000000001e-06, "loss": 1.3433, "step": 1770 }, { "epoch": 0.178, "grad_norm": 23.797483444213867, "learning_rate": 9.645000000000002e-06, "loss": 1.2853, "step": 1780 }, { "epoch": 0.179, "grad_norm": 15.106059074401855, "learning_rate": 9.643e-06, "loss": 1.5703, "step": 1790 }, { "epoch": 0.18, "grad_norm": 16.796579360961914, "learning_rate": 9.641e-06, "loss": 1.1314, "step": 1800 }, { "epoch": 0.181, "grad_norm": 12.762946128845215, "learning_rate": 9.639e-06, "loss": 1.4878, "step": 1810 }, { "epoch": 0.182, "grad_norm": 10.877366065979004, "learning_rate": 9.637000000000001e-06, "loss": 1.3484, "step": 1820 }, { "epoch": 0.183, "grad_norm": 11.013875961303711, "learning_rate": 9.635000000000002e-06, "loss": 1.2754, "step": 1830 }, { "epoch": 0.184, "grad_norm": 11.537466049194336, "learning_rate": 9.633e-06, "loss": 1.2966, "step": 1840 }, { "epoch": 0.185, "grad_norm": 12.215804100036621, "learning_rate": 9.631e-06, "loss": 1.2855, "step": 1850 }, { "epoch": 0.186, "grad_norm": 13.950762748718262, "learning_rate": 9.629e-06, "loss": 1.4337, "step": 1860 }, { "epoch": 0.187, "grad_norm": 24.029438018798828, "learning_rate": 9.627000000000001e-06, "loss": 1.1903, "step": 1870 }, { "epoch": 0.188, "grad_norm": 15.793492317199707, "learning_rate": 9.625e-06, "loss": 1.2571, 
"step": 1880 }, { "epoch": 0.189, "grad_norm": 19.81192970275879, "learning_rate": 9.623000000000001e-06, "loss": 1.4858, "step": 1890 }, { "epoch": 0.19, "grad_norm": 11.644768714904785, "learning_rate": 9.621e-06, "loss": 1.2688, "step": 1900 }, { "epoch": 0.191, "grad_norm": 13.873980522155762, "learning_rate": 9.619e-06, "loss": 1.4197, "step": 1910 }, { "epoch": 0.192, "grad_norm": 14.052123069763184, "learning_rate": 9.617000000000001e-06, "loss": 1.327, "step": 1920 }, { "epoch": 0.193, "grad_norm": 15.266214370727539, "learning_rate": 9.615e-06, "loss": 1.3143, "step": 1930 }, { "epoch": 0.194, "grad_norm": 11.495858192443848, "learning_rate": 9.613000000000001e-06, "loss": 1.1783, "step": 1940 }, { "epoch": 0.195, "grad_norm": 16.20531463623047, "learning_rate": 9.611e-06, "loss": 1.4048, "step": 1950 }, { "epoch": 0.196, "grad_norm": 16.000892639160156, "learning_rate": 9.609e-06, "loss": 1.2313, "step": 1960 }, { "epoch": 0.197, "grad_norm": 9.629794120788574, "learning_rate": 9.607000000000001e-06, "loss": 1.0599, "step": 1970 }, { "epoch": 0.198, "grad_norm": 20.01260757446289, "learning_rate": 9.605e-06, "loss": 0.9873, "step": 1980 }, { "epoch": 0.199, "grad_norm": 42.95699691772461, "learning_rate": 9.603000000000001e-06, "loss": 1.4126, "step": 1990 }, { "epoch": 0.2, "grad_norm": 22.034114837646484, "learning_rate": 9.601e-06, "loss": 1.2358, "step": 2000 }, { "epoch": 0.201, "grad_norm": 16.554719924926758, "learning_rate": 9.599200000000001e-06, "loss": 1.7608, "step": 2010 }, { "epoch": 0.202, "grad_norm": 7.907214164733887, "learning_rate": 9.5972e-06, "loss": 1.3501, "step": 2020 }, { "epoch": 0.203, "grad_norm": 11.139115333557129, "learning_rate": 9.595200000000001e-06, "loss": 1.0459, "step": 2030 }, { "epoch": 0.204, "grad_norm": 11.075380325317383, "learning_rate": 9.5932e-06, "loss": 1.309, "step": 2040 }, { "epoch": 0.205, "grad_norm": 13.7120361328125, "learning_rate": 9.5912e-06, "loss": 1.4386, "step": 2050 }, { "epoch": 0.206, 
"grad_norm": 14.79853343963623, "learning_rate": 9.589200000000001e-06, "loss": 1.1815, "step": 2060 }, { "epoch": 0.207, "grad_norm": 10.341431617736816, "learning_rate": 9.5872e-06, "loss": 1.3621, "step": 2070 }, { "epoch": 0.208, "grad_norm": 17.985410690307617, "learning_rate": 9.585200000000001e-06, "loss": 1.2473, "step": 2080 }, { "epoch": 0.209, "grad_norm": 7.008401870727539, "learning_rate": 9.5832e-06, "loss": 1.0828, "step": 2090 }, { "epoch": 0.21, "grad_norm": 34.05182647705078, "learning_rate": 9.5812e-06, "loss": 1.4671, "step": 2100 }, { "epoch": 0.211, "grad_norm": 17.483779907226562, "learning_rate": 9.579200000000001e-06, "loss": 1.2896, "step": 2110 }, { "epoch": 0.212, "grad_norm": 16.54655647277832, "learning_rate": 9.5772e-06, "loss": 1.294, "step": 2120 }, { "epoch": 0.213, "grad_norm": 19.916540145874023, "learning_rate": 9.575200000000001e-06, "loss": 1.4402, "step": 2130 }, { "epoch": 0.214, "grad_norm": 13.710661888122559, "learning_rate": 9.5732e-06, "loss": 1.3673, "step": 2140 }, { "epoch": 0.215, "grad_norm": 9.033020973205566, "learning_rate": 9.5712e-06, "loss": 1.3853, "step": 2150 }, { "epoch": 0.216, "grad_norm": 15.84251880645752, "learning_rate": 9.569200000000001e-06, "loss": 1.2338, "step": 2160 }, { "epoch": 0.217, "grad_norm": 17.018945693969727, "learning_rate": 9.5672e-06, "loss": 1.0963, "step": 2170 }, { "epoch": 0.218, "grad_norm": 9.448897361755371, "learning_rate": 9.565200000000001e-06, "loss": 1.2931, "step": 2180 }, { "epoch": 0.219, "grad_norm": 9.88851261138916, "learning_rate": 9.5632e-06, "loss": 1.3749, "step": 2190 }, { "epoch": 0.22, "grad_norm": 14.702767372131348, "learning_rate": 9.561200000000001e-06, "loss": 1.357, "step": 2200 }, { "epoch": 0.221, "grad_norm": 12.954829216003418, "learning_rate": 9.5592e-06, "loss": 0.9406, "step": 2210 }, { "epoch": 0.222, "grad_norm": 16.99563217163086, "learning_rate": 9.5572e-06, "loss": 1.2867, "step": 2220 }, { "epoch": 0.223, "grad_norm": 11.784324645996094, 
"learning_rate": 9.555200000000001e-06, "loss": 1.6213, "step": 2230 }, { "epoch": 0.224, "grad_norm": 17.55175018310547, "learning_rate": 9.5532e-06, "loss": 1.444, "step": 2240 }, { "epoch": 0.225, "grad_norm": 12.724123001098633, "learning_rate": 9.551200000000001e-06, "loss": 1.3443, "step": 2250 }, { "epoch": 0.226, "grad_norm": 8.896844863891602, "learning_rate": 9.5492e-06, "loss": 1.3926, "step": 2260 }, { "epoch": 0.227, "grad_norm": 10.380001068115234, "learning_rate": 9.5472e-06, "loss": 1.3656, "step": 2270 }, { "epoch": 0.228, "grad_norm": 10.764153480529785, "learning_rate": 9.545200000000001e-06, "loss": 1.3294, "step": 2280 }, { "epoch": 0.229, "grad_norm": 10.48664379119873, "learning_rate": 9.5432e-06, "loss": 1.3305, "step": 2290 }, { "epoch": 0.23, "grad_norm": 16.07596778869629, "learning_rate": 9.541200000000001e-06, "loss": 1.3037, "step": 2300 }, { "epoch": 0.231, "grad_norm": 10.121542930603027, "learning_rate": 9.5392e-06, "loss": 1.2709, "step": 2310 }, { "epoch": 0.232, "grad_norm": 9.873505592346191, "learning_rate": 9.5372e-06, "loss": 1.2372, "step": 2320 }, { "epoch": 0.233, "grad_norm": 11.051129341125488, "learning_rate": 9.535200000000001e-06, "loss": 1.2499, "step": 2330 }, { "epoch": 0.234, "grad_norm": 10.30156135559082, "learning_rate": 9.5332e-06, "loss": 1.2587, "step": 2340 }, { "epoch": 0.235, "grad_norm": 12.412140846252441, "learning_rate": 9.531200000000001e-06, "loss": 1.0826, "step": 2350 }, { "epoch": 0.236, "grad_norm": 9.977198600769043, "learning_rate": 9.5292e-06, "loss": 1.3221, "step": 2360 }, { "epoch": 0.237, "grad_norm": 28.165489196777344, "learning_rate": 9.527200000000001e-06, "loss": 1.6583, "step": 2370 }, { "epoch": 0.238, "grad_norm": 12.594054222106934, "learning_rate": 9.525200000000002e-06, "loss": 1.1554, "step": 2380 }, { "epoch": 0.239, "grad_norm": 12.32361125946045, "learning_rate": 9.5232e-06, "loss": 1.4172, "step": 2390 }, { "epoch": 0.24, "grad_norm": 15.793909072875977, "learning_rate": 
9.521200000000001e-06, "loss": 1.1642, "step": 2400 }, { "epoch": 0.241, "grad_norm": 11.18139934539795, "learning_rate": 9.5192e-06, "loss": 1.4028, "step": 2410 }, { "epoch": 0.242, "grad_norm": 11.02873706817627, "learning_rate": 9.517200000000001e-06, "loss": 1.3016, "step": 2420 }, { "epoch": 0.243, "grad_norm": 12.460975646972656, "learning_rate": 9.515200000000002e-06, "loss": 1.3669, "step": 2430 }, { "epoch": 0.244, "grad_norm": 10.570152282714844, "learning_rate": 9.5132e-06, "loss": 1.2849, "step": 2440 }, { "epoch": 0.245, "grad_norm": 12.707137107849121, "learning_rate": 9.5112e-06, "loss": 1.1437, "step": 2450 }, { "epoch": 0.246, "grad_norm": 13.67633056640625, "learning_rate": 9.5092e-06, "loss": 1.3225, "step": 2460 }, { "epoch": 0.247, "grad_norm": 9.119421005249023, "learning_rate": 9.507200000000001e-06, "loss": 1.3295, "step": 2470 }, { "epoch": 0.248, "grad_norm": 12.34472370147705, "learning_rate": 9.505200000000002e-06, "loss": 1.339, "step": 2480 }, { "epoch": 0.249, "grad_norm": 14.196908950805664, "learning_rate": 9.5032e-06, "loss": 1.2084, "step": 2490 }, { "epoch": 0.25, "grad_norm": 12.906794548034668, "learning_rate": 9.5012e-06, "loss": 1.5653, "step": 2500 }, { "epoch": 0.251, "grad_norm": 10.584567070007324, "learning_rate": 9.4992e-06, "loss": 1.4479, "step": 2510 }, { "epoch": 0.252, "grad_norm": 8.522605895996094, "learning_rate": 9.497200000000001e-06, "loss": 1.2735, "step": 2520 }, { "epoch": 0.253, "grad_norm": 10.849920272827148, "learning_rate": 9.495200000000002e-06, "loss": 1.0779, "step": 2530 }, { "epoch": 0.254, "grad_norm": 8.324913024902344, "learning_rate": 9.493200000000001e-06, "loss": 1.2378, "step": 2540 }, { "epoch": 0.255, "grad_norm": 23.52731704711914, "learning_rate": 9.4912e-06, "loss": 1.4444, "step": 2550 }, { "epoch": 0.256, "grad_norm": 19.23080062866211, "learning_rate": 9.4892e-06, "loss": 1.4559, "step": 2560 }, { "epoch": 0.257, "grad_norm": 28.2281551361084, "learning_rate": 
9.487200000000001e-06, "loss": 1.4082, "step": 2570 }, { "epoch": 0.258, "grad_norm": 9.790892601013184, "learning_rate": 9.4852e-06, "loss": 1.4092, "step": 2580 }, { "epoch": 0.259, "grad_norm": 7.988399505615234, "learning_rate": 9.483200000000001e-06, "loss": 1.2037, "step": 2590 }, { "epoch": 0.26, "grad_norm": 8.269539833068848, "learning_rate": 9.4812e-06, "loss": 1.2213, "step": 2600 }, { "epoch": 0.261, "grad_norm": 9.257765769958496, "learning_rate": 9.4792e-06, "loss": 1.5012, "step": 2610 }, { "epoch": 0.262, "grad_norm": 11.58704948425293, "learning_rate": 9.477200000000001e-06, "loss": 1.4133, "step": 2620 }, { "epoch": 0.263, "grad_norm": 14.528082847595215, "learning_rate": 9.4752e-06, "loss": 1.0085, "step": 2630 }, { "epoch": 0.264, "grad_norm": 50.10718536376953, "learning_rate": 9.473200000000001e-06, "loss": 1.2768, "step": 2640 }, { "epoch": 0.265, "grad_norm": 23.46904182434082, "learning_rate": 9.4712e-06, "loss": 2.0818, "step": 2650 }, { "epoch": 0.266, "grad_norm": 10.064269065856934, "learning_rate": 9.4692e-06, "loss": 1.2232, "step": 2660 }, { "epoch": 0.267, "grad_norm": 13.59894847869873, "learning_rate": 9.467200000000001e-06, "loss": 1.2164, "step": 2670 }, { "epoch": 0.268, "grad_norm": 12.694211959838867, "learning_rate": 9.4652e-06, "loss": 1.3911, "step": 2680 }, { "epoch": 0.269, "grad_norm": 13.977474212646484, "learning_rate": 9.463200000000001e-06, "loss": 1.266, "step": 2690 }, { "epoch": 0.27, "grad_norm": 11.899271965026855, "learning_rate": 9.4612e-06, "loss": 1.0906, "step": 2700 }, { "epoch": 0.271, "grad_norm": 18.347423553466797, "learning_rate": 9.4592e-06, "loss": 1.2188, "step": 2710 }, { "epoch": 0.272, "grad_norm": 31.01997184753418, "learning_rate": 9.457200000000002e-06, "loss": 1.2443, "step": 2720 }, { "epoch": 0.273, "grad_norm": 21.409841537475586, "learning_rate": 9.4552e-06, "loss": 1.6206, "step": 2730 }, { "epoch": 0.274, "grad_norm": 28.464738845825195, "learning_rate": 9.453200000000001e-06, "loss": 
1.3959, "step": 2740 }, { "epoch": 0.275, "grad_norm": 27.363481521606445, "learning_rate": 9.4512e-06, "loss": 1.3278, "step": 2750 }, { "epoch": 0.276, "grad_norm": 12.727892875671387, "learning_rate": 9.449200000000001e-06, "loss": 1.1531, "step": 2760 }, { "epoch": 0.277, "grad_norm": 24.230548858642578, "learning_rate": 9.447200000000002e-06, "loss": 1.2294, "step": 2770 }, { "epoch": 0.278, "grad_norm": 14.277069091796875, "learning_rate": 9.4452e-06, "loss": 1.3216, "step": 2780 }, { "epoch": 0.279, "grad_norm": 16.04420280456543, "learning_rate": 9.443200000000001e-06, "loss": 1.2706, "step": 2790 }, { "epoch": 0.28, "grad_norm": 14.82460880279541, "learning_rate": 9.4412e-06, "loss": 1.4543, "step": 2800 }, { "epoch": 0.281, "grad_norm": 14.254340171813965, "learning_rate": 9.439200000000001e-06, "loss": 1.011, "step": 2810 }, { "epoch": 0.282, "grad_norm": 18.26475715637207, "learning_rate": 9.4372e-06, "loss": 1.2179, "step": 2820 }, { "epoch": 0.283, "grad_norm": 11.68290901184082, "learning_rate": 9.4352e-06, "loss": 1.2755, "step": 2830 }, { "epoch": 0.284, "grad_norm": 12.850386619567871, "learning_rate": 9.433200000000001e-06, "loss": 1.4531, "step": 2840 }, { "epoch": 0.285, "grad_norm": 13.108072280883789, "learning_rate": 9.4312e-06, "loss": 1.2234, "step": 2850 }, { "epoch": 0.286, "grad_norm": 13.620012283325195, "learning_rate": 9.429200000000001e-06, "loss": 1.2761, "step": 2860 }, { "epoch": 0.287, "grad_norm": 11.770565032958984, "learning_rate": 9.4272e-06, "loss": 1.2688, "step": 2870 }, { "epoch": 0.288, "grad_norm": 23.039783477783203, "learning_rate": 9.4252e-06, "loss": 1.3185, "step": 2880 }, { "epoch": 0.289, "grad_norm": 15.242749214172363, "learning_rate": 9.423200000000002e-06, "loss": 1.2726, "step": 2890 }, { "epoch": 0.29, "grad_norm": 13.054326057434082, "learning_rate": 9.4212e-06, "loss": 1.4965, "step": 2900 }, { "epoch": 0.291, "grad_norm": 7.936321258544922, "learning_rate": 9.419200000000001e-06, "loss": 1.2111, "step": 
2910 }, { "epoch": 0.292, "grad_norm": 15.058441162109375, "learning_rate": 9.4172e-06, "loss": 1.4452, "step": 2920 }, { "epoch": 0.293, "grad_norm": 15.545040130615234, "learning_rate": 9.415200000000001e-06, "loss": 1.4195, "step": 2930 }, { "epoch": 0.294, "grad_norm": 8.68011474609375, "learning_rate": 9.413200000000002e-06, "loss": 1.1531, "step": 2940 }, { "epoch": 0.295, "grad_norm": 9.976950645446777, "learning_rate": 9.4112e-06, "loss": 1.3448, "step": 2950 }, { "epoch": 0.296, "grad_norm": 13.283685684204102, "learning_rate": 9.409200000000001e-06, "loss": 1.2648, "step": 2960 }, { "epoch": 0.297, "grad_norm": 11.547643661499023, "learning_rate": 9.4072e-06, "loss": 1.3325, "step": 2970 }, { "epoch": 0.298, "grad_norm": 9.15052604675293, "learning_rate": 9.405200000000001e-06, "loss": 1.2221, "step": 2980 }, { "epoch": 0.299, "grad_norm": 10.677905082702637, "learning_rate": 9.403200000000002e-06, "loss": 1.3889, "step": 2990 }, { "epoch": 0.3, "grad_norm": 11.118428230285645, "learning_rate": 9.4012e-06, "loss": 1.462, "step": 3000 }, { "epoch": 0.301, "grad_norm": 15.067032814025879, "learning_rate": 9.3992e-06, "loss": 1.3039, "step": 3010 }, { "epoch": 0.302, "grad_norm": 10.537190437316895, "learning_rate": 9.3972e-06, "loss": 1.331, "step": 3020 }, { "epoch": 0.303, "grad_norm": 9.242080688476562, "learning_rate": 9.395200000000001e-06, "loss": 1.3398, "step": 3030 }, { "epoch": 0.304, "grad_norm": 8.189338684082031, "learning_rate": 9.393200000000002e-06, "loss": 1.3571, "step": 3040 }, { "epoch": 0.305, "grad_norm": 7.820470333099365, "learning_rate": 9.3912e-06, "loss": 1.1683, "step": 3050 }, { "epoch": 0.306, "grad_norm": 17.190282821655273, "learning_rate": 9.3892e-06, "loss": 1.631, "step": 3060 }, { "epoch": 0.307, "grad_norm": 8.663206100463867, "learning_rate": 9.3872e-06, "loss": 1.2288, "step": 3070 }, { "epoch": 0.308, "grad_norm": 10.377467155456543, "learning_rate": 9.385200000000001e-06, "loss": 1.2994, "step": 3080 }, { "epoch": 
0.309, "grad_norm": 10.615673065185547, "learning_rate": 9.383200000000002e-06, "loss": 1.4301, "step": 3090 }, { "epoch": 0.31, "grad_norm": 13.625157356262207, "learning_rate": 9.381200000000001e-06, "loss": 1.3571, "step": 3100 }, { "epoch": 0.311, "grad_norm": 8.007444381713867, "learning_rate": 9.3792e-06, "loss": 1.5293, "step": 3110 }, { "epoch": 0.312, "grad_norm": 8.776016235351562, "learning_rate": 9.3772e-06, "loss": 1.0383, "step": 3120 }, { "epoch": 0.313, "grad_norm": 9.243791580200195, "learning_rate": 9.375200000000001e-06, "loss": 1.0221, "step": 3130 }, { "epoch": 0.314, "grad_norm": 16.613603591918945, "learning_rate": 9.373200000000002e-06, "loss": 1.3903, "step": 3140 }, { "epoch": 0.315, "grad_norm": 13.014996528625488, "learning_rate": 9.371200000000001e-06, "loss": 1.1755, "step": 3150 }, { "epoch": 0.316, "grad_norm": 17.09458351135254, "learning_rate": 9.3692e-06, "loss": 1.6569, "step": 3160 }, { "epoch": 0.317, "grad_norm": 10.674847602844238, "learning_rate": 9.3672e-06, "loss": 1.0852, "step": 3170 }, { "epoch": 0.318, "grad_norm": 10.704791069030762, "learning_rate": 9.365200000000001e-06, "loss": 1.3693, "step": 3180 }, { "epoch": 0.319, "grad_norm": 15.340550422668457, "learning_rate": 9.3632e-06, "loss": 1.4769, "step": 3190 }, { "epoch": 0.32, "grad_norm": 9.755617141723633, "learning_rate": 9.361200000000001e-06, "loss": 1.0809, "step": 3200 }, { "epoch": 0.321, "grad_norm": 11.036744117736816, "learning_rate": 9.3592e-06, "loss": 1.0881, "step": 3210 }, { "epoch": 0.322, "grad_norm": 16.14586067199707, "learning_rate": 9.3572e-06, "loss": 1.2413, "step": 3220 }, { "epoch": 0.323, "grad_norm": 18.264163970947266, "learning_rate": 9.355200000000002e-06, "loss": 1.3974, "step": 3230 }, { "epoch": 0.324, "grad_norm": 23.410655975341797, "learning_rate": 9.3532e-06, "loss": 1.4149, "step": 3240 }, { "epoch": 0.325, "grad_norm": 12.093097686767578, "learning_rate": 9.3512e-06, "loss": 1.4245, "step": 3250 }, { "epoch": 0.326, 
"grad_norm": 14.360788345336914, "learning_rate": 9.3492e-06, "loss": 1.2235, "step": 3260 }, { "epoch": 0.327, "grad_norm": 9.432902336120605, "learning_rate": 9.347200000000001e-06, "loss": 1.3656, "step": 3270 }, { "epoch": 0.328, "grad_norm": 18.593488693237305, "learning_rate": 9.345200000000002e-06, "loss": 1.203, "step": 3280 }, { "epoch": 0.329, "grad_norm": 16.372730255126953, "learning_rate": 9.3432e-06, "loss": 1.2568, "step": 3290 }, { "epoch": 0.33, "grad_norm": 19.8244686126709, "learning_rate": 9.3412e-06, "loss": 1.3016, "step": 3300 }, { "epoch": 0.331, "grad_norm": 15.406209945678711, "learning_rate": 9.3392e-06, "loss": 1.4093, "step": 3310 }, { "epoch": 0.332, "grad_norm": 13.136086463928223, "learning_rate": 9.337200000000001e-06, "loss": 1.3213, "step": 3320 }, { "epoch": 0.333, "grad_norm": 15.53652286529541, "learning_rate": 9.335200000000002e-06, "loss": 1.3157, "step": 3330 }, { "epoch": 0.334, "grad_norm": 10.82143497467041, "learning_rate": 9.3332e-06, "loss": 1.2828, "step": 3340 }, { "epoch": 0.335, "grad_norm": 9.532742500305176, "learning_rate": 9.3312e-06, "loss": 1.315, "step": 3350 }, { "epoch": 0.336, "grad_norm": 13.635393142700195, "learning_rate": 9.3292e-06, "loss": 1.3081, "step": 3360 }, { "epoch": 0.337, "grad_norm": 12.003236770629883, "learning_rate": 9.327200000000001e-06, "loss": 1.2782, "step": 3370 }, { "epoch": 0.338, "grad_norm": 11.80959701538086, "learning_rate": 9.3252e-06, "loss": 1.262, "step": 3380 }, { "epoch": 0.339, "grad_norm": 9.662680625915527, "learning_rate": 9.3232e-06, "loss": 1.157, "step": 3390 }, { "epoch": 0.34, "grad_norm": 8.688703536987305, "learning_rate": 9.3212e-06, "loss": 1.3084, "step": 3400 }, { "epoch": 0.341, "grad_norm": 19.05828285217285, "learning_rate": 9.3192e-06, "loss": 1.479, "step": 3410 }, { "epoch": 0.342, "grad_norm": 8.719159126281738, "learning_rate": 9.317200000000001e-06, "loss": 1.0643, "step": 3420 }, { "epoch": 0.343, "grad_norm": 12.395783424377441, 
"learning_rate": 9.3152e-06, "loss": 1.2321, "step": 3430 }, { "epoch": 0.344, "grad_norm": 14.686535835266113, "learning_rate": 9.313200000000001e-06, "loss": 1.4487, "step": 3440 }, { "epoch": 0.345, "grad_norm": 28.613759994506836, "learning_rate": 9.3112e-06, "loss": 1.5556, "step": 3450 }, { "epoch": 0.346, "grad_norm": 16.280899047851562, "learning_rate": 9.3092e-06, "loss": 1.5714, "step": 3460 }, { "epoch": 0.347, "grad_norm": 11.7525634765625, "learning_rate": 9.307200000000001e-06, "loss": 1.3243, "step": 3470 }, { "epoch": 0.348, "grad_norm": 8.579140663146973, "learning_rate": 9.3052e-06, "loss": 1.2698, "step": 3480 }, { "epoch": 0.349, "grad_norm": 7.920191764831543, "learning_rate": 9.303200000000001e-06, "loss": 1.5224, "step": 3490 }, { "epoch": 0.35, "grad_norm": 8.037747383117676, "learning_rate": 9.3012e-06, "loss": 1.4063, "step": 3500 }, { "epoch": 0.351, "grad_norm": 8.377565383911133, "learning_rate": 9.2992e-06, "loss": 1.2039, "step": 3510 }, { "epoch": 0.352, "grad_norm": 16.84113883972168, "learning_rate": 9.297200000000001e-06, "loss": 1.5108, "step": 3520 }, { "epoch": 0.353, "grad_norm": 8.579976081848145, "learning_rate": 9.2952e-06, "loss": 1.2058, "step": 3530 }, { "epoch": 0.354, "grad_norm": 9.581146240234375, "learning_rate": 9.293200000000001e-06, "loss": 1.3751, "step": 3540 }, { "epoch": 0.355, "grad_norm": 13.727093696594238, "learning_rate": 9.2912e-06, "loss": 1.4747, "step": 3550 }, { "epoch": 0.356, "grad_norm": 8.757494926452637, "learning_rate": 9.2892e-06, "loss": 1.213, "step": 3560 }, { "epoch": 0.357, "grad_norm": 9.663415908813477, "learning_rate": 9.287200000000001e-06, "loss": 1.1638, "step": 3570 }, { "epoch": 0.358, "grad_norm": 14.356528282165527, "learning_rate": 9.2852e-06, "loss": 1.3933, "step": 3580 }, { "epoch": 0.359, "grad_norm": 10.559135437011719, "learning_rate": 9.283200000000001e-06, "loss": 1.2146, "step": 3590 }, { "epoch": 0.36, "grad_norm": 23.07046127319336, "learning_rate": 9.2812e-06, 
"loss": 1.1888, "step": 3600 }, { "epoch": 0.361, "grad_norm": 12.793816566467285, "learning_rate": 9.279200000000001e-06, "loss": 1.3649, "step": 3610 }, { "epoch": 0.362, "grad_norm": 23.559301376342773, "learning_rate": 9.2772e-06, "loss": 1.4811, "step": 3620 }, { "epoch": 0.363, "grad_norm": 15.077939987182617, "learning_rate": 9.2752e-06, "loss": 1.4171, "step": 3630 }, { "epoch": 0.364, "grad_norm": 11.191791534423828, "learning_rate": 9.273200000000001e-06, "loss": 1.1002, "step": 3640 }, { "epoch": 0.365, "grad_norm": 11.525708198547363, "learning_rate": 9.271200000000002e-06, "loss": 1.159, "step": 3650 }, { "epoch": 0.366, "grad_norm": 22.139638900756836, "learning_rate": 9.269200000000001e-06, "loss": 1.423, "step": 3660 }, { "epoch": 0.367, "grad_norm": 21.18092155456543, "learning_rate": 9.2672e-06, "loss": 1.4533, "step": 3670 }, { "epoch": 0.368, "grad_norm": 16.23587417602539, "learning_rate": 9.2652e-06, "loss": 1.2505, "step": 3680 }, { "epoch": 0.369, "grad_norm": 14.089337348937988, "learning_rate": 9.263200000000001e-06, "loss": 1.3815, "step": 3690 }, { "epoch": 0.37, "grad_norm": 20.590518951416016, "learning_rate": 9.261200000000002e-06, "loss": 1.206, "step": 3700 }, { "epoch": 0.371, "grad_norm": 14.413628578186035, "learning_rate": 9.259200000000001e-06, "loss": 1.2502, "step": 3710 }, { "epoch": 0.372, "grad_norm": 12.682168960571289, "learning_rate": 9.2572e-06, "loss": 1.3259, "step": 3720 }, { "epoch": 0.373, "grad_norm": 13.32224178314209, "learning_rate": 9.2552e-06, "loss": 1.5608, "step": 3730 }, { "epoch": 0.374, "grad_norm": 12.589244842529297, "learning_rate": 9.253200000000001e-06, "loss": 1.3362, "step": 3740 }, { "epoch": 0.375, "grad_norm": 9.821370124816895, "learning_rate": 9.2512e-06, "loss": 1.1899, "step": 3750 }, { "epoch": 0.376, "grad_norm": 10.051286697387695, "learning_rate": 9.249200000000001e-06, "loss": 1.4641, "step": 3760 }, { "epoch": 0.377, "grad_norm": 17.172809600830078, "learning_rate": 9.2472e-06, 
"loss": 1.3807, "step": 3770 }, { "epoch": 0.378, "grad_norm": 11.091852188110352, "learning_rate": 9.245200000000001e-06, "loss": 1.3636, "step": 3780 }, { "epoch": 0.379, "grad_norm": 11.264762878417969, "learning_rate": 9.243200000000002e-06, "loss": 1.144, "step": 3790 }, { "epoch": 0.38, "grad_norm": 16.187755584716797, "learning_rate": 9.2412e-06, "loss": 1.3544, "step": 3800 }, { "epoch": 0.381, "grad_norm": 13.031487464904785, "learning_rate": 9.2392e-06, "loss": 1.5363, "step": 3810 }, { "epoch": 0.382, "grad_norm": 8.881110191345215, "learning_rate": 9.2372e-06, "loss": 1.2359, "step": 3820 }, { "epoch": 0.383, "grad_norm": 22.943395614624023, "learning_rate": 9.235200000000001e-06, "loss": 1.3783, "step": 3830 }, { "epoch": 0.384, "grad_norm": 11.28361701965332, "learning_rate": 9.233200000000002e-06, "loss": 1.1897, "step": 3840 }, { "epoch": 0.385, "grad_norm": 15.082734107971191, "learning_rate": 9.2312e-06, "loss": 0.9349, "step": 3850 }, { "epoch": 0.386, "grad_norm": 14.34780502319336, "learning_rate": 9.2292e-06, "loss": 1.3506, "step": 3860 }, { "epoch": 0.387, "grad_norm": 21.930788040161133, "learning_rate": 9.2272e-06, "loss": 1.5344, "step": 3870 }, { "epoch": 0.388, "grad_norm": 3.953766345977783, "learning_rate": 9.225200000000001e-06, "loss": 1.1874, "step": 3880 }, { "epoch": 0.389, "grad_norm": 26.546579360961914, "learning_rate": 9.223200000000002e-06, "loss": 1.1489, "step": 3890 }, { "epoch": 0.39, "grad_norm": 16.212556838989258, "learning_rate": 9.2212e-06, "loss": 1.3737, "step": 3900 }, { "epoch": 0.391, "grad_norm": 25.557315826416016, "learning_rate": 9.2192e-06, "loss": 1.4626, "step": 3910 }, { "epoch": 0.392, "grad_norm": 18.43967056274414, "learning_rate": 9.2172e-06, "loss": 0.9943, "step": 3920 }, { "epoch": 0.393, "grad_norm": 17.430503845214844, "learning_rate": 9.215200000000001e-06, "loss": 1.3599, "step": 3930 }, { "epoch": 0.394, "grad_norm": 17.398202896118164, "learning_rate": 9.213200000000002e-06, "loss": 1.1161, 
"step": 3940 }, { "epoch": 0.395, "grad_norm": 12.919048309326172, "learning_rate": 9.211200000000001e-06, "loss": 1.3019, "step": 3950 }, { "epoch": 0.396, "grad_norm": 12.259662628173828, "learning_rate": 9.2092e-06, "loss": 1.2118, "step": 3960 }, { "epoch": 0.397, "grad_norm": 12.485530853271484, "learning_rate": 9.2072e-06, "loss": 1.1991, "step": 3970 }, { "epoch": 0.398, "grad_norm": 16.184070587158203, "learning_rate": 9.205200000000001e-06, "loss": 1.0205, "step": 3980 }, { "epoch": 0.399, "grad_norm": 12.243937492370605, "learning_rate": 9.2032e-06, "loss": 1.3139, "step": 3990 }, { "epoch": 0.4, "grad_norm": 16.732261657714844, "learning_rate": 9.201200000000001e-06, "loss": 1.6951, "step": 4000 }, { "epoch": 0.401, "grad_norm": 13.049805641174316, "learning_rate": 9.1994e-06, "loss": 1.3506, "step": 4010 }, { "epoch": 0.402, "grad_norm": 6.581603527069092, "learning_rate": 9.197400000000001e-06, "loss": 1.1833, "step": 4020 }, { "epoch": 0.403, "grad_norm": 13.749405860900879, "learning_rate": 9.195400000000002e-06, "loss": 1.3661, "step": 4030 }, { "epoch": 0.404, "grad_norm": 9.183478355407715, "learning_rate": 9.1934e-06, "loss": 1.3971, "step": 4040 }, { "epoch": 0.405, "grad_norm": 8.104496955871582, "learning_rate": 9.1914e-06, "loss": 1.5687, "step": 4050 }, { "epoch": 0.406, "grad_norm": 7.333279609680176, "learning_rate": 9.1894e-06, "loss": 1.3316, "step": 4060 }, { "epoch": 0.407, "grad_norm": 7.6298956871032715, "learning_rate": 9.187400000000001e-06, "loss": 0.9288, "step": 4070 }, { "epoch": 0.408, "grad_norm": 8.161398887634277, "learning_rate": 9.1854e-06, "loss": 1.1203, "step": 4080 }, { "epoch": 0.409, "grad_norm": 17.671445846557617, "learning_rate": 9.183400000000001e-06, "loss": 1.5368, "step": 4090 }, { "epoch": 0.41, "grad_norm": 9.265928268432617, "learning_rate": 9.1814e-06, "loss": 1.6052, "step": 4100 }, { "epoch": 0.411, "grad_norm": 11.215835571289062, "learning_rate": 9.1794e-06, "loss": 1.3046, "step": 4110 }, { "epoch": 
0.412, "grad_norm": 12.322066307067871, "learning_rate": 9.177400000000001e-06, "loss": 1.2959, "step": 4120 }, { "epoch": 0.413, "grad_norm": 9.997173309326172, "learning_rate": 9.1754e-06, "loss": 1.3044, "step": 4130 }, { "epoch": 0.414, "grad_norm": 18.876413345336914, "learning_rate": 9.173400000000001e-06, "loss": 1.2386, "step": 4140 }, { "epoch": 0.415, "grad_norm": 11.710536003112793, "learning_rate": 9.1714e-06, "loss": 1.1605, "step": 4150 }, { "epoch": 0.416, "grad_norm": 40.21215057373047, "learning_rate": 9.1694e-06, "loss": 1.8447, "step": 4160 }, { "epoch": 0.417, "grad_norm": 14.148285865783691, "learning_rate": 9.167400000000001e-06, "loss": 1.4958, "step": 4170 }, { "epoch": 0.418, "grad_norm": 8.623912811279297, "learning_rate": 9.1654e-06, "loss": 1.4314, "step": 4180 }, { "epoch": 0.419, "grad_norm": 6.790268898010254, "learning_rate": 9.163400000000001e-06, "loss": 1.2324, "step": 4190 }, { "epoch": 0.42, "grad_norm": 7.486350059509277, "learning_rate": 9.1614e-06, "loss": 1.2192, "step": 4200 }, { "epoch": 0.421, "grad_norm": 19.309904098510742, "learning_rate": 9.1594e-06, "loss": 1.2218, "step": 4210 }, { "epoch": 0.422, "grad_norm": 9.007675170898438, "learning_rate": 9.157400000000001e-06, "loss": 1.362, "step": 4220 }, { "epoch": 0.423, "grad_norm": 14.461140632629395, "learning_rate": 9.1554e-06, "loss": 1.2684, "step": 4230 }, { "epoch": 0.424, "grad_norm": 8.517191886901855, "learning_rate": 9.153400000000001e-06, "loss": 1.3562, "step": 4240 }, { "epoch": 0.425, "grad_norm": 11.127458572387695, "learning_rate": 9.1514e-06, "loss": 1.308, "step": 4250 }, { "epoch": 0.426, "grad_norm": 22.960670471191406, "learning_rate": 9.149400000000001e-06, "loss": 1.3747, "step": 4260 }, { "epoch": 0.427, "grad_norm": 10.214832305908203, "learning_rate": 9.147400000000002e-06, "loss": 0.9972, "step": 4270 }, { "epoch": 0.428, "grad_norm": 10.02214241027832, "learning_rate": 9.1454e-06, "loss": 1.7316, "step": 4280 }, { "epoch": 0.429, 
"grad_norm": 9.159738540649414, "learning_rate": 9.143400000000001e-06, "loss": 1.2506, "step": 4290 }, { "epoch": 0.43, "grad_norm": 16.804216384887695, "learning_rate": 9.1414e-06, "loss": 1.4223, "step": 4300 }, { "epoch": 0.431, "grad_norm": 10.659538269042969, "learning_rate": 9.139400000000001e-06, "loss": 1.201, "step": 4310 }, { "epoch": 0.432, "grad_norm": 12.202113151550293, "learning_rate": 9.1374e-06, "loss": 1.1805, "step": 4320 }, { "epoch": 0.433, "grad_norm": 14.963141441345215, "learning_rate": 9.1354e-06, "loss": 1.0188, "step": 4330 }, { "epoch": 0.434, "grad_norm": 10.751496315002441, "learning_rate": 9.133400000000001e-06, "loss": 1.2498, "step": 4340 }, { "epoch": 0.435, "grad_norm": 11.19100284576416, "learning_rate": 9.1314e-06, "loss": 1.5348, "step": 4350 }, { "epoch": 0.436, "grad_norm": 9.26286506652832, "learning_rate": 9.129400000000001e-06, "loss": 1.1841, "step": 4360 }, { "epoch": 0.437, "grad_norm": 11.9354829788208, "learning_rate": 9.1274e-06, "loss": 1.3717, "step": 4370 }, { "epoch": 0.438, "grad_norm": 8.911476135253906, "learning_rate": 9.1254e-06, "loss": 1.2523, "step": 4380 }, { "epoch": 0.439, "grad_norm": 13.4826021194458, "learning_rate": 9.123400000000001e-06, "loss": 1.1743, "step": 4390 }, { "epoch": 0.44, "grad_norm": 20.680143356323242, "learning_rate": 9.1214e-06, "loss": 1.3545, "step": 4400 }, { "epoch": 0.441, "grad_norm": 16.1810245513916, "learning_rate": 9.119400000000001e-06, "loss": 1.1206, "step": 4410 }, { "epoch": 0.442, "grad_norm": 28.446529388427734, "learning_rate": 9.1174e-06, "loss": 1.6057, "step": 4420 }, { "epoch": 0.443, "grad_norm": 9.078392028808594, "learning_rate": 9.115400000000001e-06, "loss": 1.1614, "step": 4430 }, { "epoch": 0.444, "grad_norm": 19.624433517456055, "learning_rate": 9.113400000000002e-06, "loss": 1.1848, "step": 4440 }, { "epoch": 0.445, "grad_norm": 10.796993255615234, "learning_rate": 9.1114e-06, "loss": 1.3099, "step": 4450 }, { "epoch": 0.446, "grad_norm": 
10.463573455810547, "learning_rate": 9.109400000000001e-06, "loss": 1.4088, "step": 4460 }, { "epoch": 0.447, "grad_norm": 12.466163635253906, "learning_rate": 9.1074e-06, "loss": 1.128, "step": 4470 }, { "epoch": 0.448, "grad_norm": 8.93423080444336, "learning_rate": 9.105400000000001e-06, "loss": 1.2888, "step": 4480 }, { "epoch": 0.449, "grad_norm": 14.094794273376465, "learning_rate": 9.103400000000002e-06, "loss": 1.4118, "step": 4490 }, { "epoch": 0.45, "grad_norm": 9.293695449829102, "learning_rate": 9.1014e-06, "loss": 1.4197, "step": 4500 }, { "epoch": 0.451, "grad_norm": 14.770209312438965, "learning_rate": 9.0994e-06, "loss": 1.2449, "step": 4510 }, { "epoch": 0.452, "grad_norm": 8.0684814453125, "learning_rate": 9.0974e-06, "loss": 1.3484, "step": 4520 }, { "epoch": 0.453, "grad_norm": 8.21815299987793, "learning_rate": 9.095400000000001e-06, "loss": 1.3225, "step": 4530 }, { "epoch": 0.454, "grad_norm": 15.967477798461914, "learning_rate": 9.093400000000002e-06, "loss": 1.2123, "step": 4540 }, { "epoch": 0.455, "grad_norm": 9.977898597717285, "learning_rate": 9.0914e-06, "loss": 1.3056, "step": 4550 }, { "epoch": 0.456, "grad_norm": 18.105287551879883, "learning_rate": 9.0894e-06, "loss": 1.4645, "step": 4560 }, { "epoch": 0.457, "grad_norm": 12.611705780029297, "learning_rate": 9.0874e-06, "loss": 1.4314, "step": 4570 }, { "epoch": 0.458, "grad_norm": 10.353316307067871, "learning_rate": 9.085400000000001e-06, "loss": 1.411, "step": 4580 }, { "epoch": 0.459, "grad_norm": 26.875818252563477, "learning_rate": 9.083400000000002e-06, "loss": 1.4486, "step": 4590 }, { "epoch": 0.46, "grad_norm": 12.134267807006836, "learning_rate": 9.0814e-06, "loss": 1.4155, "step": 4600 }, { "epoch": 0.461, "grad_norm": 10.851944923400879, "learning_rate": 9.0794e-06, "loss": 1.3434, "step": 4610 }, { "epoch": 0.462, "grad_norm": 7.924882411956787, "learning_rate": 9.0774e-06, "loss": 1.3502, "step": 4620 }, { "epoch": 0.463, "grad_norm": 15.417466163635254, 
"learning_rate": 9.075400000000001e-06, "loss": 1.2635, "step": 4630 }, { "epoch": 0.464, "grad_norm": 16.324195861816406, "learning_rate": 9.073400000000002e-06, "loss": 1.3611, "step": 4640 }, { "epoch": 0.465, "grad_norm": 19.853666305541992, "learning_rate": 9.071400000000001e-06, "loss": 1.5135, "step": 4650 }, { "epoch": 0.466, "grad_norm": 9.207900047302246, "learning_rate": 9.0694e-06, "loss": 1.4571, "step": 4660 }, { "epoch": 0.467, "grad_norm": 8.441938400268555, "learning_rate": 9.0674e-06, "loss": 1.4173, "step": 4670 }, { "epoch": 0.468, "grad_norm": 10.553547859191895, "learning_rate": 9.065400000000001e-06, "loss": 1.4312, "step": 4680 }, { "epoch": 0.469, "grad_norm": 12.739753723144531, "learning_rate": 9.0634e-06, "loss": 1.531, "step": 4690 }, { "epoch": 0.47, "grad_norm": 9.211139678955078, "learning_rate": 9.061400000000001e-06, "loss": 1.4592, "step": 4700 }, { "epoch": 0.471, "grad_norm": 34.805763244628906, "learning_rate": 9.0594e-06, "loss": 1.3902, "step": 4710 }, { "epoch": 0.472, "grad_norm": 7.537581443786621, "learning_rate": 9.0574e-06, "loss": 1.4375, "step": 4720 }, { "epoch": 0.473, "grad_norm": 8.185653686523438, "learning_rate": 9.055400000000001e-06, "loss": 1.3044, "step": 4730 }, { "epoch": 0.474, "grad_norm": 8.564155578613281, "learning_rate": 9.0534e-06, "loss": 1.4194, "step": 4740 }, { "epoch": 0.475, "grad_norm": 9.794910430908203, "learning_rate": 9.0514e-06, "loss": 1.5224, "step": 4750 }, { "epoch": 0.476, "grad_norm": 7.486529350280762, "learning_rate": 9.0494e-06, "loss": 1.3633, "step": 4760 }, { "epoch": 0.477, "grad_norm": 8.693879127502441, "learning_rate": 9.0474e-06, "loss": 1.2656, "step": 4770 }, { "epoch": 0.478, "grad_norm": 10.538393020629883, "learning_rate": 9.045400000000002e-06, "loss": 1.342, "step": 4780 }, { "epoch": 0.479, "grad_norm": 8.370972633361816, "learning_rate": 9.0434e-06, "loss": 1.1873, "step": 4790 }, { "epoch": 0.48, "grad_norm": 7.864738941192627, "learning_rate": 
9.041400000000001e-06, "loss": 1.3346, "step": 4800 }, { "epoch": 0.481, "grad_norm": 9.628923416137695, "learning_rate": 9.0394e-06, "loss": 1.3781, "step": 4810 }, { "epoch": 0.482, "grad_norm": 8.316746711730957, "learning_rate": 9.037400000000001e-06, "loss": 1.2832, "step": 4820 }, { "epoch": 0.483, "grad_norm": 11.638014793395996, "learning_rate": 9.035400000000002e-06, "loss": 1.2641, "step": 4830 }, { "epoch": 0.484, "grad_norm": 8.936310768127441, "learning_rate": 9.0334e-06, "loss": 1.1154, "step": 4840 }, { "epoch": 0.485, "grad_norm": 9.713971138000488, "learning_rate": 9.031400000000001e-06, "loss": 1.1244, "step": 4850 }, { "epoch": 0.486, "grad_norm": 9.916156768798828, "learning_rate": 9.0294e-06, "loss": 1.1963, "step": 4860 }, { "epoch": 0.487, "grad_norm": 9.219595909118652, "learning_rate": 9.027400000000001e-06, "loss": 1.3753, "step": 4870 }, { "epoch": 0.488, "grad_norm": 17.253002166748047, "learning_rate": 9.0254e-06, "loss": 1.2406, "step": 4880 }, { "epoch": 0.489, "grad_norm": 15.618207931518555, "learning_rate": 9.0234e-06, "loss": 1.7111, "step": 4890 }, { "epoch": 0.49, "grad_norm": 15.506583213806152, "learning_rate": 9.021400000000001e-06, "loss": 1.2685, "step": 4900 }, { "epoch": 0.491, "grad_norm": 10.533292770385742, "learning_rate": 9.0194e-06, "loss": 1.1279, "step": 4910 }, { "epoch": 0.492, "grad_norm": 14.164593696594238, "learning_rate": 9.017400000000001e-06, "loss": 1.2458, "step": 4920 }, { "epoch": 0.493, "grad_norm": 11.445045471191406, "learning_rate": 9.0154e-06, "loss": 1.4126, "step": 4930 }, { "epoch": 0.494, "grad_norm": 12.08869743347168, "learning_rate": 9.0134e-06, "loss": 1.3938, "step": 4940 }, { "epoch": 0.495, "grad_norm": 9.766892433166504, "learning_rate": 9.011400000000002e-06, "loss": 1.3039, "step": 4950 }, { "epoch": 0.496, "grad_norm": 9.105816841125488, "learning_rate": 9.0094e-06, "loss": 1.0959, "step": 4960 }, { "epoch": 0.497, "grad_norm": 16.583984375, "learning_rate": 9.007400000000001e-06, 
"loss": 1.346, "step": 4970 }, { "epoch": 0.498, "grad_norm": 15.626133918762207, "learning_rate": 9.0054e-06, "loss": 1.4372, "step": 4980 }, { "epoch": 0.499, "grad_norm": 27.694082260131836, "learning_rate": 9.003400000000001e-06, "loss": 1.3513, "step": 4990 }, { "epoch": 0.5, "grad_norm": 18.48790168762207, "learning_rate": 9.001400000000002e-06, "loss": 1.4027, "step": 5000 }, { "epoch": 0.501, "grad_norm": 45.40183639526367, "learning_rate": 8.9994e-06, "loss": 1.386, "step": 5010 }, { "epoch": 0.502, "grad_norm": 10.51933765411377, "learning_rate": 8.997400000000001e-06, "loss": 1.3968, "step": 5020 }, { "epoch": 0.503, "grad_norm": 14.720911979675293, "learning_rate": 8.9954e-06, "loss": 1.424, "step": 5030 }, { "epoch": 0.504, "grad_norm": 7.827688694000244, "learning_rate": 8.993400000000001e-06, "loss": 1.3437, "step": 5040 }, { "epoch": 0.505, "grad_norm": 8.26240062713623, "learning_rate": 8.991400000000002e-06, "loss": 1.3456, "step": 5050 }, { "epoch": 0.506, "grad_norm": 9.115861892700195, "learning_rate": 8.9894e-06, "loss": 1.2535, "step": 5060 }, { "epoch": 0.507, "grad_norm": 8.265856742858887, "learning_rate": 8.987400000000001e-06, "loss": 1.5078, "step": 5070 }, { "epoch": 0.508, "grad_norm": 10.252098083496094, "learning_rate": 8.9854e-06, "loss": 1.2771, "step": 5080 }, { "epoch": 0.509, "grad_norm": 7.927690029144287, "learning_rate": 8.983400000000001e-06, "loss": 1.3618, "step": 5090 }, { "epoch": 0.51, "grad_norm": 13.25570297241211, "learning_rate": 8.981400000000002e-06, "loss": 1.1959, "step": 5100 }, { "epoch": 0.511, "grad_norm": 9.054607391357422, "learning_rate": 8.9794e-06, "loss": 1.1198, "step": 5110 }, { "epoch": 0.512, "grad_norm": 10.774911880493164, "learning_rate": 8.9774e-06, "loss": 1.3702, "step": 5120 }, { "epoch": 0.513, "grad_norm": 9.171457290649414, "learning_rate": 8.9754e-06, "loss": 1.4318, "step": 5130 }, { "epoch": 0.514, "grad_norm": 12.501185417175293, "learning_rate": 8.973400000000001e-06, "loss": 
1.3032, "step": 5140 }, { "epoch": 0.515, "grad_norm": 15.207738876342773, "learning_rate": 8.971400000000002e-06, "loss": 1.4883, "step": 5150 }, { "epoch": 0.516, "grad_norm": 10.586227416992188, "learning_rate": 8.969400000000001e-06, "loss": 1.3671, "step": 5160 }, { "epoch": 0.517, "grad_norm": 14.664137840270996, "learning_rate": 8.9674e-06, "loss": 1.291, "step": 5170 }, { "epoch": 0.518, "grad_norm": 14.140101432800293, "learning_rate": 8.9654e-06, "loss": 1.2848, "step": 5180 }, { "epoch": 0.519, "grad_norm": 7.017533779144287, "learning_rate": 8.963400000000001e-06, "loss": 1.035, "step": 5190 }, { "epoch": 0.52, "grad_norm": 7.003543853759766, "learning_rate": 8.961400000000002e-06, "loss": 1.2902, "step": 5200 }, { "epoch": 0.521, "grad_norm": 23.892742156982422, "learning_rate": 8.959400000000001e-06, "loss": 1.3636, "step": 5210 }, { "epoch": 0.522, "grad_norm": 8.793959617614746, "learning_rate": 8.9574e-06, "loss": 1.4787, "step": 5220 }, { "epoch": 0.523, "grad_norm": 15.292360305786133, "learning_rate": 8.9554e-06, "loss": 1.8326, "step": 5230 }, { "epoch": 0.524, "grad_norm": 9.125349998474121, "learning_rate": 8.953400000000001e-06, "loss": 1.2145, "step": 5240 }, { "epoch": 0.525, "grad_norm": 10.882486343383789, "learning_rate": 8.9514e-06, "loss": 1.3899, "step": 5250 }, { "epoch": 0.526, "grad_norm": 8.018329620361328, "learning_rate": 8.949400000000001e-06, "loss": 1.2234, "step": 5260 }, { "epoch": 0.527, "grad_norm": 9.878753662109375, "learning_rate": 8.9474e-06, "loss": 1.4145, "step": 5270 }, { "epoch": 0.528, "grad_norm": 8.53125, "learning_rate": 8.9454e-06, "loss": 1.2056, "step": 5280 }, { "epoch": 0.529, "grad_norm": 9.106351852416992, "learning_rate": 8.943400000000001e-06, "loss": 1.461, "step": 5290 }, { "epoch": 0.53, "grad_norm": 8.726819038391113, "learning_rate": 8.9414e-06, "loss": 1.3769, "step": 5300 }, { "epoch": 0.531, "grad_norm": 9.639981269836426, "learning_rate": 8.9394e-06, "loss": 1.3188, "step": 5310 }, { 
"epoch": 0.532, "grad_norm": 9.185858726501465, "learning_rate": 8.9374e-06, "loss": 1.3319, "step": 5320 }, { "epoch": 0.533, "grad_norm": 9.937215805053711, "learning_rate": 8.935400000000001e-06, "loss": 1.1629, "step": 5330 }, { "epoch": 0.534, "grad_norm": 11.293951034545898, "learning_rate": 8.933400000000002e-06, "loss": 1.3782, "step": 5340 }, { "epoch": 0.535, "grad_norm": 7.814295291900635, "learning_rate": 8.9314e-06, "loss": 1.2662, "step": 5350 }, { "epoch": 0.536, "grad_norm": 13.945892333984375, "learning_rate": 8.9294e-06, "loss": 1.3954, "step": 5360 }, { "epoch": 0.537, "grad_norm": 16.959444046020508, "learning_rate": 8.9274e-06, "loss": 1.4365, "step": 5370 }, { "epoch": 0.538, "grad_norm": 7.4403510093688965, "learning_rate": 8.925400000000001e-06, "loss": 1.2837, "step": 5380 }, { "epoch": 0.539, "grad_norm": 7.64058780670166, "learning_rate": 8.923400000000002e-06, "loss": 1.3042, "step": 5390 }, { "epoch": 0.54, "grad_norm": 7.550900459289551, "learning_rate": 8.9214e-06, "loss": 1.2555, "step": 5400 }, { "epoch": 0.541, "grad_norm": 7.080636024475098, "learning_rate": 8.9194e-06, "loss": 1.3236, "step": 5410 }, { "epoch": 0.542, "grad_norm": 12.005993843078613, "learning_rate": 8.9174e-06, "loss": 1.2954, "step": 5420 }, { "epoch": 0.543, "grad_norm": 8.378647804260254, "learning_rate": 8.915400000000001e-06, "loss": 1.2363, "step": 5430 }, { "epoch": 0.544, "grad_norm": 9.629424095153809, "learning_rate": 8.913400000000002e-06, "loss": 1.347, "step": 5440 }, { "epoch": 0.545, "grad_norm": 16.59284782409668, "learning_rate": 8.9114e-06, "loss": 1.6357, "step": 5450 }, { "epoch": 0.546, "grad_norm": 16.73984146118164, "learning_rate": 8.9094e-06, "loss": 1.624, "step": 5460 }, { "epoch": 0.547, "grad_norm": 6.30186653137207, "learning_rate": 8.9074e-06, "loss": 1.1132, "step": 5470 }, { "epoch": 0.548, "grad_norm": 8.136763572692871, "learning_rate": 8.905400000000001e-06, "loss": 1.0458, "step": 5480 }, { "epoch": 0.549, "grad_norm": 
5.975733280181885, "learning_rate": 8.9034e-06, "loss": 1.0785, "step": 5490 }, { "epoch": 0.55, "grad_norm": 11.36539077758789, "learning_rate": 8.901400000000001e-06, "loss": 1.3518, "step": 5500 }, { "epoch": 0.551, "grad_norm": 12.356161117553711, "learning_rate": 8.8994e-06, "loss": 1.286, "step": 5510 }, { "epoch": 0.552, "grad_norm": 27.67923355102539, "learning_rate": 8.8974e-06, "loss": 2.1539, "step": 5520 }, { "epoch": 0.553, "grad_norm": 9.271571159362793, "learning_rate": 8.895400000000001e-06, "loss": 1.3442, "step": 5530 }, { "epoch": 0.554, "grad_norm": 7.344857215881348, "learning_rate": 8.8934e-06, "loss": 1.2405, "step": 5540 }, { "epoch": 0.555, "grad_norm": 10.535940170288086, "learning_rate": 8.891400000000001e-06, "loss": 1.1594, "step": 5550 }, { "epoch": 0.556, "grad_norm": 9.774299621582031, "learning_rate": 8.8894e-06, "loss": 1.1702, "step": 5560 }, { "epoch": 0.557, "grad_norm": 6.225581169128418, "learning_rate": 8.8874e-06, "loss": 1.4462, "step": 5570 }, { "epoch": 0.558, "grad_norm": 7.791273593902588, "learning_rate": 8.885400000000001e-06, "loss": 1.459, "step": 5580 }, { "epoch": 0.559, "grad_norm": 5.674136161804199, "learning_rate": 8.8834e-06, "loss": 1.3381, "step": 5590 }, { "epoch": 0.56, "grad_norm": 10.079562187194824, "learning_rate": 8.881400000000001e-06, "loss": 1.4841, "step": 5600 }, { "epoch": 0.561, "grad_norm": 7.8481292724609375, "learning_rate": 8.8794e-06, "loss": 1.244, "step": 5610 }, { "epoch": 0.562, "grad_norm": 19.269500732421875, "learning_rate": 8.8774e-06, "loss": 1.2812, "step": 5620 }, { "epoch": 0.563, "grad_norm": 7.57558012008667, "learning_rate": 8.875400000000001e-06, "loss": 1.3797, "step": 5630 }, { "epoch": 0.564, "grad_norm": 7.8853440284729, "learning_rate": 8.8734e-06, "loss": 1.4588, "step": 5640 }, { "epoch": 0.565, "grad_norm": 8.824810028076172, "learning_rate": 8.871400000000001e-06, "loss": 1.2344, "step": 5650 }, { "epoch": 0.566, "grad_norm": 8.935410499572754, "learning_rate": 
8.8694e-06, "loss": 1.1085, "step": 5660 }, { "epoch": 0.567, "grad_norm": 8.826509475708008, "learning_rate": 8.867400000000001e-06, "loss": 1.3969, "step": 5670 }, { "epoch": 0.568, "grad_norm": 14.326011657714844, "learning_rate": 8.8654e-06, "loss": 1.3439, "step": 5680 }, { "epoch": 0.569, "grad_norm": 9.995936393737793, "learning_rate": 8.8634e-06, "loss": 1.4331, "step": 5690 }, { "epoch": 0.57, "grad_norm": 10.353776931762695, "learning_rate": 8.861400000000001e-06, "loss": 1.3715, "step": 5700 }, { "epoch": 0.571, "grad_norm": 8.846041679382324, "learning_rate": 8.8594e-06, "loss": 1.528, "step": 5710 }, { "epoch": 0.572, "grad_norm": 12.004854202270508, "learning_rate": 8.857400000000001e-06, "loss": 1.3763, "step": 5720 }, { "epoch": 0.573, "grad_norm": 12.780080795288086, "learning_rate": 8.8554e-06, "loss": 1.1381, "step": 5730 }, { "epoch": 0.574, "grad_norm": 9.335272789001465, "learning_rate": 8.8534e-06, "loss": 1.3221, "step": 5740 }, { "epoch": 0.575, "grad_norm": 13.810250282287598, "learning_rate": 8.851400000000001e-06, "loss": 1.1585, "step": 5750 }, { "epoch": 0.576, "grad_norm": 10.059493064880371, "learning_rate": 8.8494e-06, "loss": 1.0912, "step": 5760 }, { "epoch": 0.577, "grad_norm": 7.981428623199463, "learning_rate": 8.847400000000001e-06, "loss": 1.5361, "step": 5770 }, { "epoch": 0.578, "grad_norm": 19.02338981628418, "learning_rate": 8.8454e-06, "loss": 1.2302, "step": 5780 }, { "epoch": 0.579, "grad_norm": 7.436992645263672, "learning_rate": 8.8434e-06, "loss": 1.2374, "step": 5790 }, { "epoch": 0.58, "grad_norm": 7.643286228179932, "learning_rate": 8.841400000000001e-06, "loss": 1.2691, "step": 5800 }, { "epoch": 0.581, "grad_norm": 8.87479019165039, "learning_rate": 8.8394e-06, "loss": 1.5383, "step": 5810 }, { "epoch": 0.582, "grad_norm": 8.180926322937012, "learning_rate": 8.837400000000001e-06, "loss": 1.4179, "step": 5820 }, { "epoch": 0.583, "grad_norm": 12.239075660705566, "learning_rate": 8.8354e-06, "loss": 1.1523, 
"step": 5830 }, { "epoch": 0.584, "grad_norm": 20.099628448486328, "learning_rate": 8.8334e-06, "loss": 1.1947, "step": 5840 }, { "epoch": 0.585, "grad_norm": 19.915409088134766, "learning_rate": 8.831400000000002e-06, "loss": 1.438, "step": 5850 }, { "epoch": 0.586, "grad_norm": 12.24613094329834, "learning_rate": 8.8294e-06, "loss": 1.0389, "step": 5860 }, { "epoch": 0.587, "grad_norm": 9.551973342895508, "learning_rate": 8.827400000000001e-06, "loss": 1.2193, "step": 5870 }, { "epoch": 0.588, "grad_norm": 10.822647094726562, "learning_rate": 8.8254e-06, "loss": 1.4431, "step": 5880 }, { "epoch": 0.589, "grad_norm": 10.414114952087402, "learning_rate": 8.823400000000001e-06, "loss": 1.4134, "step": 5890 }, { "epoch": 0.59, "grad_norm": 11.21683120727539, "learning_rate": 8.821400000000002e-06, "loss": 1.3784, "step": 5900 }, { "epoch": 0.591, "grad_norm": 10.10229206085205, "learning_rate": 8.8194e-06, "loss": 1.4514, "step": 5910 }, { "epoch": 0.592, "grad_norm": 11.186251640319824, "learning_rate": 8.8174e-06, "loss": 1.454, "step": 5920 }, { "epoch": 0.593, "grad_norm": 16.076051712036133, "learning_rate": 8.8154e-06, "loss": 1.4822, "step": 5930 }, { "epoch": 0.594, "grad_norm": 8.257436752319336, "learning_rate": 8.813400000000001e-06, "loss": 1.3401, "step": 5940 }, { "epoch": 0.595, "grad_norm": 14.286370277404785, "learning_rate": 8.811400000000002e-06, "loss": 1.4115, "step": 5950 }, { "epoch": 0.596, "grad_norm": 9.498801231384277, "learning_rate": 8.8094e-06, "loss": 1.4851, "step": 5960 }, { "epoch": 0.597, "grad_norm": 8.820110321044922, "learning_rate": 8.8074e-06, "loss": 1.2037, "step": 5970 }, { "epoch": 0.598, "grad_norm": 10.993141174316406, "learning_rate": 8.8054e-06, "loss": 1.4469, "step": 5980 }, { "epoch": 0.599, "grad_norm": 12.472010612487793, "learning_rate": 8.803400000000001e-06, "loss": 1.4796, "step": 5990 }, { "epoch": 0.6, "grad_norm": 6.990931987762451, "learning_rate": 8.801400000000002e-06, "loss": 1.3748, "step": 6000 }, { 
"epoch": 0.601, "grad_norm": 13.181191444396973, "learning_rate": 8.7996e-06, "loss": 1.3756, "step": 6010 }, { "epoch": 0.602, "grad_norm": 11.772881507873535, "learning_rate": 8.7976e-06, "loss": 1.3104, "step": 6020 }, { "epoch": 0.603, "grad_norm": 10.84348201751709, "learning_rate": 8.795600000000001e-06, "loss": 1.4685, "step": 6030 }, { "epoch": 0.604, "grad_norm": 7.282617568969727, "learning_rate": 8.793600000000002e-06, "loss": 1.05, "step": 6040 }, { "epoch": 0.605, "grad_norm": 16.91712188720703, "learning_rate": 8.7916e-06, "loss": 1.387, "step": 6050 }, { "epoch": 0.606, "grad_norm": 13.488628387451172, "learning_rate": 8.7896e-06, "loss": 1.1856, "step": 6060 }, { "epoch": 0.607, "grad_norm": 10.451751708984375, "learning_rate": 8.7876e-06, "loss": 1.4238, "step": 6070 }, { "epoch": 0.608, "grad_norm": 10.60791301727295, "learning_rate": 8.785600000000001e-06, "loss": 1.6501, "step": 6080 }, { "epoch": 0.609, "grad_norm": 7.717032432556152, "learning_rate": 8.783600000000002e-06, "loss": 1.4032, "step": 6090 }, { "epoch": 0.61, "grad_norm": 8.718606948852539, "learning_rate": 8.7816e-06, "loss": 1.4299, "step": 6100 }, { "epoch": 0.611, "grad_norm": 10.26463508605957, "learning_rate": 8.7796e-06, "loss": 1.2697, "step": 6110 }, { "epoch": 0.612, "grad_norm": 9.358569145202637, "learning_rate": 8.7776e-06, "loss": 1.3661, "step": 6120 }, { "epoch": 0.613, "grad_norm": 15.170132637023926, "learning_rate": 8.775600000000001e-06, "loss": 1.2181, "step": 6130 }, { "epoch": 0.614, "grad_norm": 13.230052947998047, "learning_rate": 8.7736e-06, "loss": 1.3931, "step": 6140 }, { "epoch": 0.615, "grad_norm": 13.141670227050781, "learning_rate": 8.771600000000001e-06, "loss": 1.3615, "step": 6150 }, { "epoch": 0.616, "grad_norm": 8.110639572143555, "learning_rate": 8.7696e-06, "loss": 1.3219, "step": 6160 }, { "epoch": 0.617, "grad_norm": 8.820226669311523, "learning_rate": 8.7676e-06, "loss": 1.4095, "step": 6170 }, { "epoch": 0.618, "grad_norm": 
9.184921264648438, "learning_rate": 8.765600000000001e-06, "loss": 1.3429, "step": 6180 }, { "epoch": 0.619, "grad_norm": 16.057344436645508, "learning_rate": 8.7636e-06, "loss": 1.1915, "step": 6190 }, { "epoch": 0.62, "grad_norm": 7.217300891876221, "learning_rate": 8.761600000000001e-06, "loss": 1.4384, "step": 6200 }, { "epoch": 0.621, "grad_norm": 8.664640426635742, "learning_rate": 8.7596e-06, "loss": 1.305, "step": 6210 }, { "epoch": 0.622, "grad_norm": 12.4964599609375, "learning_rate": 8.7576e-06, "loss": 1.3465, "step": 6220 }, { "epoch": 0.623, "grad_norm": 12.042380332946777, "learning_rate": 8.755600000000001e-06, "loss": 1.3749, "step": 6230 }, { "epoch": 0.624, "grad_norm": 10.018320083618164, "learning_rate": 8.7536e-06, "loss": 1.3895, "step": 6240 }, { "epoch": 0.625, "grad_norm": 5.1300764083862305, "learning_rate": 8.751600000000001e-06, "loss": 1.2088, "step": 6250 }, { "epoch": 0.626, "grad_norm": 9.236071586608887, "learning_rate": 8.7496e-06, "loss": 1.3159, "step": 6260 }, { "epoch": 0.627, "grad_norm": 5.720637798309326, "learning_rate": 8.7476e-06, "loss": 1.124, "step": 6270 }, { "epoch": 0.628, "grad_norm": 7.678037643432617, "learning_rate": 8.745600000000001e-06, "loss": 1.2812, "step": 6280 }, { "epoch": 0.629, "grad_norm": 8.793689727783203, "learning_rate": 8.7436e-06, "loss": 1.389, "step": 6290 }, { "epoch": 0.63, "grad_norm": 6.279944896697998, "learning_rate": 8.741600000000001e-06, "loss": 0.9006, "step": 6300 }, { "epoch": 0.631, "grad_norm": 17.25923728942871, "learning_rate": 8.7396e-06, "loss": 1.5018, "step": 6310 }, { "epoch": 0.632, "grad_norm": 7.323878288269043, "learning_rate": 8.7376e-06, "loss": 1.033, "step": 6320 }, { "epoch": 0.633, "grad_norm": 7.894130706787109, "learning_rate": 8.735600000000002e-06, "loss": 1.17, "step": 6330 }, { "epoch": 0.634, "grad_norm": 22.005563735961914, "learning_rate": 8.7336e-06, "loss": 1.0509, "step": 6340 }, { "epoch": 0.635, "grad_norm": 5.812353610992432, "learning_rate": 
8.731600000000001e-06, "loss": 1.3178, "step": 6350 }, { "epoch": 0.636, "grad_norm": 14.244625091552734, "learning_rate": 8.7296e-06, "loss": 1.4223, "step": 6360 }, { "epoch": 0.637, "grad_norm": 8.534819602966309, "learning_rate": 8.727600000000001e-06, "loss": 1.34, "step": 6370 }, { "epoch": 0.638, "grad_norm": 8.201289176940918, "learning_rate": 8.7256e-06, "loss": 1.3692, "step": 6380 }, { "epoch": 0.639, "grad_norm": 6.031764030456543, "learning_rate": 8.7236e-06, "loss": 1.1451, "step": 6390 }, { "epoch": 0.64, "grad_norm": 8.062491416931152, "learning_rate": 8.721600000000001e-06, "loss": 1.231, "step": 6400 }, { "epoch": 0.641, "grad_norm": 8.3547945022583, "learning_rate": 8.7196e-06, "loss": 1.3295, "step": 6410 }, { "epoch": 0.642, "grad_norm": 6.641840934753418, "learning_rate": 8.717600000000001e-06, "loss": 1.3604, "step": 6420 }, { "epoch": 0.643, "grad_norm": 8.045875549316406, "learning_rate": 8.7156e-06, "loss": 1.3766, "step": 6430 }, { "epoch": 0.644, "grad_norm": 7.5357866287231445, "learning_rate": 8.7136e-06, "loss": 1.2875, "step": 6440 }, { "epoch": 0.645, "grad_norm": 8.163207054138184, "learning_rate": 8.711600000000001e-06, "loss": 1.3384, "step": 6450 }, { "epoch": 0.646, "grad_norm": 15.271834373474121, "learning_rate": 8.7096e-06, "loss": 1.2697, "step": 6460 }, { "epoch": 0.647, "grad_norm": 12.160016059875488, "learning_rate": 8.707600000000001e-06, "loss": 1.3496, "step": 6470 }, { "epoch": 0.648, "grad_norm": 6.739315986633301, "learning_rate": 8.7056e-06, "loss": 1.1825, "step": 6480 }, { "epoch": 0.649, "grad_norm": 10.1211576461792, "learning_rate": 8.7036e-06, "loss": 1.1073, "step": 6490 }, { "epoch": 0.65, "grad_norm": 14.367308616638184, "learning_rate": 8.701600000000002e-06, "loss": 1.2348, "step": 6500 }, { "epoch": 0.651, "grad_norm": 10.315905570983887, "learning_rate": 8.6996e-06, "loss": 1.3319, "step": 6510 }, { "epoch": 0.652, "grad_norm": 25.72903060913086, "learning_rate": 8.697600000000001e-06, "loss": 
1.5011, "step": 6520 }, { "epoch": 0.653, "grad_norm": 8.948445320129395, "learning_rate": 8.6956e-06, "loss": 1.4154, "step": 6530 }, { "epoch": 0.654, "grad_norm": 8.107232093811035, "learning_rate": 8.693600000000001e-06, "loss": 1.0821, "step": 6540 }, { "epoch": 0.655, "grad_norm": 8.597017288208008, "learning_rate": 8.691600000000002e-06, "loss": 1.1132, "step": 6550 }, { "epoch": 0.656, "grad_norm": 25.52571678161621, "learning_rate": 8.6896e-06, "loss": 1.57, "step": 6560 }, { "epoch": 0.657, "grad_norm": 7.1117377281188965, "learning_rate": 8.687600000000001e-06, "loss": 1.5039, "step": 6570 }, { "epoch": 0.658, "grad_norm": 11.255306243896484, "learning_rate": 8.6856e-06, "loss": 1.1606, "step": 6580 }, { "epoch": 0.659, "grad_norm": 14.473061561584473, "learning_rate": 8.683600000000001e-06, "loss": 1.2538, "step": 6590 }, { "epoch": 0.66, "grad_norm": 14.601563453674316, "learning_rate": 8.681600000000002e-06, "loss": 1.3407, "step": 6600 }, { "epoch": 0.661, "grad_norm": 9.168686866760254, "learning_rate": 8.6796e-06, "loss": 1.194, "step": 6610 }, { "epoch": 0.662, "grad_norm": 11.533926963806152, "learning_rate": 8.6776e-06, "loss": 1.3223, "step": 6620 }, { "epoch": 0.663, "grad_norm": 12.94609546661377, "learning_rate": 8.6756e-06, "loss": 1.1959, "step": 6630 }, { "epoch": 0.664, "grad_norm": 14.221107482910156, "learning_rate": 8.673600000000001e-06, "loss": 1.4096, "step": 6640 }, { "epoch": 0.665, "grad_norm": 7.058326244354248, "learning_rate": 8.671600000000002e-06, "loss": 1.2665, "step": 6650 }, { "epoch": 0.666, "grad_norm": 7.107744216918945, "learning_rate": 8.6696e-06, "loss": 1.4503, "step": 6660 }, { "epoch": 0.667, "grad_norm": 5.244264602661133, "learning_rate": 8.6676e-06, "loss": 1.4134, "step": 6670 }, { "epoch": 0.668, "grad_norm": 8.123653411865234, "learning_rate": 8.6656e-06, "loss": 1.1377, "step": 6680 }, { "epoch": 0.669, "grad_norm": 6.894880771636963, "learning_rate": 8.663600000000001e-06, "loss": 1.5088, "step": 6690 
}, { "epoch": 0.67, "grad_norm": 15.106626510620117, "learning_rate": 8.661600000000002e-06, "loss": 1.4093, "step": 6700 }, { "epoch": 0.671, "grad_norm": 13.133957862854004, "learning_rate": 8.659600000000001e-06, "loss": 1.372, "step": 6710 }, { "epoch": 0.672, "grad_norm": 5.771068096160889, "learning_rate": 8.6576e-06, "loss": 1.3439, "step": 6720 }, { "epoch": 0.673, "grad_norm": 6.851573467254639, "learning_rate": 8.6556e-06, "loss": 1.4333, "step": 6730 }, { "epoch": 0.674, "grad_norm": 10.56118392944336, "learning_rate": 8.653600000000001e-06, "loss": 1.3246, "step": 6740 }, { "epoch": 0.675, "grad_norm": 9.875953674316406, "learning_rate": 8.6516e-06, "loss": 1.4723, "step": 6750 }, { "epoch": 0.676, "grad_norm": 7.086522579193115, "learning_rate": 8.649600000000001e-06, "loss": 1.4725, "step": 6760 }, { "epoch": 0.677, "grad_norm": 7.7899603843688965, "learning_rate": 8.6476e-06, "loss": 1.1571, "step": 6770 }, { "epoch": 0.678, "grad_norm": 6.270768165588379, "learning_rate": 8.6456e-06, "loss": 1.1176, "step": 6780 }, { "epoch": 0.679, "grad_norm": 10.535731315612793, "learning_rate": 8.643600000000001e-06, "loss": 1.2883, "step": 6790 }, { "epoch": 0.68, "grad_norm": 7.9164299964904785, "learning_rate": 8.6416e-06, "loss": 1.1391, "step": 6800 }, { "epoch": 0.681, "grad_norm": 9.529337882995605, "learning_rate": 8.6396e-06, "loss": 1.5328, "step": 6810 }, { "epoch": 0.682, "grad_norm": 6.789098262786865, "learning_rate": 8.6376e-06, "loss": 1.5125, "step": 6820 }, { "epoch": 0.683, "grad_norm": 8.427510261535645, "learning_rate": 8.6356e-06, "loss": 1.0715, "step": 6830 }, { "epoch": 0.684, "grad_norm": 7.790762424468994, "learning_rate": 8.633600000000001e-06, "loss": 1.2374, "step": 6840 }, { "epoch": 0.685, "grad_norm": 16.94967269897461, "learning_rate": 8.6316e-06, "loss": 1.1546, "step": 6850 }, { "epoch": 0.686, "grad_norm": 9.769964218139648, "learning_rate": 8.6296e-06, "loss": 1.2163, "step": 6860 }, { "epoch": 0.687, "grad_norm": 
12.126694679260254, "learning_rate": 8.6276e-06, "loss": 1.1037, "step": 6870 }, { "epoch": 0.688, "grad_norm": 6.1467976570129395, "learning_rate": 8.625600000000001e-06, "loss": 1.1472, "step": 6880 }, { "epoch": 0.689, "grad_norm": 17.157371520996094, "learning_rate": 8.623600000000002e-06, "loss": 1.3738, "step": 6890 }, { "epoch": 0.69, "grad_norm": 9.996745109558105, "learning_rate": 8.6216e-06, "loss": 1.3618, "step": 6900 }, { "epoch": 0.691, "grad_norm": 9.224713325500488, "learning_rate": 8.6196e-06, "loss": 1.0697, "step": 6910 }, { "epoch": 0.692, "grad_norm": 16.572965621948242, "learning_rate": 8.6176e-06, "loss": 1.0966, "step": 6920 }, { "epoch": 0.693, "grad_norm": 20.881969451904297, "learning_rate": 8.615600000000001e-06, "loss": 1.8904, "step": 6930 }, { "epoch": 0.694, "grad_norm": 20.136632919311523, "learning_rate": 8.613600000000002e-06, "loss": 1.1902, "step": 6940 }, { "epoch": 0.695, "grad_norm": 12.941941261291504, "learning_rate": 8.6116e-06, "loss": 1.2805, "step": 6950 }, { "epoch": 0.696, "grad_norm": 13.695612907409668, "learning_rate": 8.6096e-06, "loss": 1.5408, "step": 6960 }, { "epoch": 0.697, "grad_norm": 18.51884651184082, "learning_rate": 8.6076e-06, "loss": 1.092, "step": 6970 }, { "epoch": 0.698, "grad_norm": 7.573236465454102, "learning_rate": 8.605600000000001e-06, "loss": 1.5104, "step": 6980 }, { "epoch": 0.699, "grad_norm": 13.744678497314453, "learning_rate": 8.6036e-06, "loss": 1.5015, "step": 6990 }, { "epoch": 0.7, "grad_norm": 7.250736713409424, "learning_rate": 8.6016e-06, "loss": 1.2589, "step": 7000 }, { "epoch": 0.701, "grad_norm": 6.799300670623779, "learning_rate": 8.5996e-06, "loss": 1.2387, "step": 7010 }, { "epoch": 0.702, "grad_norm": 11.354289054870605, "learning_rate": 8.5976e-06, "loss": 1.3093, "step": 7020 }, { "epoch": 0.703, "grad_norm": 10.395122528076172, "learning_rate": 8.595600000000001e-06, "loss": 1.0857, "step": 7030 }, { "epoch": 0.704, "grad_norm": 19.742162704467773, "learning_rate": 
8.5936e-06, "loss": 1.2246, "step": 7040 }, { "epoch": 0.705, "grad_norm": 10.69759464263916, "learning_rate": 8.591600000000001e-06, "loss": 1.5878, "step": 7050 }, { "epoch": 0.706, "grad_norm": 10.897568702697754, "learning_rate": 8.5896e-06, "loss": 1.4875, "step": 7060 }, { "epoch": 0.707, "grad_norm": 11.466116905212402, "learning_rate": 8.5876e-06, "loss": 1.4667, "step": 7070 }, { "epoch": 0.708, "grad_norm": 8.831307411193848, "learning_rate": 8.585600000000001e-06, "loss": 1.2384, "step": 7080 }, { "epoch": 0.709, "grad_norm": 11.752703666687012, "learning_rate": 8.5836e-06, "loss": 1.2866, "step": 7090 }, { "epoch": 0.71, "grad_norm": 11.646993637084961, "learning_rate": 8.581600000000001e-06, "loss": 1.2842, "step": 7100 }, { "epoch": 0.711, "grad_norm": 7.932891845703125, "learning_rate": 8.5796e-06, "loss": 1.3968, "step": 7110 }, { "epoch": 0.712, "grad_norm": 9.761377334594727, "learning_rate": 8.5776e-06, "loss": 1.324, "step": 7120 }, { "epoch": 0.713, "grad_norm": 10.336273193359375, "learning_rate": 8.575600000000001e-06, "loss": 1.2892, "step": 7130 }, { "epoch": 0.714, "grad_norm": 10.62780475616455, "learning_rate": 8.5736e-06, "loss": 1.2138, "step": 7140 }, { "epoch": 0.715, "grad_norm": 8.426196098327637, "learning_rate": 8.571600000000001e-06, "loss": 1.326, "step": 7150 }, { "epoch": 0.716, "grad_norm": 9.330110549926758, "learning_rate": 8.569600000000002e-06, "loss": 1.1931, "step": 7160 }, { "epoch": 0.717, "grad_norm": 8.45018482208252, "learning_rate": 8.5676e-06, "loss": 1.2988, "step": 7170 }, { "epoch": 0.718, "grad_norm": 13.61511516571045, "learning_rate": 8.5656e-06, "loss": 1.4823, "step": 7180 }, { "epoch": 0.719, "grad_norm": 7.438213348388672, "learning_rate": 8.5636e-06, "loss": 1.066, "step": 7190 }, { "epoch": 0.72, "grad_norm": 8.488134384155273, "learning_rate": 8.561600000000001e-06, "loss": 1.5699, "step": 7200 }, { "epoch": 0.721, "grad_norm": 7.344362258911133, "learning_rate": 8.559600000000002e-06, "loss": 
1.4329, "step": 7210 }, { "epoch": 0.722, "grad_norm": 7.786438941955566, "learning_rate": 8.557600000000001e-06, "loss": 1.1816, "step": 7220 }, { "epoch": 0.723, "grad_norm": 11.758119583129883, "learning_rate": 8.5556e-06, "loss": 1.3462, "step": 7230 }, { "epoch": 0.724, "grad_norm": 7.506147384643555, "learning_rate": 8.5536e-06, "loss": 1.1712, "step": 7240 }, { "epoch": 0.725, "grad_norm": 8.71332836151123, "learning_rate": 8.551600000000001e-06, "loss": 1.4559, "step": 7250 }, { "epoch": 0.726, "grad_norm": 11.137746810913086, "learning_rate": 8.549600000000002e-06, "loss": 1.5458, "step": 7260 }, { "epoch": 0.727, "grad_norm": 9.963936805725098, "learning_rate": 8.547600000000001e-06, "loss": 1.2861, "step": 7270 }, { "epoch": 0.728, "grad_norm": 6.192053318023682, "learning_rate": 8.5456e-06, "loss": 1.3101, "step": 7280 }, { "epoch": 0.729, "grad_norm": 14.203436851501465, "learning_rate": 8.5436e-06, "loss": 1.4761, "step": 7290 }, { "epoch": 0.73, "grad_norm": 7.16986608505249, "learning_rate": 8.541600000000001e-06, "loss": 1.3407, "step": 7300 }, { "epoch": 0.731, "grad_norm": 7.007495880126953, "learning_rate": 8.539600000000002e-06, "loss": 1.3321, "step": 7310 }, { "epoch": 0.732, "grad_norm": 6.572269916534424, "learning_rate": 8.537600000000001e-06, "loss": 1.0081, "step": 7320 }, { "epoch": 0.733, "grad_norm": 8.959054946899414, "learning_rate": 8.5356e-06, "loss": 1.3594, "step": 7330 }, { "epoch": 0.734, "grad_norm": 7.302248001098633, "learning_rate": 8.5336e-06, "loss": 1.3677, "step": 7340 }, { "epoch": 0.735, "grad_norm": 6.668020725250244, "learning_rate": 8.531600000000001e-06, "loss": 1.247, "step": 7350 }, { "epoch": 0.736, "grad_norm": 7.541649341583252, "learning_rate": 8.5296e-06, "loss": 1.5682, "step": 7360 }, { "epoch": 0.737, "grad_norm": 8.16542911529541, "learning_rate": 8.527600000000001e-06, "loss": 1.0398, "step": 7370 }, { "epoch": 0.738, "grad_norm": 7.299499034881592, "learning_rate": 8.5256e-06, "loss": 1.3329, "step": 
7380 }, { "epoch": 0.739, "grad_norm": 11.323725700378418, "learning_rate": 8.523600000000001e-06, "loss": 1.3726, "step": 7390 }, { "epoch": 0.74, "grad_norm": 8.565047264099121, "learning_rate": 8.521600000000002e-06, "loss": 1.07, "step": 7400 }, { "epoch": 0.741, "grad_norm": 13.438093185424805, "learning_rate": 8.5196e-06, "loss": 1.0761, "step": 7410 }, { "epoch": 0.742, "grad_norm": 30.01604652404785, "learning_rate": 8.5176e-06, "loss": 1.533, "step": 7420 }, { "epoch": 0.743, "grad_norm": 16.543590545654297, "learning_rate": 8.5156e-06, "loss": 1.5848, "step": 7430 }, { "epoch": 0.744, "grad_norm": 13.049363136291504, "learning_rate": 8.513600000000001e-06, "loss": 1.6805, "step": 7440 }, { "epoch": 0.745, "grad_norm": 6.363814353942871, "learning_rate": 8.511600000000002e-06, "loss": 1.2924, "step": 7450 }, { "epoch": 0.746, "grad_norm": 12.835099220275879, "learning_rate": 8.5096e-06, "loss": 1.3819, "step": 7460 }, { "epoch": 0.747, "grad_norm": 16.182188034057617, "learning_rate": 8.5076e-06, "loss": 1.3377, "step": 7470 }, { "epoch": 0.748, "grad_norm": 11.965877532958984, "learning_rate": 8.5056e-06, "loss": 1.3868, "step": 7480 }, { "epoch": 0.749, "grad_norm": 9.030449867248535, "learning_rate": 8.503600000000001e-06, "loss": 1.2009, "step": 7490 }, { "epoch": 0.75, "grad_norm": 6.703801155090332, "learning_rate": 8.501600000000002e-06, "loss": 1.1556, "step": 7500 }, { "epoch": 0.751, "grad_norm": 30.354846954345703, "learning_rate": 8.4996e-06, "loss": 1.2578, "step": 7510 }, { "epoch": 0.752, "grad_norm": 12.863663673400879, "learning_rate": 8.4976e-06, "loss": 1.3195, "step": 7520 }, { "epoch": 0.753, "grad_norm": 9.004260063171387, "learning_rate": 8.4956e-06, "loss": 1.271, "step": 7530 }, { "epoch": 0.754, "grad_norm": 8.384016036987305, "learning_rate": 8.493600000000001e-06, "loss": 1.2263, "step": 7540 }, { "epoch": 0.755, "grad_norm": 11.641860008239746, "learning_rate": 8.4916e-06, "loss": 1.2548, "step": 7550 }, { "epoch": 0.756, 
"grad_norm": 14.890166282653809, "learning_rate": 8.4896e-06, "loss": 1.3677, "step": 7560 }, { "epoch": 0.757, "grad_norm": 8.099945068359375, "learning_rate": 8.4876e-06, "loss": 1.4503, "step": 7570 }, { "epoch": 0.758, "grad_norm": 10.030981063842773, "learning_rate": 8.4856e-06, "loss": 1.2514, "step": 7580 }, { "epoch": 0.759, "grad_norm": 8.353160858154297, "learning_rate": 8.483600000000001e-06, "loss": 1.3892, "step": 7590 }, { "epoch": 0.76, "grad_norm": 6.855755805969238, "learning_rate": 8.4816e-06, "loss": 1.2428, "step": 7600 }, { "epoch": 0.761, "grad_norm": 9.648360252380371, "learning_rate": 8.479600000000001e-06, "loss": 1.2645, "step": 7610 }, { "epoch": 0.762, "grad_norm": 8.851248741149902, "learning_rate": 8.4776e-06, "loss": 1.3178, "step": 7620 }, { "epoch": 0.763, "grad_norm": 8.711851119995117, "learning_rate": 8.4756e-06, "loss": 1.2406, "step": 7630 }, { "epoch": 0.764, "grad_norm": 15.47616195678711, "learning_rate": 8.473600000000001e-06, "loss": 1.4365, "step": 7640 }, { "epoch": 0.765, "grad_norm": 10.66360092163086, "learning_rate": 8.4716e-06, "loss": 1.1765, "step": 7650 }, { "epoch": 0.766, "grad_norm": 7.519439220428467, "learning_rate": 8.469600000000001e-06, "loss": 1.183, "step": 7660 }, { "epoch": 0.767, "grad_norm": 6.129993438720703, "learning_rate": 8.4676e-06, "loss": 1.3828, "step": 7670 }, { "epoch": 0.768, "grad_norm": 7.567848205566406, "learning_rate": 8.4656e-06, "loss": 1.2672, "step": 7680 }, { "epoch": 0.769, "grad_norm": 8.9171724319458, "learning_rate": 8.463600000000001e-06, "loss": 1.1809, "step": 7690 }, { "epoch": 0.77, "grad_norm": 12.822111129760742, "learning_rate": 8.4616e-06, "loss": 1.5742, "step": 7700 }, { "epoch": 0.771, "grad_norm": 7.132790565490723, "learning_rate": 8.459600000000001e-06, "loss": 1.5134, "step": 7710 }, { "epoch": 0.772, "grad_norm": 8.924680709838867, "learning_rate": 8.4576e-06, "loss": 1.2935, "step": 7720 }, { "epoch": 0.773, "grad_norm": 7.6789984703063965, 
"learning_rate": 8.4556e-06, "loss": 1.2992, "step": 7730 }, { "epoch": 0.774, "grad_norm": 6.9659647941589355, "learning_rate": 8.453600000000002e-06, "loss": 1.3561, "step": 7740 }, { "epoch": 0.775, "grad_norm": 9.076621055603027, "learning_rate": 8.4516e-06, "loss": 1.1812, "step": 7750 }, { "epoch": 0.776, "grad_norm": 7.050920009613037, "learning_rate": 8.449600000000001e-06, "loss": 1.485, "step": 7760 }, { "epoch": 0.777, "grad_norm": 5.524001121520996, "learning_rate": 8.4476e-06, "loss": 1.3482, "step": 7770 }, { "epoch": 0.778, "grad_norm": 9.58509635925293, "learning_rate": 8.445600000000001e-06, "loss": 1.3402, "step": 7780 }, { "epoch": 0.779, "grad_norm": 14.442522048950195, "learning_rate": 8.4436e-06, "loss": 1.3168, "step": 7790 }, { "epoch": 0.78, "grad_norm": 6.068690776824951, "learning_rate": 8.4416e-06, "loss": 1.3553, "step": 7800 }, { "epoch": 0.781, "grad_norm": 7.8703460693359375, "learning_rate": 8.439600000000001e-06, "loss": 1.1133, "step": 7810 }, { "epoch": 0.782, "grad_norm": 17.56264877319336, "learning_rate": 8.4376e-06, "loss": 1.3037, "step": 7820 }, { "epoch": 0.783, "grad_norm": 32.318336486816406, "learning_rate": 8.435600000000001e-06, "loss": 1.4643, "step": 7830 }, { "epoch": 0.784, "grad_norm": 9.71606159210205, "learning_rate": 8.4336e-06, "loss": 1.2365, "step": 7840 }, { "epoch": 0.785, "grad_norm": 5.395797252655029, "learning_rate": 8.4316e-06, "loss": 1.5656, "step": 7850 }, { "epoch": 0.786, "grad_norm": 21.213876724243164, "learning_rate": 8.429600000000001e-06, "loss": 1.4199, "step": 7860 }, { "epoch": 0.787, "grad_norm": 14.53171157836914, "learning_rate": 8.4276e-06, "loss": 1.2943, "step": 7870 }, { "epoch": 0.788, "grad_norm": 7.524209976196289, "learning_rate": 8.425600000000001e-06, "loss": 1.5904, "step": 7880 }, { "epoch": 0.789, "grad_norm": 14.162694931030273, "learning_rate": 8.4236e-06, "loss": 1.1529, "step": 7890 }, { "epoch": 0.79, "grad_norm": 26.22031021118164, "learning_rate": 8.4216e-06, 
"loss": 1.0972, "step": 7900 }, { "epoch": 0.791, "grad_norm": 8.859457015991211, "learning_rate": 8.419600000000002e-06, "loss": 1.0498, "step": 7910 }, { "epoch": 0.792, "grad_norm": 9.807426452636719, "learning_rate": 8.4176e-06, "loss": 1.5987, "step": 7920 }, { "epoch": 0.793, "grad_norm": Infinity, "learning_rate": 8.4158e-06, "loss": 1.3527, "step": 7930 }, { "epoch": 0.794, "grad_norm": 10.584507942199707, "learning_rate": 8.4138e-06, "loss": 1.1737, "step": 7940 }, { "epoch": 0.795, "grad_norm": 9.154919624328613, "learning_rate": 8.411800000000001e-06, "loss": 1.4527, "step": 7950 }, { "epoch": 0.796, "grad_norm": 9.639301300048828, "learning_rate": 8.4098e-06, "loss": 1.0941, "step": 7960 }, { "epoch": 0.797, "grad_norm": 8.37706470489502, "learning_rate": 8.407800000000001e-06, "loss": 1.4764, "step": 7970 }, { "epoch": 0.798, "grad_norm": 10.064932823181152, "learning_rate": 8.4058e-06, "loss": 1.2635, "step": 7980 }, { "epoch": 0.799, "grad_norm": 8.123895645141602, "learning_rate": 8.4038e-06, "loss": 1.2729, "step": 7990 }, { "epoch": 0.8, "grad_norm": 8.623579025268555, "learning_rate": 8.401800000000001e-06, "loss": 1.3229, "step": 8000 }, { "epoch": 0.801, "grad_norm": 11.084688186645508, "learning_rate": 8.3998e-06, "loss": 1.5338, "step": 8010 }, { "epoch": 0.802, "grad_norm": 9.748945236206055, "learning_rate": 8.397800000000001e-06, "loss": 1.1826, "step": 8020 }, { "epoch": 0.803, "grad_norm": 9.606584548950195, "learning_rate": 8.3958e-06, "loss": 1.2449, "step": 8030 }, { "epoch": 0.804, "grad_norm": 13.231161117553711, "learning_rate": 8.393800000000001e-06, "loss": 1.4826, "step": 8040 }, { "epoch": 0.805, "grad_norm": 8.760950088500977, "learning_rate": 8.391800000000002e-06, "loss": 1.1832, "step": 8050 }, { "epoch": 0.806, "grad_norm": 9.873347282409668, "learning_rate": 8.3898e-06, "loss": 1.1998, "step": 8060 }, { "epoch": 0.807, "grad_norm": 9.804389953613281, "learning_rate": 8.3878e-06, "loss": 1.2058, "step": 8070 }, { "epoch": 
0.808, "grad_norm": 10.413833618164062, "learning_rate": 8.3858e-06, "loss": 1.068, "step": 8080 }, { "epoch": 0.809, "grad_norm": 11.191216468811035, "learning_rate": 8.383800000000001e-06, "loss": 1.2013, "step": 8090 }, { "epoch": 0.81, "grad_norm": 8.188349723815918, "learning_rate": 8.381800000000002e-06, "loss": 1.4124, "step": 8100 }, { "epoch": 0.811, "grad_norm": 11.037068367004395, "learning_rate": 8.3798e-06, "loss": 1.3408, "step": 8110 }, { "epoch": 0.812, "grad_norm": 12.660216331481934, "learning_rate": 8.3778e-06, "loss": 1.1643, "step": 8120 }, { "epoch": 0.813, "grad_norm": 7.056172847747803, "learning_rate": 8.3758e-06, "loss": 1.4733, "step": 8130 }, { "epoch": 0.814, "grad_norm": 11.407397270202637, "learning_rate": 8.373800000000001e-06, "loss": 1.2624, "step": 8140 }, { "epoch": 0.815, "grad_norm": 12.42174243927002, "learning_rate": 8.371800000000002e-06, "loss": 1.4334, "step": 8150 }, { "epoch": 0.816, "grad_norm": 12.337141036987305, "learning_rate": 8.3698e-06, "loss": 1.3376, "step": 8160 }, { "epoch": 0.817, "grad_norm": 11.446084022521973, "learning_rate": 8.3678e-06, "loss": 1.3531, "step": 8170 }, { "epoch": 0.818, "grad_norm": 12.284687042236328, "learning_rate": 8.3658e-06, "loss": 1.4043, "step": 8180 }, { "epoch": 0.819, "grad_norm": 7.367145538330078, "learning_rate": 8.363800000000001e-06, "loss": 1.2635, "step": 8190 }, { "epoch": 0.82, "grad_norm": 8.88268756866455, "learning_rate": 8.361800000000002e-06, "loss": 1.3094, "step": 8200 }, { "epoch": 0.821, "grad_norm": 7.995091438293457, "learning_rate": 8.3598e-06, "loss": 1.3039, "step": 8210 }, { "epoch": 0.822, "grad_norm": 13.755719184875488, "learning_rate": 8.3578e-06, "loss": 1.2662, "step": 8220 }, { "epoch": 0.823, "grad_norm": 8.747024536132812, "learning_rate": 8.3558e-06, "loss": 1.369, "step": 8230 }, { "epoch": 0.824, "grad_norm": 10.373444557189941, "learning_rate": 8.353800000000001e-06, "loss": 1.1885, "step": 8240 }, { "epoch": 0.825, "grad_norm": 
17.393314361572266, "learning_rate": 8.3518e-06, "loss": 1.3867, "step": 8250 }, { "epoch": 0.826, "grad_norm": 6.5052385330200195, "learning_rate": 8.349800000000001e-06, "loss": 1.3611, "step": 8260 }, { "epoch": 0.827, "grad_norm": 5.978771686553955, "learning_rate": 8.3478e-06, "loss": 1.375, "step": 8270 }, { "epoch": 0.828, "grad_norm": 6.609357833862305, "learning_rate": 8.3458e-06, "loss": 1.3527, "step": 8280 }, { "epoch": 0.829, "grad_norm": 6.416878700256348, "learning_rate": 8.343800000000001e-06, "loss": 1.4646, "step": 8290 }, { "epoch": 0.83, "grad_norm": 5.9553070068359375, "learning_rate": 8.3418e-06, "loss": 1.4419, "step": 8300 }, { "epoch": 0.831, "grad_norm": 6.06718111038208, "learning_rate": 8.339800000000001e-06, "loss": 1.2246, "step": 8310 }, { "epoch": 0.832, "grad_norm": 7.142729759216309, "learning_rate": 8.3378e-06, "loss": 1.1964, "step": 8320 }, { "epoch": 0.833, "grad_norm": 7.330401420593262, "learning_rate": 8.3358e-06, "loss": 1.4103, "step": 8330 }, { "epoch": 0.834, "grad_norm": 6.410057067871094, "learning_rate": 8.333800000000001e-06, "loss": 1.2153, "step": 8340 }, { "epoch": 0.835, "grad_norm": 9.032052040100098, "learning_rate": 8.3318e-06, "loss": 1.2476, "step": 8350 }, { "epoch": 0.836, "grad_norm": 9.584602355957031, "learning_rate": 8.329800000000001e-06, "loss": 1.563, "step": 8360 }, { "epoch": 0.837, "grad_norm": 7.753745079040527, "learning_rate": 8.3278e-06, "loss": 1.3091, "step": 8370 }, { "epoch": 0.838, "grad_norm": 7.525424480438232, "learning_rate": 8.3258e-06, "loss": 1.4617, "step": 8380 }, { "epoch": 0.839, "grad_norm": 8.326545715332031, "learning_rate": 8.324e-06, "loss": 1.3037, "step": 8390 }, { "epoch": 0.84, "grad_norm": 7.542481422424316, "learning_rate": 8.322000000000001e-06, "loss": 1.2197, "step": 8400 }, { "epoch": 0.841, "grad_norm": 6.847381114959717, "learning_rate": 8.32e-06, "loss": 1.2244, "step": 8410 }, { "epoch": 0.842, "grad_norm": 6.623319149017334, "learning_rate": 8.318e-06, 
"loss": 1.3025, "step": 8420 }, { "epoch": 0.843, "grad_norm": 7.8201985359191895, "learning_rate": 8.316000000000001e-06, "loss": 1.5489, "step": 8430 }, { "epoch": 0.844, "grad_norm": 14.384247779846191, "learning_rate": 8.314e-06, "loss": 1.3125, "step": 8440 }, { "epoch": 0.845, "grad_norm": 8.017972946166992, "learning_rate": 8.312000000000001e-06, "loss": 1.2404, "step": 8450 }, { "epoch": 0.846, "grad_norm": 7.326786994934082, "learning_rate": 8.31e-06, "loss": 1.3027, "step": 8460 }, { "epoch": 0.847, "grad_norm": 10.630331993103027, "learning_rate": 8.308e-06, "loss": 1.1309, "step": 8470 }, { "epoch": 0.848, "grad_norm": 8.170510292053223, "learning_rate": 8.306000000000001e-06, "loss": 1.2588, "step": 8480 }, { "epoch": 0.849, "grad_norm": 7.433817386627197, "learning_rate": 8.304e-06, "loss": 1.205, "step": 8490 }, { "epoch": 0.85, "grad_norm": 86.2530288696289, "learning_rate": 8.302000000000001e-06, "loss": 1.0026, "step": 8500 }, { "epoch": 0.851, "grad_norm": 13.199993133544922, "learning_rate": 8.3e-06, "loss": 1.3911, "step": 8510 }, { "epoch": 0.852, "grad_norm": 9.135298728942871, "learning_rate": 8.298000000000001e-06, "loss": 1.4678, "step": 8520 }, { "epoch": 0.853, "grad_norm": 11.74889850616455, "learning_rate": 8.296000000000002e-06, "loss": 1.4173, "step": 8530 }, { "epoch": 0.854, "grad_norm": 8.984197616577148, "learning_rate": 8.294e-06, "loss": 1.447, "step": 8540 }, { "epoch": 0.855, "grad_norm": 9.712289810180664, "learning_rate": 8.292000000000001e-06, "loss": 1.2709, "step": 8550 }, { "epoch": 0.856, "grad_norm": 10.421830177307129, "learning_rate": 8.29e-06, "loss": 1.2745, "step": 8560 }, { "epoch": 0.857, "grad_norm": 8.220575332641602, "learning_rate": 8.288000000000001e-06, "loss": 1.2318, "step": 8570 }, { "epoch": 0.858, "grad_norm": 7.070282459259033, "learning_rate": 8.286e-06, "loss": 1.5558, "step": 8580 }, { "epoch": 0.859, "grad_norm": 10.326601028442383, "learning_rate": 8.284e-06, "loss": 1.1433, "step": 8590 }, { 
"epoch": 0.86, "grad_norm": 10.045757293701172, "learning_rate": 8.282000000000001e-06, "loss": 1.5078, "step": 8600 }, { "epoch": 0.861, "grad_norm": 10.863116264343262, "learning_rate": 8.28e-06, "loss": 1.2519, "step": 8610 }, { "epoch": 0.862, "grad_norm": 9.606372833251953, "learning_rate": 8.278000000000001e-06, "loss": 1.5117, "step": 8620 }, { "epoch": 0.863, "grad_norm": 7.906520843505859, "learning_rate": 8.276e-06, "loss": 1.2494, "step": 8630 }, { "epoch": 0.864, "grad_norm": 6.342785835266113, "learning_rate": 8.274e-06, "loss": 1.1593, "step": 8640 }, { "epoch": 0.865, "grad_norm": 12.541701316833496, "learning_rate": 8.272000000000001e-06, "loss": 1.8405, "step": 8650 }, { "epoch": 0.866, "grad_norm": 8.582027435302734, "learning_rate": 8.27e-06, "loss": 1.1913, "step": 8660 }, { "epoch": 0.867, "grad_norm": 7.983564853668213, "learning_rate": 8.268000000000001e-06, "loss": 1.1821, "step": 8670 }, { "epoch": 0.868, "grad_norm": 9.006510734558105, "learning_rate": 8.266e-06, "loss": 1.1201, "step": 8680 }, { "epoch": 0.869, "grad_norm": 19.340961456298828, "learning_rate": 8.264e-06, "loss": 1.2173, "step": 8690 }, { "epoch": 0.87, "grad_norm": 12.617766380310059, "learning_rate": 8.262000000000002e-06, "loss": 1.6695, "step": 8700 }, { "epoch": 0.871, "grad_norm": 12.432829856872559, "learning_rate": 8.26e-06, "loss": 1.2302, "step": 8710 }, { "epoch": 0.872, "grad_norm": 8.307838439941406, "learning_rate": 8.258000000000001e-06, "loss": 1.2066, "step": 8720 }, { "epoch": 0.873, "grad_norm": 12.183505058288574, "learning_rate": 8.256e-06, "loss": 1.4248, "step": 8730 }, { "epoch": 0.874, "grad_norm": 7.865843772888184, "learning_rate": 8.254000000000001e-06, "loss": 1.3798, "step": 8740 }, { "epoch": 0.875, "grad_norm": 7.76606559753418, "learning_rate": 8.252000000000002e-06, "loss": 1.3944, "step": 8750 }, { "epoch": 0.876, "grad_norm": 7.239150047302246, "learning_rate": 8.25e-06, "loss": 1.201, "step": 8760 }, { "epoch": 0.877, "grad_norm": 
6.1729936599731445, "learning_rate": 8.248e-06, "loss": 1.1715, "step": 8770 }, { "epoch": 0.878, "grad_norm": 8.52038860321045, "learning_rate": 8.246e-06, "loss": 1.2424, "step": 8780 }, { "epoch": 0.879, "grad_norm": 6.671008586883545, "learning_rate": 8.244000000000001e-06, "loss": 1.203, "step": 8790 }, { "epoch": 0.88, "grad_norm": 6.790961742401123, "learning_rate": 8.242000000000002e-06, "loss": 1.2305, "step": 8800 }, { "epoch": 0.881, "grad_norm": 11.63425064086914, "learning_rate": 8.24e-06, "loss": 1.1037, "step": 8810 }, { "epoch": 0.882, "grad_norm": 10.134452819824219, "learning_rate": 8.238e-06, "loss": 1.2812, "step": 8820 }, { "epoch": 0.883, "grad_norm": 15.643850326538086, "learning_rate": 8.236e-06, "loss": 1.4852, "step": 8830 }, { "epoch": 0.884, "grad_norm": 6.008415699005127, "learning_rate": 8.234000000000001e-06, "loss": 1.1147, "step": 8840 }, { "epoch": 0.885, "grad_norm": 9.277166366577148, "learning_rate": 8.232000000000002e-06, "loss": 1.1376, "step": 8850 }, { "epoch": 0.886, "grad_norm": 19.397674560546875, "learning_rate": 8.23e-06, "loss": 1.165, "step": 8860 }, { "epoch": 0.887, "grad_norm": 15.908233642578125, "learning_rate": 8.228e-06, "loss": 1.1535, "step": 8870 }, { "epoch": 0.888, "grad_norm": 14.281697273254395, "learning_rate": 8.226e-06, "loss": 1.7103, "step": 8880 }, { "epoch": 0.889, "grad_norm": 16.34212303161621, "learning_rate": 8.224000000000001e-06, "loss": 1.353, "step": 8890 }, { "epoch": 0.89, "grad_norm": 19.422325134277344, "learning_rate": 8.222000000000002e-06, "loss": 1.311, "step": 8900 }, { "epoch": 0.891, "grad_norm": 14.055275917053223, "learning_rate": 8.220000000000001e-06, "loss": 1.2886, "step": 8910 }, { "epoch": 0.892, "grad_norm": 9.511970520019531, "learning_rate": 8.218e-06, "loss": 0.9904, "step": 8920 }, { "epoch": 0.893, "grad_norm": 8.720645904541016, "learning_rate": 8.216e-06, "loss": 1.3097, "step": 8930 }, { "epoch": 0.894, "grad_norm": 8.380960464477539, "learning_rate": 
8.214000000000001e-06, "loss": 1.1766, "step": 8940 }, { "epoch": 0.895, "grad_norm": 12.928722381591797, "learning_rate": 8.212e-06, "loss": 1.2654, "step": 8950 }, { "epoch": 0.896, "grad_norm": 12.528756141662598, "learning_rate": 8.210000000000001e-06, "loss": 1.3464, "step": 8960 }, { "epoch": 0.897, "grad_norm": 12.092126846313477, "learning_rate": 8.208e-06, "loss": 1.2696, "step": 8970 }, { "epoch": 0.898, "grad_norm": 9.894901275634766, "learning_rate": 8.206e-06, "loss": 1.6406, "step": 8980 }, { "epoch": 0.899, "grad_norm": 7.282639503479004, "learning_rate": 8.204000000000001e-06, "loss": 1.4836, "step": 8990 }, { "epoch": 0.9, "grad_norm": 10.198521614074707, "learning_rate": 8.202e-06, "loss": 1.3081, "step": 9000 }, { "epoch": 0.901, "grad_norm": 12.465457916259766, "learning_rate": 8.2e-06, "loss": 1.3733, "step": 9010 }, { "epoch": 0.902, "grad_norm": 9.660425186157227, "learning_rate": 8.198e-06, "loss": 1.1725, "step": 9020 }, { "epoch": 0.903, "grad_norm": 17.863597869873047, "learning_rate": 8.196e-06, "loss": 1.2955, "step": 9030 }, { "epoch": 0.904, "grad_norm": 8.901445388793945, "learning_rate": 8.194000000000002e-06, "loss": 1.2603, "step": 9040 }, { "epoch": 0.905, "grad_norm": 13.649604797363281, "learning_rate": 8.192e-06, "loss": 1.4597, "step": 9050 }, { "epoch": 0.906, "grad_norm": 6.941133499145508, "learning_rate": 8.19e-06, "loss": 1.1778, "step": 9060 }, { "epoch": 0.907, "grad_norm": 9.281495094299316, "learning_rate": 8.188e-06, "loss": 1.1426, "step": 9070 }, { "epoch": 0.908, "grad_norm": 9.242386817932129, "learning_rate": 8.186000000000001e-06, "loss": 1.2009, "step": 9080 }, { "epoch": 0.909, "grad_norm": 3.5044586658477783, "learning_rate": 8.184000000000002e-06, "loss": 1.2419, "step": 9090 }, { "epoch": 0.91, "grad_norm": 18.300643920898438, "learning_rate": 8.182e-06, "loss": 1.7836, "step": 9100 }, { "epoch": 0.911, "grad_norm": 4.608722686767578, "learning_rate": 8.18e-06, "loss": 1.1672, "step": 9110 }, { "epoch": 
0.912, "grad_norm": 5.36377477645874, "learning_rate": 8.178e-06, "loss": 1.4461, "step": 9120 }, { "epoch": 0.913, "grad_norm": 7.647757530212402, "learning_rate": 8.176000000000001e-06, "loss": 1.4784, "step": 9130 }, { "epoch": 0.914, "grad_norm": 5.6226277351379395, "learning_rate": 8.174e-06, "loss": 1.392, "step": 9140 }, { "epoch": 0.915, "grad_norm": 5.684090614318848, "learning_rate": 8.172e-06, "loss": 1.2352, "step": 9150 }, { "epoch": 0.916, "grad_norm": 11.48893928527832, "learning_rate": 8.17e-06, "loss": 1.2246, "step": 9160 }, { "epoch": 0.917, "grad_norm": 7.330920219421387, "learning_rate": 8.168e-06, "loss": 1.1757, "step": 9170 }, { "epoch": 0.918, "grad_norm": 16.869800567626953, "learning_rate": 8.166000000000001e-06, "loss": 1.266, "step": 9180 }, { "epoch": 0.919, "grad_norm": 8.509018898010254, "learning_rate": 8.164e-06, "loss": 1.1967, "step": 9190 }, { "epoch": 0.92, "grad_norm": 8.543651580810547, "learning_rate": 8.162e-06, "loss": 1.1354, "step": 9200 }, { "epoch": 0.921, "grad_norm": 15.755046844482422, "learning_rate": 8.16e-06, "loss": 1.6088, "step": 9210 }, { "epoch": 0.922, "grad_norm": 15.97175121307373, "learning_rate": 8.158e-06, "loss": 1.4148, "step": 9220 }, { "epoch": 0.923, "grad_norm": 9.156184196472168, "learning_rate": 8.156000000000001e-06, "loss": 1.1948, "step": 9230 }, { "epoch": 0.924, "grad_norm": 7.709662914276123, "learning_rate": 8.154e-06, "loss": 1.2567, "step": 9240 }, { "epoch": 0.925, "grad_norm": 19.597766876220703, "learning_rate": 8.152000000000001e-06, "loss": 1.3205, "step": 9250 }, { "epoch": 0.926, "grad_norm": 16.312480926513672, "learning_rate": 8.15e-06, "loss": 1.4792, "step": 9260 }, { "epoch": 0.927, "grad_norm": 12.760614395141602, "learning_rate": 8.148e-06, "loss": 1.3687, "step": 9270 }, { "epoch": 0.928, "grad_norm": 14.185050964355469, "learning_rate": 8.146000000000001e-06, "loss": 1.4312, "step": 9280 }, { "epoch": 0.929, "grad_norm": 10.186132431030273, "learning_rate": 8.144e-06, 
"loss": 1.4786, "step": 9290 }, { "epoch": 0.93, "grad_norm": 6.729693412780762, "learning_rate": 8.142000000000001e-06, "loss": 1.157, "step": 9300 }, { "epoch": 0.931, "grad_norm": 8.60821533203125, "learning_rate": 8.14e-06, "loss": 1.3616, "step": 9310 }, { "epoch": 0.932, "grad_norm": 8.236334800720215, "learning_rate": 8.138e-06, "loss": 1.2969, "step": 9320 }, { "epoch": 0.933, "grad_norm": 11.71229076385498, "learning_rate": 8.136000000000001e-06, "loss": 1.2363, "step": 9330 }, { "epoch": 0.934, "grad_norm": 11.756903648376465, "learning_rate": 8.134e-06, "loss": 1.1534, "step": 9340 }, { "epoch": 0.935, "grad_norm": 9.359963417053223, "learning_rate": 8.132000000000001e-06, "loss": 1.269, "step": 9350 }, { "epoch": 0.936, "grad_norm": 7.125011444091797, "learning_rate": 8.13e-06, "loss": 1.3823, "step": 9360 }, { "epoch": 0.937, "grad_norm": 14.872244834899902, "learning_rate": 8.128e-06, "loss": 1.248, "step": 9370 }, { "epoch": 0.938, "grad_norm": 8.084760665893555, "learning_rate": 8.126e-06, "loss": 1.312, "step": 9380 }, { "epoch": 0.939, "grad_norm": 6.097630500793457, "learning_rate": 8.124e-06, "loss": 1.2199, "step": 9390 }, { "epoch": 0.94, "grad_norm": 8.986607551574707, "learning_rate": 8.122000000000001e-06, "loss": 1.4072, "step": 9400 }, { "epoch": 0.941, "grad_norm": 13.678489685058594, "learning_rate": 8.120000000000002e-06, "loss": 1.0304, "step": 9410 }, { "epoch": 0.942, "grad_norm": 10.905418395996094, "learning_rate": 8.118000000000001e-06, "loss": 1.3025, "step": 9420 }, { "epoch": 0.943, "grad_norm": 9.4024019241333, "learning_rate": 8.116e-06, "loss": 1.3334, "step": 9430 }, { "epoch": 0.944, "grad_norm": 13.690103530883789, "learning_rate": 8.114e-06, "loss": 1.4316, "step": 9440 }, { "epoch": 0.945, "grad_norm": 9.857147216796875, "learning_rate": 8.112000000000001e-06, "loss": 1.3847, "step": 9450 }, { "epoch": 0.946, "grad_norm": 9.887187004089355, "learning_rate": 8.110000000000002e-06, "loss": 1.1846, "step": 9460 }, { 
"epoch": 0.947, "grad_norm": 8.069936752319336, "learning_rate": 8.108000000000001e-06, "loss": 1.2329, "step": 9470 }, { "epoch": 0.948, "grad_norm": 9.554997444152832, "learning_rate": 8.106e-06, "loss": 1.4048, "step": 9480 }, { "epoch": 0.949, "grad_norm": 5.707096576690674, "learning_rate": 8.104e-06, "loss": 1.0651, "step": 9490 }, { "epoch": 0.95, "grad_norm": 9.275796890258789, "learning_rate": 8.102000000000001e-06, "loss": 1.3682, "step": 9500 }, { "epoch": 0.951, "grad_norm": 13.528054237365723, "learning_rate": 8.1e-06, "loss": 1.2079, "step": 9510 }, { "epoch": 0.952, "grad_norm": 7.879480361938477, "learning_rate": 8.098000000000001e-06, "loss": 1.3361, "step": 9520 }, { "epoch": 0.953, "grad_norm": 7.602863788604736, "learning_rate": 8.096e-06, "loss": 1.2125, "step": 9530 }, { "epoch": 0.954, "grad_norm": 7.64185094833374, "learning_rate": 8.094e-06, "loss": 1.1582, "step": 9540 }, { "epoch": 0.955, "grad_norm": 12.65957260131836, "learning_rate": 8.092000000000001e-06, "loss": 1.4462, "step": 9550 }, { "epoch": 0.956, "grad_norm": 12.840446472167969, "learning_rate": 8.09e-06, "loss": 1.2965, "step": 9560 }, { "epoch": 0.957, "grad_norm": 7.0388054847717285, "learning_rate": 8.088e-06, "loss": 1.04, "step": 9570 }, { "epoch": 0.958, "grad_norm": 9.40847110748291, "learning_rate": 8.086e-06, "loss": 1.5487, "step": 9580 }, { "epoch": 0.959, "grad_norm": 10.02834415435791, "learning_rate": 8.084000000000001e-06, "loss": 1.3613, "step": 9590 }, { "epoch": 0.96, "grad_norm": 8.903786659240723, "learning_rate": 8.082000000000002e-06, "loss": 1.2609, "step": 9600 }, { "epoch": 0.961, "grad_norm": 5.779774188995361, "learning_rate": 8.08e-06, "loss": 1.3121, "step": 9610 }, { "epoch": 0.962, "grad_norm": 8.681835174560547, "learning_rate": 8.078e-06, "loss": 1.3508, "step": 9620 }, { "epoch": 0.963, "grad_norm": 12.313486099243164, "learning_rate": 8.076e-06, "loss": 1.2865, "step": 9630 }, { "epoch": 0.964, "grad_norm": 8.353299140930176, 
"learning_rate": 8.074000000000001e-06, "loss": 1.3234, "step": 9640 }, { "epoch": 0.965, "grad_norm": 7.131963729858398, "learning_rate": 8.072000000000002e-06, "loss": 1.2909, "step": 9650 }, { "epoch": 0.966, "grad_norm": 6.076341152191162, "learning_rate": 8.07e-06, "loss": 1.3893, "step": 9660 }, { "epoch": 0.967, "grad_norm": 9.107584953308105, "learning_rate": 8.068e-06, "loss": 1.5348, "step": 9670 }, { "epoch": 0.968, "grad_norm": 5.60544490814209, "learning_rate": 8.066e-06, "loss": 1.3756, "step": 9680 }, { "epoch": 0.969, "grad_norm": 7.511078357696533, "learning_rate": 8.064000000000001e-06, "loss": 1.3456, "step": 9690 }, { "epoch": 0.97, "grad_norm": 7.475671291351318, "learning_rate": 8.062000000000002e-06, "loss": 1.2894, "step": 9700 }, { "epoch": 0.971, "grad_norm": 5.982749938964844, "learning_rate": 8.06e-06, "loss": 1.3245, "step": 9710 }, { "epoch": 0.972, "grad_norm": 9.417170524597168, "learning_rate": 8.058e-06, "loss": 1.3235, "step": 9720 }, { "epoch": 0.973, "grad_norm": 8.415316581726074, "learning_rate": 8.056e-06, "loss": 1.2492, "step": 9730 }, { "epoch": 0.974, "grad_norm": 10.323545455932617, "learning_rate": 8.054000000000001e-06, "loss": 1.2684, "step": 9740 }, { "epoch": 0.975, "grad_norm": 10.640973091125488, "learning_rate": 8.052e-06, "loss": 1.4315, "step": 9750 }, { "epoch": 0.976, "grad_norm": 4.427105903625488, "learning_rate": 8.050000000000001e-06, "loss": 1.0192, "step": 9760 }, { "epoch": 0.977, "grad_norm": 10.936813354492188, "learning_rate": 8.048e-06, "loss": 1.2552, "step": 9770 }, { "epoch": 0.978, "grad_norm": 5.7032575607299805, "learning_rate": 8.046e-06, "loss": 1.1136, "step": 9780 }, { "epoch": 0.979, "grad_norm": 20.400287628173828, "learning_rate": 8.044000000000001e-06, "loss": 1.2879, "step": 9790 }, { "epoch": 0.98, "grad_norm": 9.807183265686035, "learning_rate": 8.042e-06, "loss": 1.0901, "step": 9800 }, { "epoch": 0.981, "grad_norm": 16.558095932006836, "learning_rate": 8.040000000000001e-06, 
"loss": 1.1847, "step": 9810 }, { "epoch": 0.982, "grad_norm": 7.894741058349609, "learning_rate": 8.038e-06, "loss": 1.2297, "step": 9820 }, { "epoch": 0.983, "grad_norm": 8.963443756103516, "learning_rate": 8.036e-06, "loss": 1.2507, "step": 9830 }, { "epoch": 0.984, "grad_norm": 15.898109436035156, "learning_rate": 8.034000000000001e-06, "loss": 1.4566, "step": 9840 }, { "epoch": 0.985, "grad_norm": 16.59617805480957, "learning_rate": 8.032e-06, "loss": 1.0611, "step": 9850 }, { "epoch": 0.986, "grad_norm": 13.910177230834961, "learning_rate": 8.030000000000001e-06, "loss": 1.2965, "step": 9860 }, { "epoch": 0.987, "grad_norm": 23.675310134887695, "learning_rate": 8.028e-06, "loss": 1.3111, "step": 9870 }, { "epoch": 0.988, "grad_norm": 9.923601150512695, "learning_rate": 8.026e-06, "loss": 1.3284, "step": 9880 }, { "epoch": 0.989, "grad_norm": 7.386953830718994, "learning_rate": 8.024000000000001e-06, "loss": 1.3151, "step": 9890 }, { "epoch": 0.99, "grad_norm": 9.926563262939453, "learning_rate": 8.022e-06, "loss": 1.3945, "step": 9900 }, { "epoch": 0.991, "grad_norm": 7.803895950317383, "learning_rate": 8.020000000000001e-06, "loss": 1.012, "step": 9910 }, { "epoch": 0.992, "grad_norm": 13.10973072052002, "learning_rate": 8.018e-06, "loss": 1.1112, "step": 9920 }, { "epoch": 0.993, "grad_norm": 10.396162986755371, "learning_rate": 8.016e-06, "loss": 1.224, "step": 9930 }, { "epoch": 0.994, "grad_norm": 10.470932960510254, "learning_rate": 8.014e-06, "loss": 1.2805, "step": 9940 }, { "epoch": 0.995, "grad_norm": 21.379907608032227, "learning_rate": 8.012e-06, "loss": 1.3224, "step": 9950 }, { "epoch": 0.996, "grad_norm": 14.837398529052734, "learning_rate": 8.010000000000001e-06, "loss": 1.2821, "step": 9960 }, { "epoch": 0.997, "grad_norm": 18.319541931152344, "learning_rate": 8.008e-06, "loss": 1.3828, "step": 9970 }, { "epoch": 0.998, "grad_norm": 6.538596153259277, "learning_rate": 8.006000000000001e-06, "loss": 1.288, "step": 9980 }, { "epoch": 0.999, 
"grad_norm": 14.433701515197754, "learning_rate": 8.004e-06, "loss": 0.9839, "step": 9990 }, { "epoch": 1.0, "grad_norm": 11.867210388183594, "learning_rate": 8.002e-06, "loss": 1.5767, "step": 10000 }, { "epoch": 1.001, "grad_norm": 13.021316528320312, "learning_rate": 8.000000000000001e-06, "loss": 1.3745, "step": 10010 }, { "epoch": 1.002, "grad_norm": 13.234004020690918, "learning_rate": 7.998e-06, "loss": 1.4299, "step": 10020 }, { "epoch": 1.003, "grad_norm": 15.071832656860352, "learning_rate": 7.996000000000001e-06, "loss": 1.2829, "step": 10030 }, { "epoch": 1.004, "grad_norm": 8.7450590133667, "learning_rate": 7.994e-06, "loss": 1.21, "step": 10040 }, { "epoch": 1.005, "grad_norm": 17.60546875, "learning_rate": 7.992e-06, "loss": 1.5388, "step": 10050 }, { "epoch": 1.006, "grad_norm": 9.531593322753906, "learning_rate": 7.990000000000001e-06, "loss": 1.4292, "step": 10060 }, { "epoch": 1.007, "grad_norm": 9.887786865234375, "learning_rate": 7.988e-06, "loss": 1.2278, "step": 10070 }, { "epoch": 1.008, "grad_norm": 5.693431854248047, "learning_rate": 7.986000000000001e-06, "loss": 1.165, "step": 10080 }, { "epoch": 1.009, "grad_norm": 2.636251449584961, "learning_rate": 7.984e-06, "loss": 1.2501, "step": 10090 }, { "epoch": 1.01, "grad_norm": 6.747988700866699, "learning_rate": 7.982e-06, "loss": 1.275, "step": 10100 }, { "epoch": 1.011, "grad_norm": 19.77574920654297, "learning_rate": 7.980000000000002e-06, "loss": 1.3039, "step": 10110 }, { "epoch": 1.012, "grad_norm": 39.11298751831055, "learning_rate": 7.978e-06, "loss": 1.0996, "step": 10120 }, { "epoch": 1.013, "grad_norm": 7.106316566467285, "learning_rate": 7.976000000000001e-06, "loss": 1.0674, "step": 10130 }, { "epoch": 1.014, "grad_norm": 13.42906379699707, "learning_rate": 7.974e-06, "loss": 1.4476, "step": 10140 }, { "epoch": 1.015, "grad_norm": 15.054926872253418, "learning_rate": 7.972000000000001e-06, "loss": 1.3358, "step": 10150 }, { "epoch": 1.016, "grad_norm": 8.390707015991211, 
"learning_rate": 7.970000000000002e-06, "loss": 1.3159, "step": 10160 }, { "epoch": 1.017, "grad_norm": 4.184990882873535, "learning_rate": 7.968e-06, "loss": 1.2315, "step": 10170 }, { "epoch": 1.018, "grad_norm": 8.678812980651855, "learning_rate": 7.966e-06, "loss": 1.6321, "step": 10180 }, { "epoch": 1.019, "grad_norm": 8.680567741394043, "learning_rate": 7.964e-06, "loss": 1.1665, "step": 10190 }, { "epoch": 1.02, "grad_norm": 6.406949520111084, "learning_rate": 7.962000000000001e-06, "loss": 1.3481, "step": 10200 }, { "epoch": 1.021, "grad_norm": 8.983782768249512, "learning_rate": 7.960000000000002e-06, "loss": 1.0476, "step": 10210 }, { "epoch": 1.022, "grad_norm": 12.514788627624512, "learning_rate": 7.958e-06, "loss": 1.2723, "step": 10220 }, { "epoch": 1.023, "grad_norm": 4.989296913146973, "learning_rate": 7.956e-06, "loss": 1.1372, "step": 10230 }, { "epoch": 1.024, "grad_norm": 11.664644241333008, "learning_rate": 7.954e-06, "loss": 1.4984, "step": 10240 }, { "epoch": 1.025, "grad_norm": 9.101034164428711, "learning_rate": 7.952000000000001e-06, "loss": 1.7968, "step": 10250 }, { "epoch": 1.026, "grad_norm": 9.689080238342285, "learning_rate": 7.950000000000002e-06, "loss": 1.0164, "step": 10260 }, { "epoch": 1.027, "grad_norm": 11.213468551635742, "learning_rate": 7.948e-06, "loss": 1.2014, "step": 10270 }, { "epoch": 1.028, "grad_norm": 13.76309871673584, "learning_rate": 7.946e-06, "loss": 0.8428, "step": 10280 }, { "epoch": 1.029, "grad_norm": 14.89570426940918, "learning_rate": 7.944e-06, "loss": 1.5886, "step": 10290 }, { "epoch": 1.03, "grad_norm": 6.302089691162109, "learning_rate": 7.942000000000001e-06, "loss": 1.0413, "step": 10300 }, { "epoch": 1.031, "grad_norm": 11.200397491455078, "learning_rate": 7.94e-06, "loss": 1.4769, "step": 10310 }, { "epoch": 1.032, "grad_norm": 10.750725746154785, "learning_rate": 7.938000000000001e-06, "loss": 1.1265, "step": 10320 }, { "epoch": 1.033, "grad_norm": 5.839219093322754, "learning_rate": 
7.936e-06, "loss": 1.006, "step": 10330 }, { "epoch": 1.034, "grad_norm": 5.188995361328125, "learning_rate": 7.934e-06, "loss": 1.2378, "step": 10340 }, { "epoch": 1.035, "grad_norm": 8.416781425476074, "learning_rate": 7.932000000000001e-06, "loss": 1.2315, "step": 10350 }, { "epoch": 1.036, "grad_norm": 7.830222129821777, "learning_rate": 7.93e-06, "loss": 1.3263, "step": 10360 }, { "epoch": 1.037, "grad_norm": 10.234244346618652, "learning_rate": 7.928e-06, "loss": 1.2813, "step": 10370 }, { "epoch": 1.038, "grad_norm": 13.164898872375488, "learning_rate": 7.926e-06, "loss": 1.2883, "step": 10380 }, { "epoch": 1.039, "grad_norm": 8.59278392791748, "learning_rate": 7.924e-06, "loss": 1.3548, "step": 10390 }, { "epoch": 1.04, "grad_norm": 7.9683661460876465, "learning_rate": 7.922000000000001e-06, "loss": 1.0702, "step": 10400 }, { "epoch": 1.041, "grad_norm": 8.787671089172363, "learning_rate": 7.92e-06, "loss": 1.188, "step": 10410 }, { "epoch": 1.042, "grad_norm": 13.480223655700684, "learning_rate": 7.918e-06, "loss": 1.2948, "step": 10420 }, { "epoch": 1.043, "grad_norm": 9.4530668258667, "learning_rate": 7.916e-06, "loss": 1.3975, "step": 10430 }, { "epoch": 1.044, "grad_norm": 5.430606842041016, "learning_rate": 7.914e-06, "loss": 1.415, "step": 10440 }, { "epoch": 1.045, "grad_norm": 7.470256805419922, "learning_rate": 7.912000000000001e-06, "loss": 1.1531, "step": 10450 }, { "epoch": 1.046, "grad_norm": 13.159626960754395, "learning_rate": 7.91e-06, "loss": 1.434, "step": 10460 }, { "epoch": 1.047, "grad_norm": 10.831557273864746, "learning_rate": 7.908e-06, "loss": 1.3316, "step": 10470 }, { "epoch": 1.048, "grad_norm": 11.775965690612793, "learning_rate": 7.906e-06, "loss": 1.6235, "step": 10480 }, { "epoch": 1.049, "grad_norm": 9.275116920471191, "learning_rate": 7.904000000000001e-06, "loss": 1.3792, "step": 10490 }, { "epoch": 1.05, "grad_norm": 9.438948631286621, "learning_rate": 7.902000000000002e-06, "loss": 1.0216, "step": 10500 }, { "epoch": 
1.051, "grad_norm": 14.71841812133789, "learning_rate": 7.9e-06, "loss": 1.4033, "step": 10510 }, { "epoch": 1.052, "grad_norm": 6.452937602996826, "learning_rate": 7.898e-06, "loss": 1.1392, "step": 10520 }, { "epoch": 1.053, "grad_norm": 13.941332817077637, "learning_rate": 7.896e-06, "loss": 1.3272, "step": 10530 }, { "epoch": 1.054, "grad_norm": 7.309688568115234, "learning_rate": 7.894000000000001e-06, "loss": 0.8811, "step": 10540 }, { "epoch": 1.055, "grad_norm": 12.968031883239746, "learning_rate": 7.892e-06, "loss": 0.9547, "step": 10550 }, { "epoch": 1.056, "grad_norm": 16.749902725219727, "learning_rate": 7.89e-06, "loss": 1.2925, "step": 10560 }, { "epoch": 1.057, "grad_norm": 25.595151901245117, "learning_rate": 7.888e-06, "loss": 1.3655, "step": 10570 }, { "epoch": 1.058, "grad_norm": 9.057029724121094, "learning_rate": 7.886e-06, "loss": 0.9852, "step": 10580 }, { "epoch": 1.059, "grad_norm": 9.848952293395996, "learning_rate": 7.884000000000001e-06, "loss": 1.2786, "step": 10590 }, { "epoch": 1.06, "grad_norm": 14.815973281860352, "learning_rate": 7.882e-06, "loss": 1.1167, "step": 10600 }, { "epoch": 1.061, "grad_norm": 12.762728691101074, "learning_rate": 7.88e-06, "loss": 1.3386, "step": 10610 }, { "epoch": 1.062, "grad_norm": 15.096304893493652, "learning_rate": 7.878e-06, "loss": 1.531, "step": 10620 }, { "epoch": 1.063, "grad_norm": 14.376446723937988, "learning_rate": 7.876e-06, "loss": 1.1282, "step": 10630 }, { "epoch": 1.064, "grad_norm": 11.182927131652832, "learning_rate": 7.874000000000001e-06, "loss": 1.3636, "step": 10640 }, { "epoch": 1.065, "grad_norm": 12.463271141052246, "learning_rate": 7.872e-06, "loss": 1.1498, "step": 10650 }, { "epoch": 1.066, "grad_norm": 10.069700241088867, "learning_rate": 7.870000000000001e-06, "loss": 1.184, "step": 10660 }, { "epoch": 1.067, "grad_norm": 14.053030967712402, "learning_rate": 7.868000000000002e-06, "loss": 1.5133, "step": 10670 }, { "epoch": 1.068, "grad_norm": 28.76121711730957, 
"learning_rate": 7.866e-06, "loss": 1.1383, "step": 10680 }, { "epoch": 1.069, "grad_norm": 19.434999465942383, "learning_rate": 7.864000000000001e-06, "loss": 1.0844, "step": 10690 }, { "epoch": 1.07, "grad_norm": 23.959007263183594, "learning_rate": 7.862e-06, "loss": 1.6005, "step": 10700 }, { "epoch": 1.071, "grad_norm": 14.729728698730469, "learning_rate": 7.860000000000001e-06, "loss": 1.2785, "step": 10710 }, { "epoch": 1.072, "grad_norm": 9.267023086547852, "learning_rate": 7.858000000000002e-06, "loss": 1.4966, "step": 10720 }, { "epoch": 1.073, "grad_norm": 14.093179702758789, "learning_rate": 7.856e-06, "loss": 1.3391, "step": 10730 }, { "epoch": 1.074, "grad_norm": 7.9301018714904785, "learning_rate": 7.854e-06, "loss": 1.5539, "step": 10740 }, { "epoch": 1.075, "grad_norm": 7.72337007522583, "learning_rate": 7.852e-06, "loss": 1.419, "step": 10750 }, { "epoch": 1.076, "grad_norm": 8.273058891296387, "learning_rate": 7.850000000000001e-06, "loss": 1.0777, "step": 10760 }, { "epoch": 1.077, "grad_norm": 8.839051246643066, "learning_rate": 7.848000000000002e-06, "loss": 1.3405, "step": 10770 }, { "epoch": 1.078, "grad_norm": 7.829715251922607, "learning_rate": 7.846e-06, "loss": 1.4492, "step": 10780 }, { "epoch": 1.079, "grad_norm": 11.053963661193848, "learning_rate": 7.844e-06, "loss": 1.3339, "step": 10790 }, { "epoch": 1.08, "grad_norm": 10.65087890625, "learning_rate": 7.842e-06, "loss": 1.1982, "step": 10800 }, { "epoch": 1.081, "grad_norm": 12.625408172607422, "learning_rate": 7.840000000000001e-06, "loss": 1.197, "step": 10810 }, { "epoch": 1.082, "grad_norm": 7.893925666809082, "learning_rate": 7.838000000000002e-06, "loss": 1.3321, "step": 10820 }, { "epoch": 1.083, "grad_norm": 8.165111541748047, "learning_rate": 7.836000000000001e-06, "loss": 1.1833, "step": 10830 }, { "epoch": 1.084, "grad_norm": 10.146799087524414, "learning_rate": 7.834e-06, "loss": 1.0729, "step": 10840 }, { "epoch": 1.085, "grad_norm": 9.213784217834473, "learning_rate": 
7.832e-06, "loss": 1.4759, "step": 10850 }, { "epoch": 1.086, "grad_norm": 10.980295181274414, "learning_rate": 7.830000000000001e-06, "loss": 1.3511, "step": 10860 }, { "epoch": 1.087, "grad_norm": 9.844440460205078, "learning_rate": 7.828000000000002e-06, "loss": 1.245, "step": 10870 }, { "epoch": 1.088, "grad_norm": 10.81872272491455, "learning_rate": 7.826000000000001e-06, "loss": 1.5698, "step": 10880 }, { "epoch": 1.089, "grad_norm": 13.098006248474121, "learning_rate": 7.824e-06, "loss": 1.5678, "step": 10890 }, { "epoch": 1.09, "grad_norm": 8.123541831970215, "learning_rate": 7.822e-06, "loss": 1.3145, "step": 10900 }, { "epoch": 1.091, "grad_norm": 9.923827171325684, "learning_rate": 7.820000000000001e-06, "loss": 1.4514, "step": 10910 }, { "epoch": 1.092, "grad_norm": 7.7369608879089355, "learning_rate": 7.818e-06, "loss": 1.1743, "step": 10920 }, { "epoch": 1.093, "grad_norm": 10.460976600646973, "learning_rate": 7.816000000000001e-06, "loss": 1.3105, "step": 10930 }, { "epoch": 1.094, "grad_norm": 8.524801254272461, "learning_rate": 7.814e-06, "loss": 1.3426, "step": 10940 }, { "epoch": 1.095, "grad_norm": 8.432028770446777, "learning_rate": 7.812e-06, "loss": 1.4361, "step": 10950 }, { "epoch": 1.096, "grad_norm": 7.540304660797119, "learning_rate": 7.810000000000001e-06, "loss": 1.2986, "step": 10960 }, { "epoch": 1.097, "grad_norm": 9.396004676818848, "learning_rate": 7.808e-06, "loss": 1.3041, "step": 10970 }, { "epoch": 1.098, "grad_norm": 12.064508438110352, "learning_rate": 7.806e-06, "loss": 1.2322, "step": 10980 }, { "epoch": 1.099, "grad_norm": 12.201428413391113, "learning_rate": 7.804e-06, "loss": 1.5444, "step": 10990 }, { "epoch": 1.1, "grad_norm": 12.476470947265625, "learning_rate": 7.802000000000001e-06, "loss": 1.1996, "step": 11000 }, { "epoch": 1.101, "grad_norm": 10.307188987731934, "learning_rate": 7.800000000000002e-06, "loss": 1.3811, "step": 11010 }, { "epoch": 1.102, "grad_norm": 10.317233085632324, "learning_rate": 7.798e-06, 
"loss": 1.3866, "step": 11020 }, { "epoch": 1.103, "grad_norm": 8.90229320526123, "learning_rate": 7.796e-06, "loss": 1.2163, "step": 11030 }, { "epoch": 1.104, "grad_norm": 13.345369338989258, "learning_rate": 7.794e-06, "loss": 1.4375, "step": 11040 }, { "epoch": 1.105, "grad_norm": 4.722508430480957, "learning_rate": 7.792000000000001e-06, "loss": 1.1937, "step": 11050 }, { "epoch": 1.106, "grad_norm": 10.615554809570312, "learning_rate": 7.790000000000002e-06, "loss": 1.2031, "step": 11060 }, { "epoch": 1.107, "grad_norm": 8.157193183898926, "learning_rate": 7.788e-06, "loss": 1.1008, "step": 11070 }, { "epoch": 1.108, "grad_norm": 6.913824558258057, "learning_rate": 7.786e-06, "loss": 1.4297, "step": 11080 }, { "epoch": 1.109, "grad_norm": 12.74704360961914, "learning_rate": 7.784e-06, "loss": 1.3847, "step": 11090 }, { "epoch": 1.11, "grad_norm": 11.003418922424316, "learning_rate": 7.782000000000001e-06, "loss": 1.1113, "step": 11100 }, { "epoch": 1.111, "grad_norm": 12.250214576721191, "learning_rate": 7.78e-06, "loss": 1.2174, "step": 11110 }, { "epoch": 1.112, "grad_norm": 7.970766067504883, "learning_rate": 7.778e-06, "loss": 1.3196, "step": 11120 }, { "epoch": 1.113, "grad_norm": 7.981255054473877, "learning_rate": 7.776e-06, "loss": 1.0512, "step": 11130 }, { "epoch": 1.114, "grad_norm": 14.795656204223633, "learning_rate": 7.774e-06, "loss": 1.5375, "step": 11140 }, { "epoch": 1.115, "grad_norm": 7.6331868171691895, "learning_rate": 7.772000000000001e-06, "loss": 1.0928, "step": 11150 }, { "epoch": 1.116, "grad_norm": 9.30523681640625, "learning_rate": 7.77e-06, "loss": 1.4465, "step": 11160 }, { "epoch": 1.117, "grad_norm": 8.648313522338867, "learning_rate": 7.768e-06, "loss": 1.1902, "step": 11170 }, { "epoch": 1.1179999999999999, "grad_norm": 8.85123348236084, "learning_rate": 7.766e-06, "loss": 1.3243, "step": 11180 }, { "epoch": 1.119, "grad_norm": 8.859946250915527, "learning_rate": 7.764e-06, "loss": 1.2548, "step": 11190 }, { "epoch": 1.12, 
"grad_norm": 9.677680969238281, "learning_rate": 7.762000000000001e-06, "loss": 1.2635, "step": 11200 }, { "epoch": 1.121, "grad_norm": 12.576932907104492, "learning_rate": 7.76e-06, "loss": 1.2675, "step": 11210 }, { "epoch": 1.1219999999999999, "grad_norm": 7.475512981414795, "learning_rate": 7.758000000000001e-06, "loss": 1.3025, "step": 11220 }, { "epoch": 1.123, "grad_norm": 11.932381629943848, "learning_rate": 7.756e-06, "loss": 1.3095, "step": 11230 }, { "epoch": 1.124, "grad_norm": 16.230453491210938, "learning_rate": 7.754e-06, "loss": 1.2659, "step": 11240 }, { "epoch": 1.125, "grad_norm": 14.428094863891602, "learning_rate": 7.752000000000001e-06, "loss": 1.3429, "step": 11250 }, { "epoch": 1.126, "grad_norm": 11.504801750183105, "learning_rate": 7.75e-06, "loss": 1.4332, "step": 11260 }, { "epoch": 1.127, "grad_norm": 7.133755683898926, "learning_rate": 7.748000000000001e-06, "loss": 1.0737, "step": 11270 }, { "epoch": 1.1280000000000001, "grad_norm": 12.80777359008789, "learning_rate": 7.746e-06, "loss": 1.1894, "step": 11280 }, { "epoch": 1.129, "grad_norm": 9.48471450805664, "learning_rate": 7.744e-06, "loss": 1.2706, "step": 11290 }, { "epoch": 1.13, "grad_norm": 10.487293243408203, "learning_rate": 7.742000000000001e-06, "loss": 1.2966, "step": 11300 }, { "epoch": 1.131, "grad_norm": 11.885967254638672, "learning_rate": 7.74e-06, "loss": 0.9832, "step": 11310 }, { "epoch": 1.1320000000000001, "grad_norm": 12.818087577819824, "learning_rate": 7.738000000000001e-06, "loss": 1.2041, "step": 11320 }, { "epoch": 1.133, "grad_norm": 11.201371192932129, "learning_rate": 7.736e-06, "loss": 1.6681, "step": 11330 }, { "epoch": 1.134, "grad_norm": 9.231833457946777, "learning_rate": 7.734e-06, "loss": 1.506, "step": 11340 }, { "epoch": 1.135, "grad_norm": 9.788262367248535, "learning_rate": 7.732e-06, "loss": 1.3471, "step": 11350 }, { "epoch": 1.1360000000000001, "grad_norm": 12.457501411437988, "learning_rate": 7.73e-06, "loss": 1.177, "step": 11360 }, { 
"epoch": 1.137, "grad_norm": 8.164016723632812, "learning_rate": 7.728000000000001e-06, "loss": 1.2875, "step": 11370 }, { "epoch": 1.138, "grad_norm": 8.233739852905273, "learning_rate": 7.726e-06, "loss": 1.0916, "step": 11380 }, { "epoch": 1.139, "grad_norm": 11.928675651550293, "learning_rate": 7.724000000000001e-06, "loss": 1.3217, "step": 11390 }, { "epoch": 1.1400000000000001, "grad_norm": 7.866297721862793, "learning_rate": 7.722e-06, "loss": 1.3198, "step": 11400 }, { "epoch": 1.141, "grad_norm": 14.815643310546875, "learning_rate": 7.72e-06, "loss": 1.1869, "step": 11410 }, { "epoch": 1.142, "grad_norm": 9.758526802062988, "learning_rate": 7.718000000000001e-06, "loss": 1.4164, "step": 11420 }, { "epoch": 1.143, "grad_norm": 13.417173385620117, "learning_rate": 7.716e-06, "loss": 1.3128, "step": 11430 }, { "epoch": 1.144, "grad_norm": 7.945681095123291, "learning_rate": 7.714000000000001e-06, "loss": 1.4134, "step": 11440 }, { "epoch": 1.145, "grad_norm": 7.368936061859131, "learning_rate": 7.712e-06, "loss": 1.2051, "step": 11450 }, { "epoch": 1.146, "grad_norm": 9.161569595336914, "learning_rate": 7.71e-06, "loss": 1.1362, "step": 11460 }, { "epoch": 1.147, "grad_norm": 7.347046852111816, "learning_rate": 7.708000000000001e-06, "loss": 1.4091, "step": 11470 }, { "epoch": 1.148, "grad_norm": 10.676250457763672, "learning_rate": 7.706e-06, "loss": 1.2735, "step": 11480 }, { "epoch": 1.149, "grad_norm": 16.189468383789062, "learning_rate": 7.704000000000001e-06, "loss": 1.2632, "step": 11490 }, { "epoch": 1.15, "grad_norm": 11.561668395996094, "learning_rate": 7.702e-06, "loss": 1.2617, "step": 11500 }, { "epoch": 1.151, "grad_norm": 12.20193099975586, "learning_rate": 7.7e-06, "loss": 1.3418, "step": 11510 }, { "epoch": 1.152, "grad_norm": 10.418259620666504, "learning_rate": 7.698000000000002e-06, "loss": 1.3261, "step": 11520 }, { "epoch": 1.153, "grad_norm": 12.968474388122559, "learning_rate": 7.696e-06, "loss": 1.284, "step": 11530 }, { "epoch": 
1.154, "grad_norm": 9.24622917175293, "learning_rate": 7.694e-06, "loss": 1.3208, "step": 11540 }, { "epoch": 1.155, "grad_norm": 7.529898166656494, "learning_rate": 7.692e-06, "loss": 1.2935, "step": 11550 }, { "epoch": 1.156, "grad_norm": 5.45638370513916, "learning_rate": 7.690000000000001e-06, "loss": 1.1384, "step": 11560 }, { "epoch": 1.157, "grad_norm": 10.629471778869629, "learning_rate": 7.688000000000002e-06, "loss": 1.088, "step": 11570 }, { "epoch": 1.158, "grad_norm": 9.54748249053955, "learning_rate": 7.686e-06, "loss": 1.1691, "step": 11580 }, { "epoch": 1.159, "grad_norm": 11.715593338012695, "learning_rate": 7.684e-06, "loss": 1.3232, "step": 11590 }, { "epoch": 1.16, "grad_norm": 10.700724601745605, "learning_rate": 7.682e-06, "loss": 1.4095, "step": 11600 }, { "epoch": 1.161, "grad_norm": 16.31391143798828, "learning_rate": 7.680000000000001e-06, "loss": 1.4745, "step": 11610 }, { "epoch": 1.162, "grad_norm": 6.418362140655518, "learning_rate": 7.678000000000002e-06, "loss": 1.2342, "step": 11620 }, { "epoch": 1.163, "grad_norm": 10.577417373657227, "learning_rate": 7.676e-06, "loss": 1.5637, "step": 11630 }, { "epoch": 1.164, "grad_norm": 11.727459907531738, "learning_rate": 7.674e-06, "loss": 1.3949, "step": 11640 }, { "epoch": 1.165, "grad_norm": 10.998662948608398, "learning_rate": 7.672e-06, "loss": 1.1799, "step": 11650 }, { "epoch": 1.166, "grad_norm": 12.270730972290039, "learning_rate": 7.670000000000001e-06, "loss": 1.3412, "step": 11660 }, { "epoch": 1.167, "grad_norm": 10.753862380981445, "learning_rate": 7.668000000000002e-06, "loss": 1.3849, "step": 11670 }, { "epoch": 1.168, "grad_norm": 19.613229751586914, "learning_rate": 7.666e-06, "loss": 1.1015, "step": 11680 }, { "epoch": 1.169, "grad_norm": 10.615133285522461, "learning_rate": 7.664e-06, "loss": 1.3754, "step": 11690 }, { "epoch": 1.17, "grad_norm": 7.046148300170898, "learning_rate": 7.662e-06, "loss": 1.5112, "step": 11700 }, { "epoch": 1.171, "grad_norm": 
13.549969673156738, "learning_rate": 7.660000000000001e-06, "loss": 1.1653, "step": 11710 }, { "epoch": 1.172, "grad_norm": 7.402976989746094, "learning_rate": 7.658e-06, "loss": 1.0956, "step": 11720 }, { "epoch": 1.173, "grad_norm": 8.378164291381836, "learning_rate": 7.656000000000001e-06, "loss": 1.3736, "step": 11730 }, { "epoch": 1.174, "grad_norm": 7.473861217498779, "learning_rate": 7.654e-06, "loss": 1.3883, "step": 11740 }, { "epoch": 1.175, "grad_norm": 15.151702880859375, "learning_rate": 7.652e-06, "loss": 1.3179, "step": 11750 }, { "epoch": 1.176, "grad_norm": 6.8260087966918945, "learning_rate": 7.650000000000001e-06, "loss": 1.2548, "step": 11760 }, { "epoch": 1.177, "grad_norm": 11.639680862426758, "learning_rate": 7.648e-06, "loss": 1.2572, "step": 11770 }, { "epoch": 1.178, "grad_norm": 13.05052375793457, "learning_rate": 7.646e-06, "loss": 1.2918, "step": 11780 }, { "epoch": 1.179, "grad_norm": 10.76840591430664, "learning_rate": 7.644e-06, "loss": 1.2517, "step": 11790 }, { "epoch": 1.18, "grad_norm": 12.291444778442383, "learning_rate": 7.642e-06, "loss": 1.3506, "step": 11800 }, { "epoch": 1.181, "grad_norm": 15.22093391418457, "learning_rate": 7.640000000000001e-06, "loss": 1.1393, "step": 11810 }, { "epoch": 1.182, "grad_norm": 8.65494441986084, "learning_rate": 7.638e-06, "loss": 1.0385, "step": 11820 }, { "epoch": 1.183, "grad_norm": 8.67208194732666, "learning_rate": 7.636e-06, "loss": 1.3423, "step": 11830 }, { "epoch": 1.184, "grad_norm": 6.062466144561768, "learning_rate": 7.634e-06, "loss": 1.5509, "step": 11840 }, { "epoch": 1.185, "grad_norm": 13.864858627319336, "learning_rate": 7.632e-06, "loss": 1.2959, "step": 11850 }, { "epoch": 1.186, "grad_norm": 11.065735816955566, "learning_rate": 7.630000000000001e-06, "loss": 1.0881, "step": 11860 }, { "epoch": 1.187, "grad_norm": 10.879989624023438, "learning_rate": 7.628000000000001e-06, "loss": 1.5182, "step": 11870 }, { "epoch": 1.188, "grad_norm": 7.6342010498046875, 
"learning_rate": 7.626e-06, "loss": 1.148, "step": 11880 }, { "epoch": 1.189, "grad_norm": 9.86660099029541, "learning_rate": 7.624e-06, "loss": 1.3272, "step": 11890 }, { "epoch": 1.19, "grad_norm": 8.711140632629395, "learning_rate": 7.622000000000001e-06, "loss": 1.1746, "step": 11900 }, { "epoch": 1.191, "grad_norm": 10.63333511352539, "learning_rate": 7.620000000000001e-06, "loss": 1.2356, "step": 11910 }, { "epoch": 1.192, "grad_norm": 12.542672157287598, "learning_rate": 7.618000000000001e-06, "loss": 1.4485, "step": 11920 }, { "epoch": 1.193, "grad_norm": 9.662715911865234, "learning_rate": 7.616000000000001e-06, "loss": 1.3878, "step": 11930 }, { "epoch": 1.194, "grad_norm": 9.200409889221191, "learning_rate": 7.614e-06, "loss": 1.1596, "step": 11940 }, { "epoch": 1.195, "grad_norm": 7.914130687713623, "learning_rate": 7.612e-06, "loss": 1.4122, "step": 11950 }, { "epoch": 1.196, "grad_norm": 7.759999752044678, "learning_rate": 7.610000000000001e-06, "loss": 1.0072, "step": 11960 }, { "epoch": 1.197, "grad_norm": 8.724486351013184, "learning_rate": 7.608000000000001e-06, "loss": 1.1909, "step": 11970 }, { "epoch": 1.198, "grad_norm": 11.46352481842041, "learning_rate": 7.606000000000001e-06, "loss": 1.3458, "step": 11980 }, { "epoch": 1.199, "grad_norm": 12.991515159606934, "learning_rate": 7.604e-06, "loss": 1.1576, "step": 11990 }, { "epoch": 1.2, "grad_norm": 17.66514015197754, "learning_rate": 7.602e-06, "loss": 1.536, "step": 12000 }, { "epoch": 1.201, "grad_norm": 18.232656478881836, "learning_rate": 7.600000000000001e-06, "loss": 1.2761, "step": 12010 }, { "epoch": 1.202, "grad_norm": 12.759206771850586, "learning_rate": 7.598000000000001e-06, "loss": 1.5201, "step": 12020 }, { "epoch": 1.203, "grad_norm": 12.207402229309082, "learning_rate": 7.5960000000000015e-06, "loss": 1.3229, "step": 12030 }, { "epoch": 1.204, "grad_norm": 11.180853843688965, "learning_rate": 7.5940000000000005e-06, "loss": 1.1238, "step": 12040 }, { "epoch": 1.205, 
"grad_norm": 10.392066955566406, "learning_rate": 7.592e-06, "loss": 1.1989, "step": 12050 }, { "epoch": 1.206, "grad_norm": 5.742560863494873, "learning_rate": 7.590000000000001e-06, "loss": 1.3367, "step": 12060 }, { "epoch": 1.207, "grad_norm": 7.246772766113281, "learning_rate": 7.588000000000001e-06, "loss": 1.2227, "step": 12070 }, { "epoch": 1.208, "grad_norm": 13.143990516662598, "learning_rate": 7.586000000000001e-06, "loss": 1.2446, "step": 12080 }, { "epoch": 1.209, "grad_norm": 6.164926528930664, "learning_rate": 7.5840000000000006e-06, "loss": 1.2759, "step": 12090 }, { "epoch": 1.21, "grad_norm": 7.76356315612793, "learning_rate": 7.582e-06, "loss": 1.1186, "step": 12100 }, { "epoch": 1.211, "grad_norm": 16.62373161315918, "learning_rate": 7.58e-06, "loss": 1.3238, "step": 12110 }, { "epoch": 1.212, "grad_norm": 6.038971424102783, "learning_rate": 7.578000000000001e-06, "loss": 1.242, "step": 12120 }, { "epoch": 1.213, "grad_norm": 11.073966026306152, "learning_rate": 7.576000000000001e-06, "loss": 1.1042, "step": 12130 }, { "epoch": 1.214, "grad_norm": 10.634995460510254, "learning_rate": 7.574e-06, "loss": 1.4405, "step": 12140 }, { "epoch": 1.215, "grad_norm": 16.380935668945312, "learning_rate": 7.5720000000000005e-06, "loss": 1.4092, "step": 12150 }, { "epoch": 1.216, "grad_norm": 13.855578422546387, "learning_rate": 7.57e-06, "loss": 1.2732, "step": 12160 }, { "epoch": 1.217, "grad_norm": 8.202617645263672, "learning_rate": 7.568000000000001e-06, "loss": 1.3364, "step": 12170 }, { "epoch": 1.218, "grad_norm": 17.642065048217773, "learning_rate": 7.566000000000001e-06, "loss": 1.4802, "step": 12180 }, { "epoch": 1.219, "grad_norm": 4.845138072967529, "learning_rate": 7.564e-06, "loss": 1.2918, "step": 12190 }, { "epoch": 1.22, "grad_norm": 6.942539691925049, "learning_rate": 7.562000000000001e-06, "loss": 1.0857, "step": 12200 }, { "epoch": 1.221, "grad_norm": 25.676342010498047, "learning_rate": 7.5600000000000005e-06, "loss": 1.0119, "step": 
12210 }, { "epoch": 1.222, "grad_norm": 11.522448539733887, "learning_rate": 7.558000000000001e-06, "loss": 1.3195, "step": 12220 }, { "epoch": 1.223, "grad_norm": 11.814552307128906, "learning_rate": 7.556000000000001e-06, "loss": 1.0554, "step": 12230 }, { "epoch": 1.224, "grad_norm": 15.423279762268066, "learning_rate": 7.554e-06, "loss": 1.4303, "step": 12240 }, { "epoch": 1.225, "grad_norm": 13.271688461303711, "learning_rate": 7.552000000000001e-06, "loss": 1.3854, "step": 12250 }, { "epoch": 1.226, "grad_norm": 16.32430648803711, "learning_rate": 7.5500000000000006e-06, "loss": 1.4334, "step": 12260 }, { "epoch": 1.227, "grad_norm": 6.646535873413086, "learning_rate": 7.548000000000001e-06, "loss": 1.3039, "step": 12270 }, { "epoch": 1.228, "grad_norm": 12.599492073059082, "learning_rate": 7.546000000000001e-06, "loss": 1.156, "step": 12280 }, { "epoch": 1.229, "grad_norm": 17.048748016357422, "learning_rate": 7.544e-06, "loss": 1.2489, "step": 12290 }, { "epoch": 1.23, "grad_norm": 8.029606819152832, "learning_rate": 7.542000000000001e-06, "loss": 1.2779, "step": 12300 }, { "epoch": 1.231, "grad_norm": 24.1705265045166, "learning_rate": 7.540000000000001e-06, "loss": 1.1881, "step": 12310 }, { "epoch": 1.232, "grad_norm": 9.57801628112793, "learning_rate": 7.5380000000000005e-06, "loss": 1.1823, "step": 12320 }, { "epoch": 1.233, "grad_norm": 5.943355083465576, "learning_rate": 7.536000000000001e-06, "loss": 0.9428, "step": 12330 }, { "epoch": 1.234, "grad_norm": 21.14099884033203, "learning_rate": 7.534e-06, "loss": 1.4832, "step": 12340 }, { "epoch": 1.2349999999999999, "grad_norm": 10.142439842224121, "learning_rate": 7.532e-06, "loss": 1.2087, "step": 12350 }, { "epoch": 1.236, "grad_norm": 28.825613021850586, "learning_rate": 7.530000000000001e-06, "loss": 1.1499, "step": 12360 }, { "epoch": 1.237, "grad_norm": 11.529026985168457, "learning_rate": 7.528000000000001e-06, "loss": 1.2444, "step": 12370 }, { "epoch": 1.238, "grad_norm": 16.578399658203125, 
"learning_rate": 7.526000000000001e-06, "loss": 1.2293, "step": 12380 }, { "epoch": 1.2389999999999999, "grad_norm": 34.516788482666016, "learning_rate": 7.524e-06, "loss": 1.1576, "step": 12390 }, { "epoch": 1.24, "grad_norm": 25.480560302734375, "learning_rate": 7.522e-06, "loss": 1.1018, "step": 12400 }, { "epoch": 1.241, "grad_norm": 20.168014526367188, "learning_rate": 7.520000000000001e-06, "loss": 0.8801, "step": 12410 }, { "epoch": 1.242, "grad_norm": 28.887697219848633, "learning_rate": 7.518000000000001e-06, "loss": 1.2454, "step": 12420 }, { "epoch": 1.2429999999999999, "grad_norm": 11.10959243774414, "learning_rate": 7.516000000000001e-06, "loss": 1.3504, "step": 12430 }, { "epoch": 1.244, "grad_norm": 19.177244186401367, "learning_rate": 7.514e-06, "loss": 1.5339, "step": 12440 }, { "epoch": 1.245, "grad_norm": 24.700956344604492, "learning_rate": 7.512e-06, "loss": 1.5022, "step": 12450 }, { "epoch": 1.246, "grad_norm": 22.214466094970703, "learning_rate": 7.510000000000001e-06, "loss": 1.2557, "step": 12460 }, { "epoch": 1.2469999999999999, "grad_norm": 20.928890228271484, "learning_rate": 7.508000000000001e-06, "loss": 1.2582, "step": 12470 }, { "epoch": 1.248, "grad_norm": 12.99563217163086, "learning_rate": 7.506000000000001e-06, "loss": 1.4748, "step": 12480 }, { "epoch": 1.249, "grad_norm": 7.460651874542236, "learning_rate": 7.5040000000000005e-06, "loss": 1.0679, "step": 12490 }, { "epoch": 1.25, "grad_norm": 17.53704071044922, "learning_rate": 7.502e-06, "loss": 1.4555, "step": 12500 }, { "epoch": 1.251, "grad_norm": 10.883577346801758, "learning_rate": 7.500000000000001e-06, "loss": 1.1509, "step": 12510 }, { "epoch": 1.252, "grad_norm": 6.623632431030273, "learning_rate": 7.498000000000001e-06, "loss": 1.2003, "step": 12520 }, { "epoch": 1.2530000000000001, "grad_norm": 7.478917598724365, "learning_rate": 7.496000000000001e-06, "loss": 1.0195, "step": 12530 }, { "epoch": 1.254, "grad_norm": 8.727287292480469, "learning_rate": 
7.494000000000001e-06, "loss": 1.4454, "step": 12540 }, { "epoch": 1.255, "grad_norm": 12.742966651916504, "learning_rate": 7.4920000000000004e-06, "loss": 1.1863, "step": 12550 }, { "epoch": 1.256, "grad_norm": 21.417184829711914, "learning_rate": 7.49e-06, "loss": 1.445, "step": 12560 }, { "epoch": 1.2570000000000001, "grad_norm": 8.93481159210205, "learning_rate": 7.488000000000001e-06, "loss": 0.986, "step": 12570 }, { "epoch": 1.258, "grad_norm": 13.929096221923828, "learning_rate": 7.486000000000001e-06, "loss": 1.1862, "step": 12580 }, { "epoch": 1.259, "grad_norm": 11.778721809387207, "learning_rate": 7.484e-06, "loss": 1.2531, "step": 12590 }, { "epoch": 1.26, "grad_norm": 17.261938095092773, "learning_rate": 7.4820000000000005e-06, "loss": 1.3644, "step": 12600 }, { "epoch": 1.2610000000000001, "grad_norm": 16.391620635986328, "learning_rate": 7.48e-06, "loss": 1.4228, "step": 12610 }, { "epoch": 1.262, "grad_norm": 10.888384819030762, "learning_rate": 7.478000000000001e-06, "loss": 1.2846, "step": 12620 }, { "epoch": 1.263, "grad_norm": 10.086799621582031, "learning_rate": 7.476000000000001e-06, "loss": 1.2088, "step": 12630 }, { "epoch": 1.264, "grad_norm": 13.628010749816895, "learning_rate": 7.474e-06, "loss": 1.4312, "step": 12640 }, { "epoch": 1.2650000000000001, "grad_norm": 11.260024070739746, "learning_rate": 7.472000000000001e-06, "loss": 1.2929, "step": 12650 }, { "epoch": 1.266, "grad_norm": 9.340113639831543, "learning_rate": 7.4700000000000005e-06, "loss": 1.5371, "step": 12660 }, { "epoch": 1.267, "grad_norm": 13.441415786743164, "learning_rate": 7.468000000000001e-06, "loss": 1.1524, "step": 12670 }, { "epoch": 1.268, "grad_norm": 8.984028816223145, "learning_rate": 7.466000000000001e-06, "loss": 1.3771, "step": 12680 }, { "epoch": 1.2690000000000001, "grad_norm": 7.937169551849365, "learning_rate": 7.464e-06, "loss": 1.1503, "step": 12690 }, { "epoch": 1.27, "grad_norm": 6.185277938842773, "learning_rate": 7.462000000000001e-06, "loss": 
1.0524, "step": 12700 }, { "epoch": 1.271, "grad_norm": 9.390480041503906, "learning_rate": 7.4600000000000006e-06, "loss": 1.2773, "step": 12710 }, { "epoch": 1.272, "grad_norm": 11.57876205444336, "learning_rate": 7.458e-06, "loss": 1.4091, "step": 12720 }, { "epoch": 1.2730000000000001, "grad_norm": 8.398019790649414, "learning_rate": 7.456000000000001e-06, "loss": 1.1726, "step": 12730 }, { "epoch": 1.274, "grad_norm": 6.641817092895508, "learning_rate": 7.454e-06, "loss": 1.0784, "step": 12740 }, { "epoch": 1.275, "grad_norm": 11.231045722961426, "learning_rate": 7.452e-06, "loss": 1.5585, "step": 12750 }, { "epoch": 1.276, "grad_norm": 12.783620834350586, "learning_rate": 7.450000000000001e-06, "loss": 1.137, "step": 12760 }, { "epoch": 1.2770000000000001, "grad_norm": 11.353863716125488, "learning_rate": 7.4480000000000005e-06, "loss": 1.3389, "step": 12770 }, { "epoch": 1.278, "grad_norm": 12.47057056427002, "learning_rate": 7.446000000000001e-06, "loss": 1.3517, "step": 12780 }, { "epoch": 1.279, "grad_norm": 9.973525047302246, "learning_rate": 7.444e-06, "loss": 1.1048, "step": 12790 }, { "epoch": 1.28, "grad_norm": 8.632057189941406, "learning_rate": 7.442e-06, "loss": 1.0833, "step": 12800 }, { "epoch": 1.2810000000000001, "grad_norm": 8.092228889465332, "learning_rate": 7.440000000000001e-06, "loss": 1.3958, "step": 12810 }, { "epoch": 1.282, "grad_norm": 7.521067142486572, "learning_rate": 7.438000000000001e-06, "loss": 1.4777, "step": 12820 }, { "epoch": 1.283, "grad_norm": 17.62495231628418, "learning_rate": 7.436000000000001e-06, "loss": 1.5142, "step": 12830 }, { "epoch": 1.284, "grad_norm": 18.596641540527344, "learning_rate": 7.434e-06, "loss": 1.2066, "step": 12840 }, { "epoch": 1.285, "grad_norm": 20.701465606689453, "learning_rate": 7.432e-06, "loss": 1.2025, "step": 12850 }, { "epoch": 1.286, "grad_norm": 7.6519246101379395, "learning_rate": 7.430000000000001e-06, "loss": 1.334, "step": 12860 }, { "epoch": 1.287, "grad_norm": 
7.881246089935303, "learning_rate": 7.428000000000001e-06, "loss": 1.1272, "step": 12870 }, { "epoch": 1.288, "grad_norm": 12.39816951751709, "learning_rate": 7.426000000000001e-06, "loss": 1.2971, "step": 12880 }, { "epoch": 1.289, "grad_norm": 15.356783866882324, "learning_rate": 7.424e-06, "loss": 1.3581, "step": 12890 }, { "epoch": 1.29, "grad_norm": 15.02816104888916, "learning_rate": 7.422e-06, "loss": 1.2647, "step": 12900 }, { "epoch": 1.291, "grad_norm": 10.141355514526367, "learning_rate": 7.420000000000001e-06, "loss": 1.2841, "step": 12910 }, { "epoch": 1.292, "grad_norm": 9.79174518585205, "learning_rate": 7.418000000000001e-06, "loss": 1.3142, "step": 12920 }, { "epoch": 1.293, "grad_norm": 11.350287437438965, "learning_rate": 7.416000000000001e-06, "loss": 1.2416, "step": 12930 }, { "epoch": 1.294, "grad_norm": 7.587865829467773, "learning_rate": 7.4140000000000005e-06, "loss": 1.1207, "step": 12940 }, { "epoch": 1.295, "grad_norm": 9.37651538848877, "learning_rate": 7.412e-06, "loss": 1.2608, "step": 12950 }, { "epoch": 1.296, "grad_norm": 13.245941162109375, "learning_rate": 7.41e-06, "loss": 1.139, "step": 12960 }, { "epoch": 1.297, "grad_norm": 8.777225494384766, "learning_rate": 7.408000000000001e-06, "loss": 1.13, "step": 12970 }, { "epoch": 1.298, "grad_norm": 13.86638069152832, "learning_rate": 7.406000000000001e-06, "loss": 1.385, "step": 12980 }, { "epoch": 1.299, "grad_norm": 13.747764587402344, "learning_rate": 7.404e-06, "loss": 1.2783, "step": 12990 }, { "epoch": 1.3, "grad_norm": 10.837451934814453, "learning_rate": 7.4020000000000005e-06, "loss": 1.5108, "step": 13000 }, { "epoch": 1.301, "grad_norm": 14.206830978393555, "learning_rate": 7.4e-06, "loss": 1.3594, "step": 13010 }, { "epoch": 1.302, "grad_norm": 9.273879051208496, "learning_rate": 7.398000000000001e-06, "loss": 1.153, "step": 13020 }, { "epoch": 1.303, "grad_norm": 7.176966667175293, "learning_rate": 7.396000000000001e-06, "loss": 1.2381, "step": 13030 }, { "epoch": 
1.304, "grad_norm": 11.362279891967773, "learning_rate": 7.394e-06, "loss": 1.2488, "step": 13040 }, { "epoch": 1.305, "grad_norm": 7.201206684112549, "learning_rate": 7.3920000000000005e-06, "loss": 0.9841, "step": 13050 }, { "epoch": 1.306, "grad_norm": 10.67971134185791, "learning_rate": 7.39e-06, "loss": 1.35, "step": 13060 }, { "epoch": 1.307, "grad_norm": 10.723569869995117, "learning_rate": 7.388000000000001e-06, "loss": 1.0655, "step": 13070 }, { "epoch": 1.308, "grad_norm": 8.019424438476562, "learning_rate": 7.386000000000001e-06, "loss": 1.2706, "step": 13080 }, { "epoch": 1.309, "grad_norm": 10.49849796295166, "learning_rate": 7.384e-06, "loss": 1.0582, "step": 13090 }, { "epoch": 1.31, "grad_norm": 14.695592880249023, "learning_rate": 7.382000000000001e-06, "loss": 1.4206, "step": 13100 }, { "epoch": 1.311, "grad_norm": 9.850425720214844, "learning_rate": 7.3800000000000005e-06, "loss": 1.2525, "step": 13110 }, { "epoch": 1.312, "grad_norm": 13.8251953125, "learning_rate": 7.378e-06, "loss": 1.4042, "step": 13120 }, { "epoch": 1.313, "grad_norm": 21.2772274017334, "learning_rate": 7.376000000000001e-06, "loss": 1.426, "step": 13130 }, { "epoch": 1.314, "grad_norm": 5.766829490661621, "learning_rate": 7.3742000000000005e-06, "loss": 1.339, "step": 13140 }, { "epoch": 1.315, "grad_norm": 23.32453727722168, "learning_rate": 7.3722e-06, "loss": 1.382, "step": 13150 }, { "epoch": 1.316, "grad_norm": 12.960734367370605, "learning_rate": 7.370200000000001e-06, "loss": 1.2511, "step": 13160 }, { "epoch": 1.317, "grad_norm": 13.24185562133789, "learning_rate": 7.368200000000001e-06, "loss": 1.3393, "step": 13170 }, { "epoch": 1.318, "grad_norm": 10.45119857788086, "learning_rate": 7.366200000000001e-06, "loss": 1.2233, "step": 13180 }, { "epoch": 1.319, "grad_norm": 5.770294666290283, "learning_rate": 7.3642000000000006e-06, "loss": 1.2802, "step": 13190 }, { "epoch": 1.32, "grad_norm": 11.63404655456543, "learning_rate": 7.3622000000000004e-06, "loss": 1.3399, 
"step": 13200 }, { "epoch": 1.321, "grad_norm": 8.770018577575684, "learning_rate": 7.3602e-06, "loss": 1.3127, "step": 13210 }, { "epoch": 1.322, "grad_norm": 8.970390319824219, "learning_rate": 7.358200000000001e-06, "loss": 1.1754, "step": 13220 }, { "epoch": 1.323, "grad_norm": 9.195228576660156, "learning_rate": 7.356200000000001e-06, "loss": 1.3402, "step": 13230 }, { "epoch": 1.324, "grad_norm": 11.386528968811035, "learning_rate": 7.3542e-06, "loss": 1.1891, "step": 13240 }, { "epoch": 1.325, "grad_norm": 7.875195026397705, "learning_rate": 7.3522000000000005e-06, "loss": 1.2294, "step": 13250 }, { "epoch": 1.326, "grad_norm": 15.11997127532959, "learning_rate": 7.3502e-06, "loss": 1.4679, "step": 13260 }, { "epoch": 1.327, "grad_norm": 6.250880241394043, "learning_rate": 7.348200000000001e-06, "loss": 1.2582, "step": 13270 }, { "epoch": 1.328, "grad_norm": 13.189786911010742, "learning_rate": 7.346200000000001e-06, "loss": 1.2988, "step": 13280 }, { "epoch": 1.329, "grad_norm": 7.0882720947265625, "learning_rate": 7.3442e-06, "loss": 1.5291, "step": 13290 }, { "epoch": 1.33, "grad_norm": 5.95711088180542, "learning_rate": 7.342200000000001e-06, "loss": 1.1733, "step": 13300 }, { "epoch": 1.331, "grad_norm": 8.673683166503906, "learning_rate": 7.3402000000000005e-06, "loss": 1.007, "step": 13310 }, { "epoch": 1.332, "grad_norm": 6.83214807510376, "learning_rate": 7.338200000000001e-06, "loss": 1.4372, "step": 13320 }, { "epoch": 1.333, "grad_norm": 10.508989334106445, "learning_rate": 7.336200000000001e-06, "loss": 1.3155, "step": 13330 }, { "epoch": 1.334, "grad_norm": 7.366535663604736, "learning_rate": 7.3342e-06, "loss": 1.3146, "step": 13340 }, { "epoch": 1.335, "grad_norm": 16.379432678222656, "learning_rate": 7.332200000000001e-06, "loss": 1.4279, "step": 13350 }, { "epoch": 1.336, "grad_norm": 4.6006364822387695, "learning_rate": 7.3302000000000006e-06, "loss": 1.2862, "step": 13360 }, { "epoch": 1.337, "grad_norm": 13.502737045288086, 
"learning_rate": 7.328200000000001e-06, "loss": 1.2018, "step": 13370 }, { "epoch": 1.338, "grad_norm": 7.910653591156006, "learning_rate": 7.326200000000001e-06, "loss": 1.2187, "step": 13380 }, { "epoch": 1.339, "grad_norm": 6.6376848220825195, "learning_rate": 7.3242e-06, "loss": 1.2417, "step": 13390 }, { "epoch": 1.34, "grad_norm": 10.964859962463379, "learning_rate": 7.322200000000001e-06, "loss": 1.3109, "step": 13400 }, { "epoch": 1.341, "grad_norm": 9.054771423339844, "learning_rate": 7.320200000000001e-06, "loss": 1.2533, "step": 13410 }, { "epoch": 1.342, "grad_norm": 9.843973159790039, "learning_rate": 7.3182000000000005e-06, "loss": 1.4883, "step": 13420 }, { "epoch": 1.343, "grad_norm": 8.225391387939453, "learning_rate": 7.316200000000001e-06, "loss": 1.2779, "step": 13430 }, { "epoch": 1.3439999999999999, "grad_norm": 13.641351699829102, "learning_rate": 7.3142e-06, "loss": 1.2187, "step": 13440 }, { "epoch": 1.345, "grad_norm": 9.638875961303711, "learning_rate": 7.3122e-06, "loss": 1.3032, "step": 13450 }, { "epoch": 1.346, "grad_norm": 6.350509166717529, "learning_rate": 7.310200000000001e-06, "loss": 1.2572, "step": 13460 }, { "epoch": 1.347, "grad_norm": 9.542150497436523, "learning_rate": 7.308200000000001e-06, "loss": 0.7907, "step": 13470 }, { "epoch": 1.3479999999999999, "grad_norm": 8.261292457580566, "learning_rate": 7.306200000000001e-06, "loss": 1.2668, "step": 13480 }, { "epoch": 1.349, "grad_norm": 7.193455219268799, "learning_rate": 7.3042e-06, "loss": 1.5214, "step": 13490 }, { "epoch": 1.35, "grad_norm": 7.827322483062744, "learning_rate": 7.3022e-06, "loss": 1.108, "step": 13500 }, { "epoch": 1.351, "grad_norm": 13.296591758728027, "learning_rate": 7.300200000000001e-06, "loss": 1.4409, "step": 13510 }, { "epoch": 1.3519999999999999, "grad_norm": 7.833558082580566, "learning_rate": 7.298200000000001e-06, "loss": 1.2518, "step": 13520 }, { "epoch": 1.353, "grad_norm": 9.504091262817383, "learning_rate": 7.296200000000001e-06, 
"loss": 1.4139, "step": 13530 }, { "epoch": 1.354, "grad_norm": 8.132758140563965, "learning_rate": 7.2942e-06, "loss": 1.1185, "step": 13540 }, { "epoch": 1.355, "grad_norm": 9.723896980285645, "learning_rate": 7.2922e-06, "loss": 1.1614, "step": 13550 }, { "epoch": 1.3559999999999999, "grad_norm": 11.908557891845703, "learning_rate": 7.290200000000001e-06, "loss": 1.4909, "step": 13560 }, { "epoch": 1.357, "grad_norm": 5.330709934234619, "learning_rate": 7.288200000000001e-06, "loss": 1.0516, "step": 13570 }, { "epoch": 1.358, "grad_norm": 8.052712440490723, "learning_rate": 7.286200000000001e-06, "loss": 1.3234, "step": 13580 }, { "epoch": 1.359, "grad_norm": 10.394590377807617, "learning_rate": 7.2842000000000005e-06, "loss": 1.2204, "step": 13590 }, { "epoch": 1.3599999999999999, "grad_norm": 14.300814628601074, "learning_rate": 7.2822e-06, "loss": 1.3402, "step": 13600 }, { "epoch": 1.361, "grad_norm": 8.241515159606934, "learning_rate": 7.2802e-06, "loss": 1.1731, "step": 13610 }, { "epoch": 1.362, "grad_norm": 8.186539649963379, "learning_rate": 7.278200000000001e-06, "loss": 1.2804, "step": 13620 }, { "epoch": 1.363, "grad_norm": 10.468670845031738, "learning_rate": 7.276200000000001e-06, "loss": 1.2455, "step": 13630 }, { "epoch": 1.3639999999999999, "grad_norm": 9.754251480102539, "learning_rate": 7.2742e-06, "loss": 1.0813, "step": 13640 }, { "epoch": 1.365, "grad_norm": 11.134176254272461, "learning_rate": 7.2722000000000004e-06, "loss": 1.2608, "step": 13650 }, { "epoch": 1.366, "grad_norm": 11.062118530273438, "learning_rate": 7.2702e-06, "loss": 1.0378, "step": 13660 }, { "epoch": 1.367, "grad_norm": 26.284685134887695, "learning_rate": 7.268200000000001e-06, "loss": 1.6669, "step": 13670 }, { "epoch": 1.3679999999999999, "grad_norm": 9.014432907104492, "learning_rate": 7.266200000000001e-06, "loss": 1.2529, "step": 13680 }, { "epoch": 1.369, "grad_norm": 14.38024616241455, "learning_rate": 7.2642e-06, "loss": 1.1239, "step": 13690 }, { "epoch": 
1.37, "grad_norm": 9.443144798278809, "learning_rate": 7.2622000000000005e-06, "loss": 1.5148, "step": 13700 }, { "epoch": 1.371, "grad_norm": 12.21552848815918, "learning_rate": 7.2602e-06, "loss": 1.1013, "step": 13710 }, { "epoch": 1.3719999999999999, "grad_norm": 8.875152587890625, "learning_rate": 7.258200000000001e-06, "loss": 1.1181, "step": 13720 }, { "epoch": 1.373, "grad_norm": 14.22554874420166, "learning_rate": 7.256200000000001e-06, "loss": 1.1689, "step": 13730 }, { "epoch": 1.374, "grad_norm": 7.53236722946167, "learning_rate": 7.2542e-06, "loss": 1.2582, "step": 13740 }, { "epoch": 1.375, "grad_norm": 11.949944496154785, "learning_rate": 7.252200000000001e-06, "loss": 1.2268, "step": 13750 }, { "epoch": 1.376, "grad_norm": 11.314998626708984, "learning_rate": 7.2502000000000005e-06, "loss": 1.3058, "step": 13760 }, { "epoch": 1.377, "grad_norm": 10.118354797363281, "learning_rate": 7.248200000000001e-06, "loss": 1.2166, "step": 13770 }, { "epoch": 1.3780000000000001, "grad_norm": 16.461986541748047, "learning_rate": 7.246200000000001e-06, "loss": 1.2769, "step": 13780 }, { "epoch": 1.379, "grad_norm": 4.6789069175720215, "learning_rate": 7.2442e-06, "loss": 1.4389, "step": 13790 }, { "epoch": 1.38, "grad_norm": 5.110429763793945, "learning_rate": 7.242200000000001e-06, "loss": 1.1765, "step": 13800 }, { "epoch": 1.381, "grad_norm": 8.799117088317871, "learning_rate": 7.240200000000001e-06, "loss": 1.3508, "step": 13810 }, { "epoch": 1.3820000000000001, "grad_norm": 14.751773834228516, "learning_rate": 7.2382000000000004e-06, "loss": 1.215, "step": 13820 }, { "epoch": 1.383, "grad_norm": 15.019950866699219, "learning_rate": 7.236200000000001e-06, "loss": 0.9775, "step": 13830 }, { "epoch": 1.384, "grad_norm": 8.33393383026123, "learning_rate": 7.2342e-06, "loss": 1.2834, "step": 13840 }, { "epoch": 1.385, "grad_norm": 10.742020606994629, "learning_rate": 7.2322e-06, "loss": 1.1097, "step": 13850 }, { "epoch": 1.3860000000000001, "grad_norm": 
7.863580226898193, "learning_rate": 7.230200000000001e-06, "loss": 1.2195, "step": 13860 }, { "epoch": 1.387, "grad_norm": 12.246222496032715, "learning_rate": 7.2282000000000005e-06, "loss": 1.3681, "step": 13870 }, { "epoch": 1.388, "grad_norm": 25.65707015991211, "learning_rate": 7.226200000000001e-06, "loss": 1.1165, "step": 13880 }, { "epoch": 1.389, "grad_norm": 5.25365686416626, "learning_rate": 7.2242e-06, "loss": 1.3041, "step": 13890 }, { "epoch": 1.3900000000000001, "grad_norm": 10.239368438720703, "learning_rate": 7.2222e-06, "loss": 1.1155, "step": 13900 }, { "epoch": 1.391, "grad_norm": 11.383076667785645, "learning_rate": 7.220200000000001e-06, "loss": 1.6512, "step": 13910 }, { "epoch": 1.392, "grad_norm": 9.969770431518555, "learning_rate": 7.218200000000001e-06, "loss": 0.9722, "step": 13920 }, { "epoch": 1.393, "grad_norm": 25.426753997802734, "learning_rate": 7.216200000000001e-06, "loss": 1.3349, "step": 13930 }, { "epoch": 1.3940000000000001, "grad_norm": 9.211446762084961, "learning_rate": 7.2142e-06, "loss": 1.4592, "step": 13940 }, { "epoch": 1.395, "grad_norm": 7.327635288238525, "learning_rate": 7.2122e-06, "loss": 0.9169, "step": 13950 }, { "epoch": 1.396, "grad_norm": 45.76454544067383, "learning_rate": 7.210200000000001e-06, "loss": 1.3494, "step": 13960 }, { "epoch": 1.397, "grad_norm": 18.0378475189209, "learning_rate": 7.208200000000001e-06, "loss": 1.3069, "step": 13970 }, { "epoch": 1.3980000000000001, "grad_norm": 14.383275032043457, "learning_rate": 7.2062000000000006e-06, "loss": 1.3919, "step": 13980 }, { "epoch": 1.399, "grad_norm": 21.374101638793945, "learning_rate": 7.2042e-06, "loss": 1.0944, "step": 13990 }, { "epoch": 1.4, "grad_norm": 11.653973579406738, "learning_rate": 7.2022e-06, "loss": 1.1478, "step": 14000 }, { "epoch": 1.401, "grad_norm": 7.208438873291016, "learning_rate": 7.2002e-06, "loss": 1.1793, "step": 14010 }, { "epoch": 1.4020000000000001, "grad_norm": 13.47805118560791, "learning_rate": 
7.198200000000001e-06, "loss": 1.1393, "step": 14020 }, { "epoch": 1.403, "grad_norm": 18.286855697631836, "learning_rate": 7.196200000000001e-06, "loss": 1.1759, "step": 14030 }, { "epoch": 1.404, "grad_norm": 24.58702278137207, "learning_rate": 7.1942e-06, "loss": 1.2584, "step": 14040 }, { "epoch": 1.405, "grad_norm": 10.144038200378418, "learning_rate": 7.1922e-06, "loss": 1.3497, "step": 14050 }, { "epoch": 1.4060000000000001, "grad_norm": 10.199417114257812, "learning_rate": 7.1902e-06, "loss": 1.2209, "step": 14060 }, { "epoch": 1.407, "grad_norm": 10.762490272521973, "learning_rate": 7.188200000000001e-06, "loss": 1.3025, "step": 14070 }, { "epoch": 1.408, "grad_norm": 8.375807762145996, "learning_rate": 7.186200000000001e-06, "loss": 1.1715, "step": 14080 }, { "epoch": 1.409, "grad_norm": 19.626306533813477, "learning_rate": 7.1842e-06, "loss": 1.4135, "step": 14090 }, { "epoch": 1.41, "grad_norm": 9.503130912780762, "learning_rate": 7.1822000000000005e-06, "loss": 1.2306, "step": 14100 }, { "epoch": 1.411, "grad_norm": 9.41046142578125, "learning_rate": 7.1802e-06, "loss": 1.3685, "step": 14110 }, { "epoch": 1.412, "grad_norm": 8.226174354553223, "learning_rate": 7.178200000000001e-06, "loss": 1.4341, "step": 14120 }, { "epoch": 1.413, "grad_norm": 9.411349296569824, "learning_rate": 7.176200000000001e-06, "loss": 1.2575, "step": 14130 }, { "epoch": 1.414, "grad_norm": 6.921158313751221, "learning_rate": 7.1742e-06, "loss": 1.2587, "step": 14140 }, { "epoch": 1.415, "grad_norm": 16.738494873046875, "learning_rate": 7.1722000000000006e-06, "loss": 1.3701, "step": 14150 }, { "epoch": 1.416, "grad_norm": 9.509946823120117, "learning_rate": 7.1702e-06, "loss": 1.1406, "step": 14160 }, { "epoch": 1.417, "grad_norm": 8.499342918395996, "learning_rate": 7.168200000000001e-06, "loss": 1.3474, "step": 14170 }, { "epoch": 1.418, "grad_norm": 6.621070861816406, "learning_rate": 7.166200000000001e-06, "loss": 0.9643, "step": 14180 }, { "epoch": 1.419, "grad_norm": 
11.926077842712402, "learning_rate": 7.1642e-06, "loss": 1.2282, "step": 14190 }, { "epoch": 1.42, "grad_norm": 15.777254104614258, "learning_rate": 7.162200000000001e-06, "loss": 1.2887, "step": 14200 }, { "epoch": 1.421, "grad_norm": 14.917375564575195, "learning_rate": 7.1602000000000005e-06, "loss": 1.1224, "step": 14210 }, { "epoch": 1.422, "grad_norm": 15.165826797485352, "learning_rate": 7.1582e-06, "loss": 1.3799, "step": 14220 }, { "epoch": 1.423, "grad_norm": 13.986403465270996, "learning_rate": 7.156200000000001e-06, "loss": 1.5858, "step": 14230 }, { "epoch": 1.424, "grad_norm": 11.449603080749512, "learning_rate": 7.1542e-06, "loss": 1.4463, "step": 14240 }, { "epoch": 1.425, "grad_norm": 7.283018112182617, "learning_rate": 7.1522e-06, "loss": 1.3586, "step": 14250 }, { "epoch": 1.426, "grad_norm": 8.587998390197754, "learning_rate": 7.150200000000001e-06, "loss": 1.3836, "step": 14260 }, { "epoch": 1.427, "grad_norm": 7.399351596832275, "learning_rate": 7.1482000000000004e-06, "loss": 1.023, "step": 14270 }, { "epoch": 1.428, "grad_norm": 8.503747940063477, "learning_rate": 7.146200000000001e-06, "loss": 1.5389, "step": 14280 }, { "epoch": 1.429, "grad_norm": 5.592132568359375, "learning_rate": 7.144200000000001e-06, "loss": 1.2795, "step": 14290 }, { "epoch": 1.43, "grad_norm": 13.111690521240234, "learning_rate": 7.1422e-06, "loss": 1.2639, "step": 14300 }, { "epoch": 1.431, "grad_norm": 8.574743270874023, "learning_rate": 7.140200000000001e-06, "loss": 1.2554, "step": 14310 }, { "epoch": 1.432, "grad_norm": 7.181344509124756, "learning_rate": 7.1382000000000005e-06, "loss": 1.1177, "step": 14320 }, { "epoch": 1.433, "grad_norm": 9.98621940612793, "learning_rate": 7.136200000000001e-06, "loss": 1.4668, "step": 14330 }, { "epoch": 1.434, "grad_norm": 12.399256706237793, "learning_rate": 7.134200000000001e-06, "loss": 1.2394, "step": 14340 }, { "epoch": 1.435, "grad_norm": 6.919408321380615, "learning_rate": 7.1322e-06, "loss": 1.3338, "step": 14350 
}, { "epoch": 1.436, "grad_norm": 7.572735786437988, "learning_rate": 7.130200000000001e-06, "loss": 1.3126, "step": 14360 }, { "epoch": 1.437, "grad_norm": 7.382993221282959, "learning_rate": 7.128200000000001e-06, "loss": 1.1738, "step": 14370 }, { "epoch": 1.438, "grad_norm": 7.283327579498291, "learning_rate": 7.1262000000000005e-06, "loss": 1.0332, "step": 14380 }, { "epoch": 1.439, "grad_norm": 12.89913272857666, "learning_rate": 7.124200000000001e-06, "loss": 1.1935, "step": 14390 }, { "epoch": 1.44, "grad_norm": 12.027283668518066, "learning_rate": 7.1222e-06, "loss": 1.3171, "step": 14400 }, { "epoch": 1.441, "grad_norm": 8.95368480682373, "learning_rate": 7.1202e-06, "loss": 1.1091, "step": 14410 }, { "epoch": 1.442, "grad_norm": 16.604835510253906, "learning_rate": 7.118200000000001e-06, "loss": 1.3715, "step": 14420 }, { "epoch": 1.443, "grad_norm": 15.912152290344238, "learning_rate": 7.116200000000001e-06, "loss": 1.5249, "step": 14430 }, { "epoch": 1.444, "grad_norm": 9.198110580444336, "learning_rate": 7.114200000000001e-06, "loss": 1.3095, "step": 14440 }, { "epoch": 1.445, "grad_norm": 10.667344093322754, "learning_rate": 7.1122e-06, "loss": 1.0611, "step": 14450 }, { "epoch": 1.446, "grad_norm": 7.708301544189453, "learning_rate": 7.1102e-06, "loss": 1.1965, "step": 14460 }, { "epoch": 1.447, "grad_norm": 10.360297203063965, "learning_rate": 7.108200000000001e-06, "loss": 1.0722, "step": 14470 }, { "epoch": 1.448, "grad_norm": 4.330845355987549, "learning_rate": 7.106200000000001e-06, "loss": 1.1352, "step": 14480 }, { "epoch": 1.449, "grad_norm": 9.795511245727539, "learning_rate": 7.104200000000001e-06, "loss": 1.2205, "step": 14490 }, { "epoch": 1.45, "grad_norm": 12.50283145904541, "learning_rate": 7.1022e-06, "loss": 1.593, "step": 14500 }, { "epoch": 1.451, "grad_norm": 7.855091094970703, "learning_rate": 7.1002e-06, "loss": 1.1045, "step": 14510 }, { "epoch": 1.452, "grad_norm": 11.118106842041016, "learning_rate": 7.098200000000001e-06, 
"loss": 1.1553, "step": 14520 }, { "epoch": 1.453, "grad_norm": 18.665342330932617, "learning_rate": 7.096200000000001e-06, "loss": 1.1672, "step": 14530 }, { "epoch": 1.454, "grad_norm": 12.489663124084473, "learning_rate": 7.0942000000000015e-06, "loss": 1.249, "step": 14540 }, { "epoch": 1.455, "grad_norm": 16.2215518951416, "learning_rate": 7.0922000000000005e-06, "loss": 1.624, "step": 14550 }, { "epoch": 1.456, "grad_norm": 13.97439193725586, "learning_rate": 7.0902e-06, "loss": 1.4765, "step": 14560 }, { "epoch": 1.457, "grad_norm": 7.63004732131958, "learning_rate": 7.088200000000001e-06, "loss": 1.0086, "step": 14570 }, { "epoch": 1.458, "grad_norm": 10.624639511108398, "learning_rate": 7.086200000000001e-06, "loss": 1.1674, "step": 14580 }, { "epoch": 1.459, "grad_norm": 10.076787948608398, "learning_rate": 7.084200000000001e-06, "loss": 1.0841, "step": 14590 }, { "epoch": 1.46, "grad_norm": 19.40399169921875, "learning_rate": 7.0822000000000006e-06, "loss": 1.2659, "step": 14600 }, { "epoch": 1.461, "grad_norm": 11.296906471252441, "learning_rate": 7.0802e-06, "loss": 1.123, "step": 14610 }, { "epoch": 1.462, "grad_norm": 4.923567295074463, "learning_rate": 7.0782e-06, "loss": 1.1108, "step": 14620 }, { "epoch": 1.463, "grad_norm": 12.006319046020508, "learning_rate": 7.076200000000001e-06, "loss": 1.2832, "step": 14630 }, { "epoch": 1.464, "grad_norm": 17.01104736328125, "learning_rate": 7.074200000000001e-06, "loss": 1.2184, "step": 14640 }, { "epoch": 1.465, "grad_norm": 15.426312446594238, "learning_rate": 7.0722e-06, "loss": 1.4265, "step": 14650 }, { "epoch": 1.466, "grad_norm": 14.093155860900879, "learning_rate": 7.0702000000000005e-06, "loss": 1.3895, "step": 14660 }, { "epoch": 1.467, "grad_norm": 11.348994255065918, "learning_rate": 7.0682e-06, "loss": 1.2954, "step": 14670 }, { "epoch": 1.468, "grad_norm": 12.182045936584473, "learning_rate": 7.066200000000001e-06, "loss": 1.2248, "step": 14680 }, { "epoch": 1.4689999999999999, "grad_norm": 
9.7340087890625, "learning_rate": 7.064200000000001e-06, "loss": 1.1527, "step": 14690 }, { "epoch": 1.47, "grad_norm": 8.105725288391113, "learning_rate": 7.0622e-06, "loss": 1.4668, "step": 14700 }, { "epoch": 1.471, "grad_norm": 8.590025901794434, "learning_rate": 7.060200000000001e-06, "loss": 1.1746, "step": 14710 }, { "epoch": 1.472, "grad_norm": 7.178893089294434, "learning_rate": 7.0582000000000005e-06, "loss": 1.2665, "step": 14720 }, { "epoch": 1.4729999999999999, "grad_norm": 8.068900108337402, "learning_rate": 7.056200000000001e-06, "loss": 1.2621, "step": 14730 }, { "epoch": 1.474, "grad_norm": 10.239558219909668, "learning_rate": 7.054200000000001e-06, "loss": 1.618, "step": 14740 }, { "epoch": 1.475, "grad_norm": 6.889220237731934, "learning_rate": 7.0522e-06, "loss": 1.2678, "step": 14750 }, { "epoch": 1.476, "grad_norm": 7.706418991088867, "learning_rate": 7.050200000000001e-06, "loss": 1.3864, "step": 14760 }, { "epoch": 1.4769999999999999, "grad_norm": 7.835612773895264, "learning_rate": 7.0482000000000006e-06, "loss": 0.9621, "step": 14770 }, { "epoch": 1.478, "grad_norm": 10.503085136413574, "learning_rate": 7.0462e-06, "loss": 1.4384, "step": 14780 }, { "epoch": 1.479, "grad_norm": 10.450867652893066, "learning_rate": 7.044200000000001e-06, "loss": 1.0949, "step": 14790 }, { "epoch": 1.48, "grad_norm": 13.338970184326172, "learning_rate": 7.0422e-06, "loss": 1.3946, "step": 14800 }, { "epoch": 1.4809999999999999, "grad_norm": 7.3090033531188965, "learning_rate": 7.0402e-06, "loss": 1.0511, "step": 14810 }, { "epoch": 1.482, "grad_norm": 14.847382545471191, "learning_rate": 7.038200000000001e-06, "loss": 1.2712, "step": 14820 }, { "epoch": 1.483, "grad_norm": 8.459646224975586, "learning_rate": 7.0362000000000005e-06, "loss": 1.6407, "step": 14830 }, { "epoch": 1.484, "grad_norm": 9.749418258666992, "learning_rate": 7.034200000000001e-06, "loss": 1.3189, "step": 14840 }, { "epoch": 1.4849999999999999, "grad_norm": 10.94445514678955, 
"learning_rate": 7.0322e-06, "loss": 1.3561, "step": 14850 }, { "epoch": 1.486, "grad_norm": 6.846621513366699, "learning_rate": 7.0302e-06, "loss": 1.2823, "step": 14860 }, { "epoch": 1.487, "grad_norm": 10.755578994750977, "learning_rate": 7.028200000000001e-06, "loss": 1.3564, "step": 14870 }, { "epoch": 1.488, "grad_norm": 7.979917049407959, "learning_rate": 7.026200000000001e-06, "loss": 1.3215, "step": 14880 }, { "epoch": 1.4889999999999999, "grad_norm": 8.300972938537598, "learning_rate": 7.024200000000001e-06, "loss": 1.1721, "step": 14890 }, { "epoch": 1.49, "grad_norm": 8.525657653808594, "learning_rate": 7.0222e-06, "loss": 1.3552, "step": 14900 }, { "epoch": 1.491, "grad_norm": 15.526237487792969, "learning_rate": 7.0202e-06, "loss": 1.1347, "step": 14910 }, { "epoch": 1.492, "grad_norm": 10.290020942687988, "learning_rate": 7.018200000000001e-06, "loss": 1.2249, "step": 14920 }, { "epoch": 1.4929999999999999, "grad_norm": 8.185857772827148, "learning_rate": 7.016200000000001e-06, "loss": 1.3405, "step": 14930 }, { "epoch": 1.494, "grad_norm": 7.7782979011535645, "learning_rate": 7.0144e-06, "loss": 1.3489, "step": 14940 }, { "epoch": 1.495, "grad_norm": 12.587377548217773, "learning_rate": 7.0124e-06, "loss": 1.3457, "step": 14950 }, { "epoch": 1.496, "grad_norm": 9.310113906860352, "learning_rate": 7.010400000000001e-06, "loss": 1.1627, "step": 14960 }, { "epoch": 1.4969999999999999, "grad_norm": 12.446663856506348, "learning_rate": 7.0084000000000005e-06, "loss": 1.3039, "step": 14970 }, { "epoch": 1.498, "grad_norm": 10.43727970123291, "learning_rate": 7.006400000000001e-06, "loss": 1.555, "step": 14980 }, { "epoch": 1.499, "grad_norm": 6.0159783363342285, "learning_rate": 7.0044e-06, "loss": 1.2897, "step": 14990 }, { "epoch": 1.5, "grad_norm": 8.835289001464844, "learning_rate": 7.0024e-06, "loss": 1.1183, "step": 15000 }, { "epoch": 1.501, "grad_norm": 14.251409530639648, "learning_rate": 7.000400000000001e-06, "loss": 1.1171, "step": 15010 }, { 
"epoch": 1.502, "grad_norm": 6.640773296356201, "learning_rate": 6.998400000000001e-06, "loss": 1.2432, "step": 15020 }, { "epoch": 1.5030000000000001, "grad_norm": 15.095942497253418, "learning_rate": 6.996400000000001e-06, "loss": 1.332, "step": 15030 }, { "epoch": 1.504, "grad_norm": 8.376407623291016, "learning_rate": 6.9944e-06, "loss": 1.3453, "step": 15040 }, { "epoch": 1.505, "grad_norm": 13.045015335083008, "learning_rate": 6.9924e-06, "loss": 1.3876, "step": 15050 }, { "epoch": 1.506, "grad_norm": 15.300532341003418, "learning_rate": 6.990400000000001e-06, "loss": 1.1918, "step": 15060 }, { "epoch": 1.5070000000000001, "grad_norm": 9.770393371582031, "learning_rate": 6.988400000000001e-06, "loss": 1.2393, "step": 15070 }, { "epoch": 1.508, "grad_norm": 13.042874336242676, "learning_rate": 6.9864000000000006e-06, "loss": 1.316, "step": 15080 }, { "epoch": 1.509, "grad_norm": 9.384196281433105, "learning_rate": 6.9844e-06, "loss": 1.3525, "step": 15090 }, { "epoch": 1.51, "grad_norm": 12.296027183532715, "learning_rate": 6.9824e-06, "loss": 0.99, "step": 15100 }, { "epoch": 1.5110000000000001, "grad_norm": 9.712472915649414, "learning_rate": 6.9804e-06, "loss": 1.2717, "step": 15110 }, { "epoch": 1.512, "grad_norm": 13.843992233276367, "learning_rate": 6.978400000000001e-06, "loss": 1.3215, "step": 15120 }, { "epoch": 1.513, "grad_norm": 4.951822757720947, "learning_rate": 6.976400000000001e-06, "loss": 1.067, "step": 15130 }, { "epoch": 1.514, "grad_norm": 11.851088523864746, "learning_rate": 6.9744e-06, "loss": 1.4878, "step": 15140 }, { "epoch": 1.5150000000000001, "grad_norm": 10.459698677062988, "learning_rate": 6.9724e-06, "loss": 1.2268, "step": 15150 }, { "epoch": 1.516, "grad_norm": 8.015499114990234, "learning_rate": 6.9704e-06, "loss": 1.216, "step": 15160 }, { "epoch": 1.517, "grad_norm": 10.600618362426758, "learning_rate": 6.968400000000001e-06, "loss": 1.1585, "step": 15170 }, { "epoch": 1.518, "grad_norm": 10.666069030761719, 
"learning_rate": 6.966400000000001e-06, "loss": 1.4376, "step": 15180 }, { "epoch": 1.5190000000000001, "grad_norm": 7.6960883140563965, "learning_rate": 6.9644e-06, "loss": 1.4915, "step": 15190 }, { "epoch": 1.52, "grad_norm": 11.85871410369873, "learning_rate": 6.9624000000000005e-06, "loss": 1.2628, "step": 15200 }, { "epoch": 1.521, "grad_norm": 6.816941738128662, "learning_rate": 6.9604e-06, "loss": 1.2412, "step": 15210 }, { "epoch": 1.522, "grad_norm": 8.195503234863281, "learning_rate": 6.958400000000001e-06, "loss": 1.5238, "step": 15220 }, { "epoch": 1.5230000000000001, "grad_norm": 7.451001167297363, "learning_rate": 6.956400000000001e-06, "loss": 1.1515, "step": 15230 }, { "epoch": 1.524, "grad_norm": 10.793375968933105, "learning_rate": 6.9544e-06, "loss": 1.311, "step": 15240 }, { "epoch": 1.525, "grad_norm": 8.507076263427734, "learning_rate": 6.9524000000000006e-06, "loss": 1.4713, "step": 15250 }, { "epoch": 1.526, "grad_norm": 7.383382320404053, "learning_rate": 6.9504e-06, "loss": 1.1582, "step": 15260 }, { "epoch": 1.5270000000000001, "grad_norm": 4.811766624450684, "learning_rate": 6.948400000000001e-06, "loss": 1.2241, "step": 15270 }, { "epoch": 1.528, "grad_norm": 7.810732364654541, "learning_rate": 6.946400000000001e-06, "loss": 1.2553, "step": 15280 }, { "epoch": 1.529, "grad_norm": 10.471966743469238, "learning_rate": 6.9444e-06, "loss": 1.1727, "step": 15290 }, { "epoch": 1.53, "grad_norm": 12.64487075805664, "learning_rate": 6.942400000000001e-06, "loss": 1.368, "step": 15300 }, { "epoch": 1.5310000000000001, "grad_norm": 8.26953411102295, "learning_rate": 6.9404000000000005e-06, "loss": 1.5585, "step": 15310 }, { "epoch": 1.532, "grad_norm": 6.571100234985352, "learning_rate": 6.9384e-06, "loss": 1.2326, "step": 15320 }, { "epoch": 1.533, "grad_norm": 7.671020984649658, "learning_rate": 6.936400000000001e-06, "loss": 1.2642, "step": 15330 }, { "epoch": 1.534, "grad_norm": 8.543424606323242, "learning_rate": 6.9344e-06, "loss": 1.1284, 
"step": 15340 }, { "epoch": 1.5350000000000001, "grad_norm": 4.385470390319824, "learning_rate": 6.9324e-06, "loss": 1.1937, "step": 15350 }, { "epoch": 1.536, "grad_norm": 8.393011093139648, "learning_rate": 6.930400000000001e-06, "loss": 1.2204, "step": 15360 }, { "epoch": 1.537, "grad_norm": 7.623699188232422, "learning_rate": 6.9284000000000004e-06, "loss": 1.3624, "step": 15370 }, { "epoch": 1.538, "grad_norm": 13.1136474609375, "learning_rate": 6.926400000000001e-06, "loss": 1.1301, "step": 15380 }, { "epoch": 1.5390000000000001, "grad_norm": 15.149060249328613, "learning_rate": 6.924400000000001e-06, "loss": 1.3565, "step": 15390 }, { "epoch": 1.54, "grad_norm": 10.372309684753418, "learning_rate": 6.9224e-06, "loss": 1.261, "step": 15400 }, { "epoch": 1.541, "grad_norm": 10.722027778625488, "learning_rate": 6.920400000000001e-06, "loss": 1.2085, "step": 15410 }, { "epoch": 1.542, "grad_norm": 7.869482040405273, "learning_rate": 6.9184000000000005e-06, "loss": 1.1406, "step": 15420 }, { "epoch": 1.5430000000000001, "grad_norm": 6.035120964050293, "learning_rate": 6.916400000000001e-06, "loss": 1.2538, "step": 15430 }, { "epoch": 1.544, "grad_norm": 9.672089576721191, "learning_rate": 6.914400000000001e-06, "loss": 1.2618, "step": 15440 }, { "epoch": 1.545, "grad_norm": 12.393998146057129, "learning_rate": 6.9124e-06, "loss": 1.2188, "step": 15450 }, { "epoch": 1.546, "grad_norm": 11.402376174926758, "learning_rate": 6.910400000000001e-06, "loss": 1.3358, "step": 15460 }, { "epoch": 1.5470000000000002, "grad_norm": 10.75451946258545, "learning_rate": 6.908400000000001e-06, "loss": 1.5348, "step": 15470 }, { "epoch": 1.548, "grad_norm": 9.740921974182129, "learning_rate": 6.9064000000000005e-06, "loss": 1.2696, "step": 15480 }, { "epoch": 1.549, "grad_norm": 16.461273193359375, "learning_rate": 6.904400000000001e-06, "loss": 1.2213, "step": 15490 }, { "epoch": 1.55, "grad_norm": 9.36241626739502, "learning_rate": 6.9024e-06, "loss": 1.2269, "step": 15500 }, { 
"epoch": 1.5510000000000002, "grad_norm": 12.490897178649902, "learning_rate": 6.9004e-06, "loss": 1.3068, "step": 15510 }, { "epoch": 1.552, "grad_norm": 6.087769508361816, "learning_rate": 6.898400000000001e-06, "loss": 1.208, "step": 15520 }, { "epoch": 1.553, "grad_norm": 8.76059341430664, "learning_rate": 6.896400000000001e-06, "loss": 1.2751, "step": 15530 }, { "epoch": 1.554, "grad_norm": 13.79126262664795, "learning_rate": 6.894400000000001e-06, "loss": 1.4272, "step": 15540 }, { "epoch": 1.5550000000000002, "grad_norm": 16.097034454345703, "learning_rate": 6.8924e-06, "loss": 1.4969, "step": 15550 }, { "epoch": 1.556, "grad_norm": 5.611037731170654, "learning_rate": 6.8904e-06, "loss": 1.0194, "step": 15560 }, { "epoch": 1.557, "grad_norm": 7.556801795959473, "learning_rate": 6.888400000000001e-06, "loss": 1.5208, "step": 15570 }, { "epoch": 1.558, "grad_norm": 10.230359077453613, "learning_rate": 6.886400000000001e-06, "loss": 1.4003, "step": 15580 }, { "epoch": 1.5590000000000002, "grad_norm": 12.046246528625488, "learning_rate": 6.884400000000001e-06, "loss": 1.4734, "step": 15590 }, { "epoch": 1.56, "grad_norm": 9.367044448852539, "learning_rate": 6.8824e-06, "loss": 1.2689, "step": 15600 }, { "epoch": 1.561, "grad_norm": 7.186966896057129, "learning_rate": 6.8804e-06, "loss": 1.1527, "step": 15610 }, { "epoch": 1.562, "grad_norm": 9.415060043334961, "learning_rate": 6.878400000000001e-06, "loss": 1.3688, "step": 15620 }, { "epoch": 1.563, "grad_norm": 6.915694713592529, "learning_rate": 6.876400000000001e-06, "loss": 1.0726, "step": 15630 }, { "epoch": 1.564, "grad_norm": 9.139344215393066, "learning_rate": 6.8744000000000015e-06, "loss": 1.6267, "step": 15640 }, { "epoch": 1.565, "grad_norm": 5.538430213928223, "learning_rate": 6.8724000000000005e-06, "loss": 0.9996, "step": 15650 }, { "epoch": 1.5659999999999998, "grad_norm": 7.616328239440918, "learning_rate": 6.8704e-06, "loss": 1.3839, "step": 15660 }, { "epoch": 1.567, "grad_norm": 
8.232300758361816, "learning_rate": 6.868400000000001e-06, "loss": 1.133, "step": 15670 }, { "epoch": 1.568, "grad_norm": 7.298717498779297, "learning_rate": 6.866400000000001e-06, "loss": 1.1124, "step": 15680 }, { "epoch": 1.569, "grad_norm": 40.63441467285156, "learning_rate": 6.864400000000001e-06, "loss": 1.4423, "step": 15690 }, { "epoch": 1.5699999999999998, "grad_norm": 11.835037231445312, "learning_rate": 6.8624000000000006e-06, "loss": 1.3468, "step": 15700 }, { "epoch": 1.571, "grad_norm": 11.663403511047363, "learning_rate": 6.8604e-06, "loss": 1.4425, "step": 15710 }, { "epoch": 1.572, "grad_norm": 8.62054443359375, "learning_rate": 6.8584e-06, "loss": 1.0212, "step": 15720 }, { "epoch": 1.573, "grad_norm": 9.630745887756348, "learning_rate": 6.856400000000001e-06, "loss": 1.2821, "step": 15730 }, { "epoch": 1.5739999999999998, "grad_norm": 6.202783584594727, "learning_rate": 6.854400000000001e-06, "loss": 1.3381, "step": 15740 }, { "epoch": 1.575, "grad_norm": 7.880337715148926, "learning_rate": 6.8524e-06, "loss": 1.3547, "step": 15750 }, { "epoch": 1.576, "grad_norm": 5.5128302574157715, "learning_rate": 6.8504000000000005e-06, "loss": 1.3962, "step": 15760 }, { "epoch": 1.577, "grad_norm": 8.24344253540039, "learning_rate": 6.8484e-06, "loss": 1.2654, "step": 15770 }, { "epoch": 1.5779999999999998, "grad_norm": 7.074611186981201, "learning_rate": 6.846400000000001e-06, "loss": 1.3224, "step": 15780 }, { "epoch": 1.579, "grad_norm": 5.93052339553833, "learning_rate": 6.844400000000001e-06, "loss": 1.2586, "step": 15790 }, { "epoch": 1.58, "grad_norm": 10.185494422912598, "learning_rate": 6.8424e-06, "loss": 1.2511, "step": 15800 }, { "epoch": 1.581, "grad_norm": 10.210550308227539, "learning_rate": 6.840400000000001e-06, "loss": 1.3766, "step": 15810 }, { "epoch": 1.5819999999999999, "grad_norm": 10.389945030212402, "learning_rate": 6.8384000000000005e-06, "loss": 1.481, "step": 15820 }, { "epoch": 1.583, "grad_norm": 6.6233696937561035, 
"learning_rate": 6.836400000000001e-06, "loss": 1.1115, "step": 15830 }, { "epoch": 1.584, "grad_norm": 8.74160099029541, "learning_rate": 6.834400000000001e-06, "loss": 1.106, "step": 15840 }, { "epoch": 1.585, "grad_norm": 12.481822967529297, "learning_rate": 6.8324e-06, "loss": 1.2458, "step": 15850 }, { "epoch": 1.5859999999999999, "grad_norm": 8.929315567016602, "learning_rate": 6.830400000000001e-06, "loss": 1.2746, "step": 15860 }, { "epoch": 1.587, "grad_norm": 13.123950958251953, "learning_rate": 6.8284000000000006e-06, "loss": 1.2344, "step": 15870 }, { "epoch": 1.588, "grad_norm": 14.25854778289795, "learning_rate": 6.8264e-06, "loss": 1.4685, "step": 15880 }, { "epoch": 1.589, "grad_norm": 11.991083145141602, "learning_rate": 6.824400000000001e-06, "loss": 0.9578, "step": 15890 }, { "epoch": 1.5899999999999999, "grad_norm": 6.1386847496032715, "learning_rate": 6.8224e-06, "loss": 0.9362, "step": 15900 }, { "epoch": 1.591, "grad_norm": 17.973047256469727, "learning_rate": 6.8204e-06, "loss": 1.3325, "step": 15910 }, { "epoch": 1.592, "grad_norm": 11.559574127197266, "learning_rate": 6.818400000000001e-06, "loss": 1.223, "step": 15920 }, { "epoch": 1.593, "grad_norm": 10.577466011047363, "learning_rate": 6.8164000000000005e-06, "loss": 0.9836, "step": 15930 }, { "epoch": 1.5939999999999999, "grad_norm": 18.92836570739746, "learning_rate": 6.814400000000001e-06, "loss": 1.1919, "step": 15940 }, { "epoch": 1.595, "grad_norm": 7.690203666687012, "learning_rate": 6.8124e-06, "loss": 1.606, "step": 15950 }, { "epoch": 1.596, "grad_norm": 9.744142532348633, "learning_rate": 6.8104e-06, "loss": 1.3984, "step": 15960 }, { "epoch": 1.597, "grad_norm": 14.467313766479492, "learning_rate": 6.808400000000001e-06, "loss": 1.4522, "step": 15970 }, { "epoch": 1.5979999999999999, "grad_norm": 16.621519088745117, "learning_rate": 6.806400000000001e-06, "loss": 1.7655, "step": 15980 }, { "epoch": 1.599, "grad_norm": 9.667387008666992, "learning_rate": 
6.804400000000001e-06, "loss": 1.4684, "step": 15990 }, { "epoch": 1.6, "grad_norm": 7.403086185455322, "learning_rate": 6.8024e-06, "loss": 1.4688, "step": 16000 }, { "epoch": 1.601, "grad_norm": 6.314653396606445, "learning_rate": 6.8004e-06, "loss": 1.4153, "step": 16010 }, { "epoch": 1.6019999999999999, "grad_norm": 5.106753349304199, "learning_rate": 6.798400000000001e-06, "loss": 1.1921, "step": 16020 }, { "epoch": 1.603, "grad_norm": 7.4833083152771, "learning_rate": 6.796400000000001e-06, "loss": 1.2907, "step": 16030 }, { "epoch": 1.604, "grad_norm": 8.037872314453125, "learning_rate": 6.794400000000001e-06, "loss": 1.1569, "step": 16040 }, { "epoch": 1.605, "grad_norm": 7.979098320007324, "learning_rate": 6.7924e-06, "loss": 1.2342, "step": 16050 }, { "epoch": 1.6059999999999999, "grad_norm": 8.758862495422363, "learning_rate": 6.7904e-06, "loss": 1.2851, "step": 16060 }, { "epoch": 1.607, "grad_norm": 8.418973922729492, "learning_rate": 6.788400000000001e-06, "loss": 1.0061, "step": 16070 }, { "epoch": 1.608, "grad_norm": 11.592930793762207, "learning_rate": 6.786400000000001e-06, "loss": 1.3947, "step": 16080 }, { "epoch": 1.609, "grad_norm": 7.405920505523682, "learning_rate": 6.784400000000001e-06, "loss": 1.4812, "step": 16090 }, { "epoch": 1.6099999999999999, "grad_norm": 9.778947830200195, "learning_rate": 6.7824000000000005e-06, "loss": 1.1934, "step": 16100 }, { "epoch": 1.611, "grad_norm": 7.781137466430664, "learning_rate": 6.7804e-06, "loss": 1.3795, "step": 16110 }, { "epoch": 1.612, "grad_norm": 12.58737850189209, "learning_rate": 6.7784e-06, "loss": 0.934, "step": 16120 }, { "epoch": 1.613, "grad_norm": 8.265934944152832, "learning_rate": 6.776400000000001e-06, "loss": 1.3287, "step": 16130 }, { "epoch": 1.6139999999999999, "grad_norm": 7.968300819396973, "learning_rate": 6.774400000000001e-06, "loss": 0.9706, "step": 16140 }, { "epoch": 1.615, "grad_norm": 9.024754524230957, "learning_rate": 6.7724e-06, "loss": 1.2474, "step": 16150 }, { 
"epoch": 1.616, "grad_norm": 8.75287914276123, "learning_rate": 6.7704000000000004e-06, "loss": 1.2792, "step": 16160 }, { "epoch": 1.617, "grad_norm": 20.98048210144043, "learning_rate": 6.7684e-06, "loss": 1.2669, "step": 16170 }, { "epoch": 1.6179999999999999, "grad_norm": 13.922874450683594, "learning_rate": 6.766400000000001e-06, "loss": 1.3839, "step": 16180 }, { "epoch": 1.619, "grad_norm": 13.084990501403809, "learning_rate": 6.764400000000001e-06, "loss": 0.9566, "step": 16190 }, { "epoch": 1.62, "grad_norm": 16.917865753173828, "learning_rate": 6.7624e-06, "loss": 1.4323, "step": 16200 }, { "epoch": 1.621, "grad_norm": 6.447714805603027, "learning_rate": 6.7604000000000005e-06, "loss": 0.7878, "step": 16210 }, { "epoch": 1.6219999999999999, "grad_norm": 17.720760345458984, "learning_rate": 6.7584e-06, "loss": 1.445, "step": 16220 }, { "epoch": 1.623, "grad_norm": 20.45992088317871, "learning_rate": 6.756400000000001e-06, "loss": 1.1096, "step": 16230 }, { "epoch": 1.624, "grad_norm": 17.10667610168457, "learning_rate": 6.754400000000001e-06, "loss": 1.1297, "step": 16240 }, { "epoch": 1.625, "grad_norm": 10.260255813598633, "learning_rate": 6.7524e-06, "loss": 1.0665, "step": 16250 }, { "epoch": 1.626, "grad_norm": 9.18688678741455, "learning_rate": 6.750400000000001e-06, "loss": 1.6257, "step": 16260 }, { "epoch": 1.627, "grad_norm": 20.15984344482422, "learning_rate": 6.7484000000000005e-06, "loss": 1.3876, "step": 16270 }, { "epoch": 1.6280000000000001, "grad_norm": 15.147734642028809, "learning_rate": 6.7464e-06, "loss": 1.3788, "step": 16280 }, { "epoch": 1.629, "grad_norm": 13.415728569030762, "learning_rate": 6.744400000000001e-06, "loss": 1.4822, "step": 16290 }, { "epoch": 1.63, "grad_norm": 12.754487037658691, "learning_rate": 6.7424e-06, "loss": 1.3638, "step": 16300 }, { "epoch": 1.631, "grad_norm": 11.726311683654785, "learning_rate": 6.7404e-06, "loss": 1.4817, "step": 16310 }, { "epoch": 1.6320000000000001, "grad_norm": 11.55234146118164, 
"learning_rate": 6.7384000000000006e-06, "loss": 1.187, "step": 16320 }, { "epoch": 1.633, "grad_norm": 8.348124504089355, "learning_rate": 6.7364e-06, "loss": 1.1961, "step": 16330 }, { "epoch": 1.634, "grad_norm": 7.098602771759033, "learning_rate": 6.734400000000001e-06, "loss": 1.3223, "step": 16340 }, { "epoch": 1.635, "grad_norm": 8.422306060791016, "learning_rate": 6.7324e-06, "loss": 1.3164, "step": 16350 }, { "epoch": 1.6360000000000001, "grad_norm": 8.00632095336914, "learning_rate": 6.7304e-06, "loss": 1.2821, "step": 16360 }, { "epoch": 1.637, "grad_norm": 12.497580528259277, "learning_rate": 6.728400000000001e-06, "loss": 1.0983, "step": 16370 }, { "epoch": 1.638, "grad_norm": 9.785317420959473, "learning_rate": 6.7264000000000005e-06, "loss": 1.4127, "step": 16380 }, { "epoch": 1.639, "grad_norm": 12.544650077819824, "learning_rate": 6.724400000000001e-06, "loss": 1.3863, "step": 16390 }, { "epoch": 1.6400000000000001, "grad_norm": 5.582592487335205, "learning_rate": 6.7224e-06, "loss": 0.8999, "step": 16400 }, { "epoch": 1.641, "grad_norm": 11.4360990524292, "learning_rate": 6.7204e-06, "loss": 1.243, "step": 16410 }, { "epoch": 1.642, "grad_norm": 19.785261154174805, "learning_rate": 6.718400000000001e-06, "loss": 1.4591, "step": 16420 }, { "epoch": 1.643, "grad_norm": 15.07401180267334, "learning_rate": 6.716400000000001e-06, "loss": 1.3336, "step": 16430 }, { "epoch": 1.6440000000000001, "grad_norm": 8.21678352355957, "learning_rate": 6.714400000000001e-06, "loss": 1.3294, "step": 16440 }, { "epoch": 1.645, "grad_norm": 9.412454605102539, "learning_rate": 6.7124e-06, "loss": 1.1666, "step": 16450 }, { "epoch": 1.646, "grad_norm": 9.049200057983398, "learning_rate": 6.7104e-06, "loss": 1.4192, "step": 16460 }, { "epoch": 1.647, "grad_norm": 13.69751262664795, "learning_rate": 6.708400000000001e-06, "loss": 1.3504, "step": 16470 }, { "epoch": 1.6480000000000001, "grad_norm": 12.44576644897461, "learning_rate": 6.706400000000001e-06, "loss": 1.3031, 
"step": 16480 }, { "epoch": 1.649, "grad_norm": 6.793119430541992, "learning_rate": 6.7044000000000006e-06, "loss": 1.2901, "step": 16490 }, { "epoch": 1.65, "grad_norm": 17.76980972290039, "learning_rate": 6.7024e-06, "loss": 1.4884, "step": 16500 }, { "epoch": 1.651, "grad_norm": 7.219597339630127, "learning_rate": 6.7004e-06, "loss": 1.2155, "step": 16510 }, { "epoch": 1.6520000000000001, "grad_norm": 7.737342834472656, "learning_rate": 6.6984e-06, "loss": 1.2364, "step": 16520 }, { "epoch": 1.653, "grad_norm": 8.292745590209961, "learning_rate": 6.696400000000001e-06, "loss": 1.1275, "step": 16530 }, { "epoch": 1.654, "grad_norm": 9.836957931518555, "learning_rate": 6.694400000000001e-06, "loss": 1.279, "step": 16540 }, { "epoch": 1.655, "grad_norm": 13.66272258758545, "learning_rate": 6.6924e-06, "loss": 1.5252, "step": 16550 }, { "epoch": 1.6560000000000001, "grad_norm": 8.891858100891113, "learning_rate": 6.6904e-06, "loss": 1.2793, "step": 16560 }, { "epoch": 1.657, "grad_norm": 12.409144401550293, "learning_rate": 6.6884e-06, "loss": 1.3435, "step": 16570 }, { "epoch": 1.658, "grad_norm": 6.139773368835449, "learning_rate": 6.686400000000001e-06, "loss": 1.4227, "step": 16580 }, { "epoch": 1.659, "grad_norm": 8.435040473937988, "learning_rate": 6.684400000000001e-06, "loss": 1.2365, "step": 16590 }, { "epoch": 1.6600000000000001, "grad_norm": 6.335344314575195, "learning_rate": 6.6824e-06, "loss": 1.2296, "step": 16600 }, { "epoch": 1.661, "grad_norm": 7.833021640777588, "learning_rate": 6.6804000000000004e-06, "loss": 1.2856, "step": 16610 }, { "epoch": 1.662, "grad_norm": 10.130121231079102, "learning_rate": 6.6784e-06, "loss": 1.2319, "step": 16620 }, { "epoch": 1.663, "grad_norm": 18.269407272338867, "learning_rate": 6.676400000000001e-06, "loss": 1.7172, "step": 16630 }, { "epoch": 1.6640000000000001, "grad_norm": 9.960505485534668, "learning_rate": 6.674400000000001e-06, "loss": 1.0778, "step": 16640 }, { "epoch": 1.665, "grad_norm": 
15.05043888092041, "learning_rate": 6.672400000000001e-06, "loss": 1.2856, "step": 16650 }, { "epoch": 1.666, "grad_norm": 11.168231010437012, "learning_rate": 6.6704000000000005e-06, "loss": 1.3871, "step": 16660 }, { "epoch": 1.667, "grad_norm": 10.84443187713623, "learning_rate": 6.6684e-06, "loss": 0.9542, "step": 16670 }, { "epoch": 1.6680000000000001, "grad_norm": 10.474294662475586, "learning_rate": 6.6664e-06, "loss": 1.3811, "step": 16680 }, { "epoch": 1.669, "grad_norm": 9.51667308807373, "learning_rate": 6.664400000000001e-06, "loss": 1.0526, "step": 16690 }, { "epoch": 1.67, "grad_norm": 10.226188659667969, "learning_rate": 6.662400000000001e-06, "loss": 1.2785, "step": 16700 }, { "epoch": 1.671, "grad_norm": 9.766279220581055, "learning_rate": 6.6604e-06, "loss": 1.2407, "step": 16710 }, { "epoch": 1.6720000000000002, "grad_norm": 10.285801887512207, "learning_rate": 6.6584000000000005e-06, "loss": 1.3033, "step": 16720 }, { "epoch": 1.673, "grad_norm": 20.798824310302734, "learning_rate": 6.6564e-06, "loss": 1.2847, "step": 16730 }, { "epoch": 1.674, "grad_norm": 11.801972389221191, "learning_rate": 6.654400000000001e-06, "loss": 1.2097, "step": 16740 }, { "epoch": 1.675, "grad_norm": 19.681108474731445, "learning_rate": 6.652400000000001e-06, "loss": 1.1467, "step": 16750 }, { "epoch": 1.6760000000000002, "grad_norm": 8.647676467895508, "learning_rate": 6.6504e-06, "loss": 1.2569, "step": 16760 }, { "epoch": 1.677, "grad_norm": 13.813872337341309, "learning_rate": 6.648400000000001e-06, "loss": 1.6541, "step": 16770 }, { "epoch": 1.678, "grad_norm": 10.192971229553223, "learning_rate": 6.6464000000000004e-06, "loss": 1.7543, "step": 16780 }, { "epoch": 1.679, "grad_norm": 5.64166259765625, "learning_rate": 6.644400000000001e-06, "loss": 1.1935, "step": 16790 }, { "epoch": 1.6800000000000002, "grad_norm": 7.685790538787842, "learning_rate": 6.642400000000001e-06, "loss": 1.3182, "step": 16800 }, { "epoch": 1.681, "grad_norm": 8.959681510925293, 
"learning_rate": 6.6404e-06, "loss": 1.0787, "step": 16810 }, { "epoch": 1.682, "grad_norm": 8.126368522644043, "learning_rate": 6.638400000000001e-06, "loss": 1.2879, "step": 16820 }, { "epoch": 1.683, "grad_norm": 12.985515594482422, "learning_rate": 6.6364000000000005e-06, "loss": 1.0624, "step": 16830 }, { "epoch": 1.6840000000000002, "grad_norm": 5.611485004425049, "learning_rate": 6.634400000000001e-06, "loss": 1.2374, "step": 16840 }, { "epoch": 1.685, "grad_norm": 12.504964828491211, "learning_rate": 6.632400000000001e-06, "loss": 1.4553, "step": 16850 }, { "epoch": 1.686, "grad_norm": 7.811570167541504, "learning_rate": 6.6304e-06, "loss": 1.3596, "step": 16860 }, { "epoch": 1.687, "grad_norm": 10.156920433044434, "learning_rate": 6.628400000000001e-06, "loss": 1.4218, "step": 16870 }, { "epoch": 1.688, "grad_norm": 7.5734782218933105, "learning_rate": 6.626400000000001e-06, "loss": 1.5275, "step": 16880 }, { "epoch": 1.689, "grad_norm": 10.033520698547363, "learning_rate": 6.6244000000000005e-06, "loss": 1.265, "step": 16890 }, { "epoch": 1.69, "grad_norm": 9.702759742736816, "learning_rate": 6.622400000000001e-06, "loss": 1.0825, "step": 16900 }, { "epoch": 1.6909999999999998, "grad_norm": 9.27287769317627, "learning_rate": 6.6204e-06, "loss": 1.2783, "step": 16910 }, { "epoch": 1.692, "grad_norm": 10.269192695617676, "learning_rate": 6.6184e-06, "loss": 1.0744, "step": 16920 }, { "epoch": 1.693, "grad_norm": 7.256290435791016, "learning_rate": 6.616400000000001e-06, "loss": 1.3133, "step": 16930 }, { "epoch": 1.694, "grad_norm": 7.132981300354004, "learning_rate": 6.6144000000000006e-06, "loss": 1.5425, "step": 16940 }, { "epoch": 1.6949999999999998, "grad_norm": 9.083227157592773, "learning_rate": 6.612400000000001e-06, "loss": 1.4124, "step": 16950 }, { "epoch": 1.696, "grad_norm": 5.652279376983643, "learning_rate": 6.6104e-06, "loss": 1.0046, "step": 16960 }, { "epoch": 1.697, "grad_norm": 11.049751281738281, "learning_rate": 6.6084e-06, "loss": 
1.1062, "step": 16970 }, { "epoch": 1.698, "grad_norm": 9.064228057861328, "learning_rate": 6.606400000000001e-06, "loss": 1.1051, "step": 16980 }, { "epoch": 1.6989999999999998, "grad_norm": 13.324773788452148, "learning_rate": 6.604400000000001e-06, "loss": 1.4847, "step": 16990 }, { "epoch": 1.7, "grad_norm": 5.134594917297363, "learning_rate": 6.602400000000001e-06, "loss": 1.1394, "step": 17000 }, { "epoch": 1.701, "grad_norm": 10.17231559753418, "learning_rate": 6.6004e-06, "loss": 1.2347, "step": 17010 }, { "epoch": 1.702, "grad_norm": 21.803075790405273, "learning_rate": 6.5984e-06, "loss": 1.4871, "step": 17020 }, { "epoch": 1.7029999999999998, "grad_norm": 17.74017333984375, "learning_rate": 6.596400000000001e-06, "loss": 1.3803, "step": 17030 }, { "epoch": 1.704, "grad_norm": 9.66507339477539, "learning_rate": 6.594400000000001e-06, "loss": 1.2299, "step": 17040 }, { "epoch": 1.705, "grad_norm": 11.197858810424805, "learning_rate": 6.592400000000001e-06, "loss": 1.4647, "step": 17050 }, { "epoch": 1.706, "grad_norm": 9.514790534973145, "learning_rate": 6.5904000000000005e-06, "loss": 1.3154, "step": 17060 }, { "epoch": 1.7069999999999999, "grad_norm": 5.992875099182129, "learning_rate": 6.5884e-06, "loss": 1.4336, "step": 17070 }, { "epoch": 1.708, "grad_norm": 7.111988067626953, "learning_rate": 6.5864e-06, "loss": 1.0715, "step": 17080 }, { "epoch": 1.709, "grad_norm": 7.063281059265137, "learning_rate": 6.584400000000001e-06, "loss": 1.0936, "step": 17090 }, { "epoch": 1.71, "grad_norm": 11.918425559997559, "learning_rate": 6.582400000000001e-06, "loss": 1.5198, "step": 17100 }, { "epoch": 1.7109999999999999, "grad_norm": 10.749302864074707, "learning_rate": 6.5804e-06, "loss": 1.3205, "step": 17110 }, { "epoch": 1.712, "grad_norm": 9.641958236694336, "learning_rate": 6.5784e-06, "loss": 1.2355, "step": 17120 }, { "epoch": 1.713, "grad_norm": 9.374872207641602, "learning_rate": 6.5764e-06, "loss": 1.4803, "step": 17130 }, { "epoch": 1.714, 
"grad_norm": 9.141468048095703, "learning_rate": 6.574400000000001e-06, "loss": 1.4271, "step": 17140 }, { "epoch": 1.7149999999999999, "grad_norm": 7.918256759643555, "learning_rate": 6.572400000000001e-06, "loss": 1.1962, "step": 17150 }, { "epoch": 1.716, "grad_norm": 7.652858734130859, "learning_rate": 6.5704e-06, "loss": 0.9708, "step": 17160 }, { "epoch": 1.717, "grad_norm": 13.36904525756836, "learning_rate": 6.5684000000000005e-06, "loss": 1.4896, "step": 17170 }, { "epoch": 1.718, "grad_norm": 12.76654052734375, "learning_rate": 6.5664e-06, "loss": 1.0748, "step": 17180 }, { "epoch": 1.7189999999999999, "grad_norm": 14.119227409362793, "learning_rate": 6.564400000000001e-06, "loss": 1.3388, "step": 17190 }, { "epoch": 1.72, "grad_norm": 8.746657371520996, "learning_rate": 6.562400000000001e-06, "loss": 1.3469, "step": 17200 }, { "epoch": 1.721, "grad_norm": 10.551569938659668, "learning_rate": 6.5604e-06, "loss": 1.3264, "step": 17210 }, { "epoch": 1.722, "grad_norm": 8.185721397399902, "learning_rate": 6.558400000000001e-06, "loss": 1.3188, "step": 17220 }, { "epoch": 1.7229999999999999, "grad_norm": 6.38791036605835, "learning_rate": 6.5564000000000004e-06, "loss": 1.1264, "step": 17230 }, { "epoch": 1.724, "grad_norm": 7.857933044433594, "learning_rate": 6.554400000000001e-06, "loss": 1.3709, "step": 17240 }, { "epoch": 1.725, "grad_norm": 11.569741249084473, "learning_rate": 6.552400000000001e-06, "loss": 1.1421, "step": 17250 }, { "epoch": 1.726, "grad_norm": 8.844929695129395, "learning_rate": 6.5504e-06, "loss": 1.089, "step": 17260 }, { "epoch": 1.7269999999999999, "grad_norm": 8.79920482635498, "learning_rate": 6.548400000000001e-06, "loss": 1.3023, "step": 17270 }, { "epoch": 1.728, "grad_norm": 10.635369300842285, "learning_rate": 6.5464000000000005e-06, "loss": 1.1822, "step": 17280 }, { "epoch": 1.729, "grad_norm": 6.25326681137085, "learning_rate": 6.5444e-06, "loss": 1.3913, "step": 17290 }, { "epoch": 1.73, "grad_norm": 13.307232856750488, 
"learning_rate": 6.542400000000001e-06, "loss": 1.2711, "step": 17300 }, { "epoch": 1.7309999999999999, "grad_norm": 5.055656433105469, "learning_rate": 6.5404e-06, "loss": 0.9969, "step": 17310 }, { "epoch": 1.732, "grad_norm": 3.5405616760253906, "learning_rate": 6.5384e-06, "loss": 1.1887, "step": 17320 }, { "epoch": 1.733, "grad_norm": 12.887344360351562, "learning_rate": 6.536400000000001e-06, "loss": 1.5161, "step": 17330 }, { "epoch": 1.734, "grad_norm": 10.14722728729248, "learning_rate": 6.5344000000000005e-06, "loss": 1.1244, "step": 17340 }, { "epoch": 1.7349999999999999, "grad_norm": 9.097382545471191, "learning_rate": 6.532400000000001e-06, "loss": 1.3622, "step": 17350 }, { "epoch": 1.736, "grad_norm": 8.575661659240723, "learning_rate": 6.5304e-06, "loss": 1.2681, "step": 17360 }, { "epoch": 1.737, "grad_norm": 14.572662353515625, "learning_rate": 6.5284e-06, "loss": 1.2012, "step": 17370 }, { "epoch": 1.738, "grad_norm": 10.77018928527832, "learning_rate": 6.526400000000001e-06, "loss": 1.3568, "step": 17380 }, { "epoch": 1.7389999999999999, "grad_norm": 19.47421646118164, "learning_rate": 6.524400000000001e-06, "loss": 1.1234, "step": 17390 }, { "epoch": 1.74, "grad_norm": 11.936248779296875, "learning_rate": 6.522400000000001e-06, "loss": 1.1869, "step": 17400 }, { "epoch": 1.741, "grad_norm": 9.056058883666992, "learning_rate": 6.5204e-06, "loss": 1.4234, "step": 17410 }, { "epoch": 1.742, "grad_norm": 9.260425567626953, "learning_rate": 6.5184e-06, "loss": 1.0933, "step": 17420 }, { "epoch": 1.7429999999999999, "grad_norm": 20.885852813720703, "learning_rate": 6.516400000000001e-06, "loss": 2.0323, "step": 17430 }, { "epoch": 1.744, "grad_norm": 12.421066284179688, "learning_rate": 6.514400000000001e-06, "loss": 1.4788, "step": 17440 }, { "epoch": 1.745, "grad_norm": 10.69375228881836, "learning_rate": 6.5124000000000005e-06, "loss": 1.241, "step": 17450 }, { "epoch": 1.746, "grad_norm": 3.594209671020508, "learning_rate": 6.5104e-06, "loss": 
1.0037, "step": 17460 }, { "epoch": 1.7469999999999999, "grad_norm": 9.937426567077637, "learning_rate": 6.5084e-06, "loss": 1.4154, "step": 17470 }, { "epoch": 1.748, "grad_norm": 11.345025062561035, "learning_rate": 6.5064e-06, "loss": 1.2853, "step": 17480 }, { "epoch": 1.749, "grad_norm": 11.024571418762207, "learning_rate": 6.504400000000001e-06, "loss": 1.1865, "step": 17490 }, { "epoch": 1.75, "grad_norm": 9.871234893798828, "learning_rate": 6.502400000000001e-06, "loss": 1.2414, "step": 17500 }, { "epoch": 1.751, "grad_norm": 7.327870845794678, "learning_rate": 6.5004e-06, "loss": 1.4958, "step": 17510 }, { "epoch": 1.752, "grad_norm": 12.224602699279785, "learning_rate": 6.4984e-06, "loss": 1.5198, "step": 17520 }, { "epoch": 1.7530000000000001, "grad_norm": 8.838020324707031, "learning_rate": 6.4964e-06, "loss": 1.3378, "step": 17530 }, { "epoch": 1.754, "grad_norm": 6.108428478240967, "learning_rate": 6.494400000000001e-06, "loss": 0.9661, "step": 17540 }, { "epoch": 1.755, "grad_norm": 7.759083271026611, "learning_rate": 6.492400000000001e-06, "loss": 1.0456, "step": 17550 }, { "epoch": 1.756, "grad_norm": 6.2896409034729, "learning_rate": 6.4904e-06, "loss": 1.1347, "step": 17560 }, { "epoch": 1.7570000000000001, "grad_norm": 6.195939064025879, "learning_rate": 6.4884e-06, "loss": 1.5718, "step": 17570 }, { "epoch": 1.758, "grad_norm": 9.065800666809082, "learning_rate": 6.4864e-06, "loss": 1.0689, "step": 17580 }, { "epoch": 1.759, "grad_norm": 15.802556037902832, "learning_rate": 6.484400000000001e-06, "loss": 1.2202, "step": 17590 }, { "epoch": 1.76, "grad_norm": 12.985136032104492, "learning_rate": 6.482400000000001e-06, "loss": 1.3124, "step": 17600 }, { "epoch": 1.7610000000000001, "grad_norm": 9.7377290725708, "learning_rate": 6.4804e-06, "loss": 0.9782, "step": 17610 }, { "epoch": 1.762, "grad_norm": 11.417032241821289, "learning_rate": 6.4784000000000005e-06, "loss": 1.3302, "step": 17620 }, { "epoch": 1.763, "grad_norm": 13.903484344482422, 
"learning_rate": 6.4764e-06, "loss": 1.2819, "step": 17630 }, { "epoch": 1.764, "grad_norm": 8.76752758026123, "learning_rate": 6.474400000000001e-06, "loss": 1.1499, "step": 17640 }, { "epoch": 1.7650000000000001, "grad_norm": 11.177907943725586, "learning_rate": 6.472400000000001e-06, "loss": 1.3251, "step": 17650 }, { "epoch": 1.766, "grad_norm": 4.6099653244018555, "learning_rate": 6.4704e-06, "loss": 1.0706, "step": 17660 }, { "epoch": 1.767, "grad_norm": 17.86873435974121, "learning_rate": 6.468400000000001e-06, "loss": 1.289, "step": 17670 }, { "epoch": 1.768, "grad_norm": 17.060014724731445, "learning_rate": 6.4664000000000005e-06, "loss": 1.3821, "step": 17680 }, { "epoch": 1.7690000000000001, "grad_norm": 7.6792097091674805, "learning_rate": 6.4644e-06, "loss": 0.8379, "step": 17690 }, { "epoch": 1.77, "grad_norm": 25.646203994750977, "learning_rate": 6.462400000000001e-06, "loss": 1.5493, "step": 17700 }, { "epoch": 1.771, "grad_norm": 9.31892204284668, "learning_rate": 6.4604e-06, "loss": 1.3565, "step": 17710 }, { "epoch": 1.772, "grad_norm": 7.689058303833008, "learning_rate": 6.4584e-06, "loss": 1.0371, "step": 17720 }, { "epoch": 1.7730000000000001, "grad_norm": 19.88788604736328, "learning_rate": 6.4564000000000006e-06, "loss": 1.2126, "step": 17730 }, { "epoch": 1.774, "grad_norm": 7.815741062164307, "learning_rate": 6.4544e-06, "loss": 1.4496, "step": 17740 }, { "epoch": 1.775, "grad_norm": 11.43288803100586, "learning_rate": 6.452400000000001e-06, "loss": 1.1914, "step": 17750 }, { "epoch": 1.776, "grad_norm": 11.255511283874512, "learning_rate": 6.4504e-06, "loss": 0.9973, "step": 17760 }, { "epoch": 1.7770000000000001, "grad_norm": 17.806089401245117, "learning_rate": 6.4484e-06, "loss": 1.4071, "step": 17770 }, { "epoch": 1.778, "grad_norm": 9.085927963256836, "learning_rate": 6.446400000000001e-06, "loss": 1.3405, "step": 17780 }, { "epoch": 1.779, "grad_norm": 3.821648120880127, "learning_rate": 6.4444000000000005e-06, "loss": 1.1532, 
"step": 17790 }, { "epoch": 1.78, "grad_norm": 8.636872291564941, "learning_rate": 6.442400000000001e-06, "loss": 1.4135, "step": 17800 }, { "epoch": 1.7810000000000001, "grad_norm": 9.926580429077148, "learning_rate": 6.4404e-06, "loss": 1.4668, "step": 17810 }, { "epoch": 1.782, "grad_norm": 5.224086284637451, "learning_rate": 6.4384e-06, "loss": 1.3564, "step": 17820 }, { "epoch": 1.783, "grad_norm": 9.359729766845703, "learning_rate": 6.436400000000001e-06, "loss": 1.2405, "step": 17830 }, { "epoch": 1.784, "grad_norm": 10.380765914916992, "learning_rate": 6.434400000000001e-06, "loss": 0.9209, "step": 17840 }, { "epoch": 1.7850000000000001, "grad_norm": 15.297001838684082, "learning_rate": 6.4324000000000005e-06, "loss": 1.1633, "step": 17850 }, { "epoch": 1.786, "grad_norm": 7.903166770935059, "learning_rate": 6.4304e-06, "loss": 1.0394, "step": 17860 }, { "epoch": 1.787, "grad_norm": 20.099266052246094, "learning_rate": 6.4284e-06, "loss": 1.4571, "step": 17870 }, { "epoch": 1.788, "grad_norm": 11.142775535583496, "learning_rate": 6.4264e-06, "loss": 1.4998, "step": 17880 }, { "epoch": 1.7890000000000001, "grad_norm": 16.28028678894043, "learning_rate": 6.424400000000001e-06, "loss": 1.4892, "step": 17890 }, { "epoch": 1.79, "grad_norm": 9.158763885498047, "learning_rate": 6.4224000000000005e-06, "loss": 1.1765, "step": 17900 }, { "epoch": 1.791, "grad_norm": 9.25641918182373, "learning_rate": 6.420400000000001e-06, "loss": 1.0517, "step": 17910 }, { "epoch": 1.792, "grad_norm": 7.493291854858398, "learning_rate": 6.4184e-06, "loss": 1.287, "step": 17920 }, { "epoch": 1.7930000000000001, "grad_norm": 8.5061674118042, "learning_rate": 6.4164e-06, "loss": 1.0649, "step": 17930 }, { "epoch": 1.794, "grad_norm": 7.219564437866211, "learning_rate": 6.414400000000001e-06, "loss": 1.4622, "step": 17940 }, { "epoch": 1.795, "grad_norm": 8.319295883178711, "learning_rate": 6.412400000000001e-06, "loss": 1.2713, "step": 17950 }, { "epoch": 1.796, "grad_norm": 
10.619367599487305, "learning_rate": 6.410400000000001e-06, "loss": 1.2983, "step": 17960 }, { "epoch": 1.7970000000000002, "grad_norm": 7.198877811431885, "learning_rate": 6.4084e-06, "loss": 1.153, "step": 17970 }, { "epoch": 1.798, "grad_norm": 12.236984252929688, "learning_rate": 6.4064e-06, "loss": 1.1791, "step": 17980 }, { "epoch": 1.799, "grad_norm": 8.369654655456543, "learning_rate": 6.404400000000001e-06, "loss": 1.1745, "step": 17990 }, { "epoch": 1.8, "grad_norm": 9.742130279541016, "learning_rate": 6.402400000000001e-06, "loss": 1.1112, "step": 18000 }, { "epoch": 1.8010000000000002, "grad_norm": 14.143494606018066, "learning_rate": 6.4004000000000014e-06, "loss": 1.4662, "step": 18010 }, { "epoch": 1.802, "grad_norm": 6.848531246185303, "learning_rate": 6.3984000000000004e-06, "loss": 1.4054, "step": 18020 }, { "epoch": 1.803, "grad_norm": 8.20616340637207, "learning_rate": 6.3964e-06, "loss": 1.3857, "step": 18030 }, { "epoch": 1.804, "grad_norm": 8.308536529541016, "learning_rate": 6.394400000000001e-06, "loss": 1.0663, "step": 18040 }, { "epoch": 1.8050000000000002, "grad_norm": 8.456562042236328, "learning_rate": 6.392400000000001e-06, "loss": 1.1826, "step": 18050 }, { "epoch": 1.806, "grad_norm": 10.43928337097168, "learning_rate": 6.390400000000001e-06, "loss": 1.1049, "step": 18060 }, { "epoch": 1.807, "grad_norm": 16.199066162109375, "learning_rate": 6.3884000000000005e-06, "loss": 1.5006, "step": 18070 }, { "epoch": 1.808, "grad_norm": 7.254056453704834, "learning_rate": 6.3864e-06, "loss": 0.9878, "step": 18080 }, { "epoch": 1.8090000000000002, "grad_norm": 7.486106872558594, "learning_rate": 6.3844e-06, "loss": 1.4311, "step": 18090 }, { "epoch": 1.81, "grad_norm": 9.611994743347168, "learning_rate": 6.382400000000001e-06, "loss": 1.2608, "step": 18100 }, { "epoch": 1.811, "grad_norm": 9.258016586303711, "learning_rate": 6.380400000000001e-06, "loss": 1.1146, "step": 18110 }, { "epoch": 1.812, "grad_norm": 10.553972244262695, 
"learning_rate": 6.3784e-06, "loss": 1.2056, "step": 18120 }, { "epoch": 1.813, "grad_norm": 13.9857177734375, "learning_rate": 6.3764000000000005e-06, "loss": 1.1167, "step": 18130 }, { "epoch": 1.814, "grad_norm": 8.718056678771973, "learning_rate": 6.3744e-06, "loss": 1.2386, "step": 18140 }, { "epoch": 1.815, "grad_norm": 19.14798355102539, "learning_rate": 6.372400000000001e-06, "loss": 1.3087, "step": 18150 }, { "epoch": 1.8159999999999998, "grad_norm": 11.233726501464844, "learning_rate": 6.370400000000001e-06, "loss": 1.6024, "step": 18160 }, { "epoch": 1.817, "grad_norm": 11.985554695129395, "learning_rate": 6.3684e-06, "loss": 1.2069, "step": 18170 }, { "epoch": 1.818, "grad_norm": 14.154135704040527, "learning_rate": 6.3664000000000006e-06, "loss": 1.3885, "step": 18180 }, { "epoch": 1.819, "grad_norm": 13.391234397888184, "learning_rate": 6.3644000000000004e-06, "loss": 1.5608, "step": 18190 }, { "epoch": 1.8199999999999998, "grad_norm": 11.697981834411621, "learning_rate": 6.362400000000001e-06, "loss": 1.156, "step": 18200 }, { "epoch": 1.821, "grad_norm": 6.390254974365234, "learning_rate": 6.360400000000001e-06, "loss": 1.1234, "step": 18210 }, { "epoch": 1.822, "grad_norm": 9.185529708862305, "learning_rate": 6.3584e-06, "loss": 1.1235, "step": 18220 }, { "epoch": 1.823, "grad_norm": 11.54269027709961, "learning_rate": 6.356400000000001e-06, "loss": 1.353, "step": 18230 }, { "epoch": 1.8239999999999998, "grad_norm": 9.024720191955566, "learning_rate": 6.3544000000000005e-06, "loss": 1.3518, "step": 18240 }, { "epoch": 1.825, "grad_norm": 6.735665321350098, "learning_rate": 6.3524e-06, "loss": 1.202, "step": 18250 }, { "epoch": 1.826, "grad_norm": 10.326549530029297, "learning_rate": 6.350400000000001e-06, "loss": 1.5044, "step": 18260 }, { "epoch": 1.827, "grad_norm": 10.568848609924316, "learning_rate": 6.3484e-06, "loss": 1.6435, "step": 18270 }, { "epoch": 1.8279999999999998, "grad_norm": 6.68824577331543, "learning_rate": 6.3464e-06, "loss": 
1.2139, "step": 18280 }, { "epoch": 1.829, "grad_norm": 11.16109848022461, "learning_rate": 6.344400000000001e-06, "loss": 1.2298, "step": 18290 }, { "epoch": 1.83, "grad_norm": 7.755879878997803, "learning_rate": 6.3424000000000005e-06, "loss": 1.3746, "step": 18300 }, { "epoch": 1.831, "grad_norm": 10.471056938171387, "learning_rate": 6.340400000000001e-06, "loss": 1.256, "step": 18310 }, { "epoch": 1.8319999999999999, "grad_norm": 7.6881937980651855, "learning_rate": 6.3384e-06, "loss": 1.0806, "step": 18320 }, { "epoch": 1.833, "grad_norm": 13.421733856201172, "learning_rate": 6.3364e-06, "loss": 1.2351, "step": 18330 }, { "epoch": 1.834, "grad_norm": 7.560003280639648, "learning_rate": 6.334400000000001e-06, "loss": 1.1276, "step": 18340 }, { "epoch": 1.835, "grad_norm": 11.758442878723145, "learning_rate": 6.3324000000000006e-06, "loss": 1.0762, "step": 18350 }, { "epoch": 1.8359999999999999, "grad_norm": 14.14734935760498, "learning_rate": 6.330400000000001e-06, "loss": 1.3352, "step": 18360 }, { "epoch": 1.837, "grad_norm": 19.587106704711914, "learning_rate": 6.3284e-06, "loss": 1.5357, "step": 18370 }, { "epoch": 1.838, "grad_norm": 8.496346473693848, "learning_rate": 6.3264e-06, "loss": 1.5781, "step": 18380 }, { "epoch": 1.839, "grad_norm": 6.691411972045898, "learning_rate": 6.324400000000001e-06, "loss": 1.4658, "step": 18390 }, { "epoch": 1.8399999999999999, "grad_norm": 5.619304656982422, "learning_rate": 6.322400000000001e-06, "loss": 1.126, "step": 18400 }, { "epoch": 1.841, "grad_norm": 11.753100395202637, "learning_rate": 6.320400000000001e-06, "loss": 1.2064, "step": 18410 }, { "epoch": 1.842, "grad_norm": 8.650150299072266, "learning_rate": 6.3184e-06, "loss": 1.1619, "step": 18420 }, { "epoch": 1.843, "grad_norm": 6.983112812042236, "learning_rate": 6.3164e-06, "loss": 1.1914, "step": 18430 }, { "epoch": 1.8439999999999999, "grad_norm": 14.351408004760742, "learning_rate": 6.314400000000001e-06, "loss": 1.1602, "step": 18440 }, { "epoch": 
1.845, "grad_norm": 13.516742706298828, "learning_rate": 6.312400000000001e-06, "loss": 1.39, "step": 18450 }, { "epoch": 1.846, "grad_norm": 6.932905673980713, "learning_rate": 6.310400000000001e-06, "loss": 1.4262, "step": 18460 }, { "epoch": 1.847, "grad_norm": 6.4664306640625, "learning_rate": 6.3084000000000005e-06, "loss": 1.1081, "step": 18470 }, { "epoch": 1.8479999999999999, "grad_norm": 12.392315864562988, "learning_rate": 6.3064e-06, "loss": 1.3286, "step": 18480 }, { "epoch": 1.849, "grad_norm": 9.418574333190918, "learning_rate": 6.3044e-06, "loss": 1.2978, "step": 18490 }, { "epoch": 1.85, "grad_norm": 12.16065788269043, "learning_rate": 6.302400000000001e-06, "loss": 1.2119, "step": 18500 }, { "epoch": 1.851, "grad_norm": 9.540505409240723, "learning_rate": 6.300400000000001e-06, "loss": 1.1737, "step": 18510 }, { "epoch": 1.8519999999999999, "grad_norm": 10.515691757202148, "learning_rate": 6.2984e-06, "loss": 1.4311, "step": 18520 }, { "epoch": 1.853, "grad_norm": 6.830284118652344, "learning_rate": 6.2964e-06, "loss": 1.0866, "step": 18530 }, { "epoch": 1.854, "grad_norm": 3.28430438041687, "learning_rate": 6.2944e-06, "loss": 1.5031, "step": 18540 }, { "epoch": 1.855, "grad_norm": 10.921205520629883, "learning_rate": 6.292400000000001e-06, "loss": 1.3328, "step": 18550 }, { "epoch": 1.8559999999999999, "grad_norm": 11.26567268371582, "learning_rate": 6.290400000000001e-06, "loss": 1.306, "step": 18560 }, { "epoch": 1.857, "grad_norm": 8.76899528503418, "learning_rate": 6.2884e-06, "loss": 1.3676, "step": 18570 }, { "epoch": 1.858, "grad_norm": 5.81062126159668, "learning_rate": 6.2864000000000005e-06, "loss": 0.9241, "step": 18580 }, { "epoch": 1.859, "grad_norm": 6.134930610656738, "learning_rate": 6.2844e-06, "loss": 1.2752, "step": 18590 }, { "epoch": 1.8599999999999999, "grad_norm": 8.963726997375488, "learning_rate": 6.282400000000001e-06, "loss": 1.2727, "step": 18600 }, { "epoch": 1.861, "grad_norm": 8.050171852111816, "learning_rate": 
6.280400000000001e-06, "loss": 1.5406, "step": 18610 }, { "epoch": 1.862, "grad_norm": 8.380586624145508, "learning_rate": 6.2784e-06, "loss": 1.1253, "step": 18620 }, { "epoch": 1.863, "grad_norm": 7.93909215927124, "learning_rate": 6.276400000000001e-06, "loss": 1.4443, "step": 18630 }, { "epoch": 1.8639999999999999, "grad_norm": 9.63142204284668, "learning_rate": 6.2744000000000004e-06, "loss": 1.277, "step": 18640 }, { "epoch": 1.865, "grad_norm": 7.523706436157227, "learning_rate": 6.2724e-06, "loss": 1.4844, "step": 18650 }, { "epoch": 1.866, "grad_norm": 8.535152435302734, "learning_rate": 6.270400000000001e-06, "loss": 1.3108, "step": 18660 }, { "epoch": 1.867, "grad_norm": 7.416932582855225, "learning_rate": 6.2684e-06, "loss": 1.3213, "step": 18670 }, { "epoch": 1.8679999999999999, "grad_norm": 9.39720344543457, "learning_rate": 6.2664e-06, "loss": 1.1931, "step": 18680 }, { "epoch": 1.869, "grad_norm": 6.450692176818848, "learning_rate": 6.2644000000000005e-06, "loss": 1.2323, "step": 18690 }, { "epoch": 1.87, "grad_norm": 8.2543306350708, "learning_rate": 6.2624e-06, "loss": 1.0758, "step": 18700 }, { "epoch": 1.871, "grad_norm": 13.860767364501953, "learning_rate": 6.260400000000001e-06, "loss": 1.411, "step": 18710 }, { "epoch": 1.8719999999999999, "grad_norm": 12.570108413696289, "learning_rate": 6.2584e-06, "loss": 1.5203, "step": 18720 }, { "epoch": 1.873, "grad_norm": 6.181728363037109, "learning_rate": 6.2564e-06, "loss": 1.2108, "step": 18730 }, { "epoch": 1.874, "grad_norm": 14.150256156921387, "learning_rate": 6.254400000000001e-06, "loss": 1.4992, "step": 18740 }, { "epoch": 1.875, "grad_norm": 8.060895919799805, "learning_rate": 6.2524000000000005e-06, "loss": 1.0517, "step": 18750 }, { "epoch": 1.876, "grad_norm": 8.713677406311035, "learning_rate": 6.250400000000001e-06, "loss": 1.32, "step": 18760 }, { "epoch": 1.877, "grad_norm": 10.976401329040527, "learning_rate": 6.2484e-06, "loss": 1.1294, "step": 18770 }, { "epoch": 
1.8780000000000001, "grad_norm": 7.241365432739258, "learning_rate": 6.2464e-06, "loss": 1.2743, "step": 18780 }, { "epoch": 1.879, "grad_norm": 11.641265869140625, "learning_rate": 6.244400000000001e-06, "loss": 1.3557, "step": 18790 }, { "epoch": 1.88, "grad_norm": 9.33780574798584, "learning_rate": 6.2424000000000006e-06, "loss": 1.1436, "step": 18800 }, { "epoch": 1.881, "grad_norm": 5.319612979888916, "learning_rate": 6.240400000000001e-06, "loss": 1.0522, "step": 18810 }, { "epoch": 1.8820000000000001, "grad_norm": 6.498361110687256, "learning_rate": 6.2384e-06, "loss": 1.4961, "step": 18820 }, { "epoch": 1.883, "grad_norm": 9.72639274597168, "learning_rate": 6.2364e-06, "loss": 1.2587, "step": 18830 }, { "epoch": 1.884, "grad_norm": 6.731999397277832, "learning_rate": 6.234400000000001e-06, "loss": 1.1516, "step": 18840 }, { "epoch": 1.885, "grad_norm": 10.226900100708008, "learning_rate": 6.232400000000001e-06, "loss": 1.5838, "step": 18850 }, { "epoch": 1.8860000000000001, "grad_norm": 9.392486572265625, "learning_rate": 6.2304000000000005e-06, "loss": 1.3219, "step": 18860 }, { "epoch": 1.887, "grad_norm": 9.558850288391113, "learning_rate": 6.2284e-06, "loss": 1.2832, "step": 18870 }, { "epoch": 1.888, "grad_norm": 5.857322692871094, "learning_rate": 6.2264e-06, "loss": 1.1081, "step": 18880 }, { "epoch": 1.889, "grad_norm": 18.342201232910156, "learning_rate": 6.2244e-06, "loss": 1.2177, "step": 18890 }, { "epoch": 1.8900000000000001, "grad_norm": 11.260883331298828, "learning_rate": 6.222400000000001e-06, "loss": 1.1534, "step": 18900 }, { "epoch": 1.891, "grad_norm": 8.755961418151855, "learning_rate": 6.220400000000001e-06, "loss": 1.1932, "step": 18910 }, { "epoch": 1.892, "grad_norm": 11.781874656677246, "learning_rate": 6.2184e-06, "loss": 1.1207, "step": 18920 }, { "epoch": 1.893, "grad_norm": 8.9508056640625, "learning_rate": 6.2164e-06, "loss": 1.4427, "step": 18930 }, { "epoch": 1.8940000000000001, "grad_norm": 8.023183822631836, 
"learning_rate": 6.2144e-06, "loss": 1.2827, "step": 18940 }, { "epoch": 1.895, "grad_norm": 8.216286659240723, "learning_rate": 6.212400000000001e-06, "loss": 1.006, "step": 18950 }, { "epoch": 1.896, "grad_norm": 9.21390438079834, "learning_rate": 6.210400000000001e-06, "loss": 1.2437, "step": 18960 }, { "epoch": 1.897, "grad_norm": 12.665392875671387, "learning_rate": 6.2084e-06, "loss": 1.3647, "step": 18970 }, { "epoch": 1.8980000000000001, "grad_norm": 11.494665145874023, "learning_rate": 6.2064e-06, "loss": 1.1887, "step": 18980 }, { "epoch": 1.899, "grad_norm": 13.314470291137695, "learning_rate": 6.2044e-06, "loss": 1.3803, "step": 18990 }, { "epoch": 1.9, "grad_norm": 12.163642883300781, "learning_rate": 6.202400000000001e-06, "loss": 1.3669, "step": 19000 }, { "epoch": 1.901, "grad_norm": 7.283416748046875, "learning_rate": 6.200400000000001e-06, "loss": 1.415, "step": 19010 }, { "epoch": 1.9020000000000001, "grad_norm": 12.034050941467285, "learning_rate": 6.1984e-06, "loss": 1.6343, "step": 19020 }, { "epoch": 1.903, "grad_norm": 7.791954040527344, "learning_rate": 6.1964000000000005e-06, "loss": 1.282, "step": 19030 }, { "epoch": 1.904, "grad_norm": 6.993837833404541, "learning_rate": 6.1944e-06, "loss": 1.2519, "step": 19040 }, { "epoch": 1.905, "grad_norm": 8.104583740234375, "learning_rate": 6.192400000000001e-06, "loss": 1.4128, "step": 19050 }, { "epoch": 1.9060000000000001, "grad_norm": 7.489596366882324, "learning_rate": 6.190400000000001e-06, "loss": 1.266, "step": 19060 }, { "epoch": 1.907, "grad_norm": 11.90142822265625, "learning_rate": 6.1884e-06, "loss": 1.3482, "step": 19070 }, { "epoch": 1.908, "grad_norm": 8.12934398651123, "learning_rate": 6.186400000000001e-06, "loss": 1.1743, "step": 19080 }, { "epoch": 1.909, "grad_norm": 8.49506950378418, "learning_rate": 6.1844000000000005e-06, "loss": 1.3684, "step": 19090 }, { "epoch": 1.9100000000000001, "grad_norm": 8.933235168457031, "learning_rate": 6.1824e-06, "loss": 1.2775, "step": 19100 
}, { "epoch": 1.911, "grad_norm": 7.132901668548584, "learning_rate": 6.1806000000000014e-06, "loss": 1.1424, "step": 19110 }, { "epoch": 1.912, "grad_norm": 9.866204261779785, "learning_rate": 6.1786000000000004e-06, "loss": 1.2253, "step": 19120 }, { "epoch": 1.913, "grad_norm": 7.24080228805542, "learning_rate": 6.1766e-06, "loss": 1.058, "step": 19130 }, { "epoch": 1.9140000000000001, "grad_norm": 9.692338943481445, "learning_rate": 6.174600000000001e-06, "loss": 1.187, "step": 19140 }, { "epoch": 1.915, "grad_norm": 9.464874267578125, "learning_rate": 6.172600000000001e-06, "loss": 0.903, "step": 19150 }, { "epoch": 1.916, "grad_norm": 15.023681640625, "learning_rate": 6.170600000000001e-06, "loss": 1.4167, "step": 19160 }, { "epoch": 1.917, "grad_norm": 14.472908020019531, "learning_rate": 6.1686000000000005e-06, "loss": 1.6864, "step": 19170 }, { "epoch": 1.9180000000000001, "grad_norm": 8.763924598693848, "learning_rate": 6.1666e-06, "loss": 0.9868, "step": 19180 }, { "epoch": 1.919, "grad_norm": 8.98115348815918, "learning_rate": 6.1646e-06, "loss": 1.3005, "step": 19190 }, { "epoch": 1.92, "grad_norm": 8.18416690826416, "learning_rate": 6.162600000000001e-06, "loss": 1.5538, "step": 19200 }, { "epoch": 1.921, "grad_norm": 13.883437156677246, "learning_rate": 6.160600000000001e-06, "loss": 1.3643, "step": 19210 }, { "epoch": 1.9220000000000002, "grad_norm": 7.319094181060791, "learning_rate": 6.1586e-06, "loss": 1.4569, "step": 19220 }, { "epoch": 1.923, "grad_norm": 8.601305961608887, "learning_rate": 6.1566000000000005e-06, "loss": 1.1708, "step": 19230 }, { "epoch": 1.924, "grad_norm": 7.084498882293701, "learning_rate": 6.1546e-06, "loss": 1.3653, "step": 19240 }, { "epoch": 1.925, "grad_norm": 6.989315986633301, "learning_rate": 6.152600000000001e-06, "loss": 1.2432, "step": 19250 }, { "epoch": 1.9260000000000002, "grad_norm": 10.214592933654785, "learning_rate": 6.150600000000001e-06, "loss": 1.2812, "step": 19260 }, { "epoch": 1.927, "grad_norm": 
7.1303887367248535, "learning_rate": 6.1486e-06, "loss": 1.2859, "step": 19270 }, { "epoch": 1.928, "grad_norm": 6.026612758636475, "learning_rate": 6.146600000000001e-06, "loss": 1.1633, "step": 19280 }, { "epoch": 1.929, "grad_norm": 15.087567329406738, "learning_rate": 6.1446000000000004e-06, "loss": 1.2206, "step": 19290 }, { "epoch": 1.9300000000000002, "grad_norm": 8.562554359436035, "learning_rate": 6.142600000000001e-06, "loss": 1.0561, "step": 19300 }, { "epoch": 1.931, "grad_norm": 8.63221549987793, "learning_rate": 6.140600000000001e-06, "loss": 1.1875, "step": 19310 }, { "epoch": 1.932, "grad_norm": 6.397682189941406, "learning_rate": 6.1386e-06, "loss": 1.3385, "step": 19320 }, { "epoch": 1.933, "grad_norm": 10.876133918762207, "learning_rate": 6.136600000000001e-06, "loss": 1.3732, "step": 19330 }, { "epoch": 1.9340000000000002, "grad_norm": 7.535799980163574, "learning_rate": 6.1346000000000005e-06, "loss": 1.4288, "step": 19340 }, { "epoch": 1.935, "grad_norm": 18.05406379699707, "learning_rate": 6.1326e-06, "loss": 1.3481, "step": 19350 }, { "epoch": 1.936, "grad_norm": 6.9458746910095215, "learning_rate": 6.130600000000001e-06, "loss": 1.4158, "step": 19360 }, { "epoch": 1.937, "grad_norm": 9.169232368469238, "learning_rate": 6.1286e-06, "loss": 1.1566, "step": 19370 }, { "epoch": 1.938, "grad_norm": 9.266847610473633, "learning_rate": 6.1266e-06, "loss": 1.1703, "step": 19380 }, { "epoch": 1.939, "grad_norm": 7.9493889808654785, "learning_rate": 6.124600000000001e-06, "loss": 1.3014, "step": 19390 }, { "epoch": 1.94, "grad_norm": 9.748091697692871, "learning_rate": 6.1226000000000005e-06, "loss": 1.4498, "step": 19400 }, { "epoch": 1.9409999999999998, "grad_norm": 12.063608169555664, "learning_rate": 6.120600000000001e-06, "loss": 1.1998, "step": 19410 }, { "epoch": 1.942, "grad_norm": 9.246814727783203, "learning_rate": 6.1186e-06, "loss": 1.1918, "step": 19420 }, { "epoch": 1.943, "grad_norm": 5.803473472595215, "learning_rate": 6.1166e-06, 
"loss": 1.2184, "step": 19430 }, { "epoch": 1.944, "grad_norm": 7.749234199523926, "learning_rate": 6.114600000000001e-06, "loss": 1.0632, "step": 19440 }, { "epoch": 1.9449999999999998, "grad_norm": 7.274741172790527, "learning_rate": 6.1126000000000006e-06, "loss": 1.316, "step": 19450 }, { "epoch": 1.946, "grad_norm": 4.110113620758057, "learning_rate": 6.110600000000001e-06, "loss": 1.2405, "step": 19460 }, { "epoch": 1.947, "grad_norm": 14.665881156921387, "learning_rate": 6.1086e-06, "loss": 1.4844, "step": 19470 }, { "epoch": 1.948, "grad_norm": 6.842731475830078, "learning_rate": 6.1066e-06, "loss": 1.1146, "step": 19480 }, { "epoch": 1.9489999999999998, "grad_norm": 12.867478370666504, "learning_rate": 6.104600000000001e-06, "loss": 1.3227, "step": 19490 }, { "epoch": 1.95, "grad_norm": 7.028247356414795, "learning_rate": 6.102600000000001e-06, "loss": 1.3198, "step": 19500 }, { "epoch": 1.951, "grad_norm": 10.429786682128906, "learning_rate": 6.100600000000001e-06, "loss": 1.4436, "step": 19510 }, { "epoch": 1.952, "grad_norm": 10.92299747467041, "learning_rate": 6.0986e-06, "loss": 1.0758, "step": 19520 }, { "epoch": 1.9529999999999998, "grad_norm": 10.219425201416016, "learning_rate": 6.0966e-06, "loss": 1.2572, "step": 19530 }, { "epoch": 1.954, "grad_norm": 11.09958553314209, "learning_rate": 6.094600000000001e-06, "loss": 1.1703, "step": 19540 }, { "epoch": 1.955, "grad_norm": 12.474445343017578, "learning_rate": 6.092600000000001e-06, "loss": 1.4062, "step": 19550 }, { "epoch": 1.956, "grad_norm": 7.978271007537842, "learning_rate": 6.090600000000001e-06, "loss": 1.1711, "step": 19560 }, { "epoch": 1.9569999999999999, "grad_norm": 11.760424613952637, "learning_rate": 6.0886000000000005e-06, "loss": 1.1801, "step": 19570 }, { "epoch": 1.958, "grad_norm": 12.557250022888184, "learning_rate": 6.0866e-06, "loss": 1.4763, "step": 19580 }, { "epoch": 1.959, "grad_norm": 12.114015579223633, "learning_rate": 6.0846e-06, "loss": 1.3435, "step": 19590 }, { 
"epoch": 1.96, "grad_norm": 8.940141677856445, "learning_rate": 6.082600000000001e-06, "loss": 1.3413, "step": 19600 }, { "epoch": 1.9609999999999999, "grad_norm": 9.084573745727539, "learning_rate": 6.080600000000001e-06, "loss": 1.0258, "step": 19610 }, { "epoch": 1.962, "grad_norm": 11.686196327209473, "learning_rate": 6.0786e-06, "loss": 1.2037, "step": 19620 }, { "epoch": 1.963, "grad_norm": 8.21097469329834, "learning_rate": 6.0766e-06, "loss": 1.3144, "step": 19630 }, { "epoch": 1.964, "grad_norm": 12.228205680847168, "learning_rate": 6.0746e-06, "loss": 1.228, "step": 19640 }, { "epoch": 1.9649999999999999, "grad_norm": 10.624167442321777, "learning_rate": 6.072600000000001e-06, "loss": 1.1957, "step": 19650 }, { "epoch": 1.966, "grad_norm": 12.683236122131348, "learning_rate": 6.070600000000001e-06, "loss": 1.2149, "step": 19660 }, { "epoch": 1.967, "grad_norm": 18.320695877075195, "learning_rate": 6.0686e-06, "loss": 1.2989, "step": 19670 }, { "epoch": 1.968, "grad_norm": 15.263041496276855, "learning_rate": 6.0666000000000005e-06, "loss": 1.2909, "step": 19680 }, { "epoch": 1.9689999999999999, "grad_norm": 2.275036334991455, "learning_rate": 6.0646e-06, "loss": 1.1391, "step": 19690 }, { "epoch": 1.97, "grad_norm": 7.815142631530762, "learning_rate": 6.062600000000001e-06, "loss": 1.0587, "step": 19700 }, { "epoch": 1.971, "grad_norm": 15.363265991210938, "learning_rate": 6.060600000000001e-06, "loss": 1.3399, "step": 19710 }, { "epoch": 1.972, "grad_norm": 8.549234390258789, "learning_rate": 6.0586e-06, "loss": 1.3953, "step": 19720 }, { "epoch": 1.9729999999999999, "grad_norm": 8.243337631225586, "learning_rate": 6.056600000000001e-06, "loss": 1.0672, "step": 19730 }, { "epoch": 1.974, "grad_norm": 8.611261367797852, "learning_rate": 6.0546000000000004e-06, "loss": 1.1141, "step": 19740 }, { "epoch": 1.975, "grad_norm": 10.103672981262207, "learning_rate": 6.0526e-06, "loss": 1.5502, "step": 19750 }, { "epoch": 1.976, "grad_norm": 10.806610107421875, 
"learning_rate": 6.050600000000001e-06, "loss": 1.3437, "step": 19760 }, { "epoch": 1.9769999999999999, "grad_norm": 14.115330696105957, "learning_rate": 6.0486e-06, "loss": 1.3889, "step": 19770 }, { "epoch": 1.978, "grad_norm": 13.094983100891113, "learning_rate": 6.0466e-06, "loss": 1.169, "step": 19780 }, { "epoch": 1.979, "grad_norm": 7.192217826843262, "learning_rate": 6.0446000000000005e-06, "loss": 1.0684, "step": 19790 }, { "epoch": 1.98, "grad_norm": 15.216240882873535, "learning_rate": 6.0426e-06, "loss": 1.5541, "step": 19800 }, { "epoch": 1.9809999999999999, "grad_norm": 5.276790618896484, "learning_rate": 6.040600000000001e-06, "loss": 1.226, "step": 19810 }, { "epoch": 1.982, "grad_norm": 6.967104911804199, "learning_rate": 6.0386e-06, "loss": 1.2491, "step": 19820 }, { "epoch": 1.983, "grad_norm": 17.321969985961914, "learning_rate": 6.0366e-06, "loss": 0.8424, "step": 19830 }, { "epoch": 1.984, "grad_norm": 17.66995620727539, "learning_rate": 6.034600000000001e-06, "loss": 1.3927, "step": 19840 }, { "epoch": 1.9849999999999999, "grad_norm": 6.749570846557617, "learning_rate": 6.0326000000000005e-06, "loss": 1.3048, "step": 19850 }, { "epoch": 1.986, "grad_norm": 11.859375, "learning_rate": 6.030600000000001e-06, "loss": 1.161, "step": 19860 }, { "epoch": 1.987, "grad_norm": 10.691636085510254, "learning_rate": 6.0286e-06, "loss": 1.5632, "step": 19870 }, { "epoch": 1.988, "grad_norm": 14.809621810913086, "learning_rate": 6.0266e-06, "loss": 0.9772, "step": 19880 }, { "epoch": 1.9889999999999999, "grad_norm": 7.563210487365723, "learning_rate": 6.024600000000001e-06, "loss": 1.094, "step": 19890 }, { "epoch": 1.99, "grad_norm": 12.931833267211914, "learning_rate": 6.022600000000001e-06, "loss": 1.3449, "step": 19900 }, { "epoch": 1.991, "grad_norm": 10.986078262329102, "learning_rate": 6.020600000000001e-06, "loss": 1.1526, "step": 19910 }, { "epoch": 1.992, "grad_norm": 10.393342971801758, "learning_rate": 6.0186e-06, "loss": 1.0893, "step": 19920 
}, { "epoch": 1.9929999999999999, "grad_norm": 1.711501121520996, "learning_rate": 6.0166e-06, "loss": 1.4122, "step": 19930 }, { "epoch": 1.994, "grad_norm": 9.151814460754395, "learning_rate": 6.014600000000001e-06, "loss": 1.2103, "step": 19940 }, { "epoch": 1.995, "grad_norm": 8.763230323791504, "learning_rate": 6.012600000000001e-06, "loss": 1.1704, "step": 19950 }, { "epoch": 1.996, "grad_norm": 13.51198959350586, "learning_rate": 6.0106000000000005e-06, "loss": 1.2634, "step": 19960 }, { "epoch": 1.9969999999999999, "grad_norm": 9.030797958374023, "learning_rate": 6.0086e-06, "loss": 1.4099, "step": 19970 }, { "epoch": 1.998, "grad_norm": 9.23218059539795, "learning_rate": 6.0066e-06, "loss": 1.3554, "step": 19980 }, { "epoch": 1.999, "grad_norm": 14.310030937194824, "learning_rate": 6.0046e-06, "loss": 1.404, "step": 19990 }, { "epoch": 2.0, "grad_norm": 7.043206214904785, "learning_rate": 6.002600000000001e-06, "loss": 1.3722, "step": 20000 }, { "epoch": 2.001, "grad_norm": 8.359085083007812, "learning_rate": 6.000600000000001e-06, "loss": 1.2906, "step": 20010 }, { "epoch": 2.002, "grad_norm": 8.86248779296875, "learning_rate": 5.9986e-06, "loss": 1.4455, "step": 20020 }, { "epoch": 2.003, "grad_norm": 16.501596450805664, "learning_rate": 5.9966e-06, "loss": 1.3255, "step": 20030 }, { "epoch": 2.004, "grad_norm": 10.979950904846191, "learning_rate": 5.9946e-06, "loss": 1.2596, "step": 20040 }, { "epoch": 2.005, "grad_norm": 9.642014503479004, "learning_rate": 5.992600000000001e-06, "loss": 1.0283, "step": 20050 }, { "epoch": 2.006, "grad_norm": 3.6039867401123047, "learning_rate": 5.990600000000001e-06, "loss": 1.1088, "step": 20060 }, { "epoch": 2.007, "grad_norm": 12.357213020324707, "learning_rate": 5.9886e-06, "loss": 1.3621, "step": 20070 }, { "epoch": 2.008, "grad_norm": 4.142826080322266, "learning_rate": 5.9866e-06, "loss": 0.91, "step": 20080 }, { "epoch": 2.009, "grad_norm": 22.593889236450195, "learning_rate": 5.9846e-06, "loss": 1.2554, 
"step": 20090 }, { "epoch": 2.01, "grad_norm": 10.526331901550293, "learning_rate": 5.982600000000001e-06, "loss": 1.3209, "step": 20100 }, { "epoch": 2.011, "grad_norm": 11.996562957763672, "learning_rate": 5.980600000000001e-06, "loss": 1.1241, "step": 20110 }, { "epoch": 2.012, "grad_norm": 16.58564567565918, "learning_rate": 5.9786e-06, "loss": 1.2562, "step": 20120 }, { "epoch": 2.013, "grad_norm": 8.690632820129395, "learning_rate": 5.9766000000000005e-06, "loss": 1.1729, "step": 20130 }, { "epoch": 2.014, "grad_norm": 13.694604873657227, "learning_rate": 5.9746e-06, "loss": 1.071, "step": 20140 }, { "epoch": 2.015, "grad_norm": 16.12174415588379, "learning_rate": 5.9726e-06, "loss": 1.4992, "step": 20150 }, { "epoch": 2.016, "grad_norm": 10.422120094299316, "learning_rate": 5.970600000000001e-06, "loss": 1.1519, "step": 20160 }, { "epoch": 2.017, "grad_norm": 11.646449089050293, "learning_rate": 5.9686e-06, "loss": 1.4571, "step": 20170 }, { "epoch": 2.018, "grad_norm": 9.812124252319336, "learning_rate": 5.9666e-06, "loss": 0.9731, "step": 20180 }, { "epoch": 2.019, "grad_norm": 16.334938049316406, "learning_rate": 5.9646000000000005e-06, "loss": 0.8667, "step": 20190 }, { "epoch": 2.02, "grad_norm": 8.324564933776855, "learning_rate": 5.9626e-06, "loss": 1.2358, "step": 20200 }, { "epoch": 2.021, "grad_norm": 9.794655799865723, "learning_rate": 5.960600000000001e-06, "loss": 1.146, "step": 20210 }, { "epoch": 2.022, "grad_norm": 20.031511306762695, "learning_rate": 5.9586e-06, "loss": 1.4812, "step": 20220 }, { "epoch": 2.023, "grad_norm": 10.509167671203613, "learning_rate": 5.9566e-06, "loss": 1.2301, "step": 20230 }, { "epoch": 2.024, "grad_norm": 9.08087158203125, "learning_rate": 5.9546000000000006e-06, "loss": 1.2695, "step": 20240 }, { "epoch": 2.025, "grad_norm": 12.36376953125, "learning_rate": 5.9526e-06, "loss": 1.3356, "step": 20250 }, { "epoch": 2.026, "grad_norm": 7.611872673034668, "learning_rate": 5.950600000000001e-06, "loss": 0.8429, 
"step": 20260 }, { "epoch": 2.027, "grad_norm": 5.726324081420898, "learning_rate": 5.948600000000001e-06, "loss": 1.1345, "step": 20270 }, { "epoch": 2.028, "grad_norm": 9.937904357910156, "learning_rate": 5.9466e-06, "loss": 1.1463, "step": 20280 }, { "epoch": 2.029, "grad_norm": 6.501011848449707, "learning_rate": 5.944600000000001e-06, "loss": 1.1694, "step": 20290 }, { "epoch": 2.03, "grad_norm": 17.666231155395508, "learning_rate": 5.9426000000000005e-06, "loss": 1.2036, "step": 20300 }, { "epoch": 2.031, "grad_norm": 11.250253677368164, "learning_rate": 5.940600000000001e-06, "loss": 1.2906, "step": 20310 }, { "epoch": 2.032, "grad_norm": 8.481427192687988, "learning_rate": 5.938600000000001e-06, "loss": 1.0696, "step": 20320 }, { "epoch": 2.033, "grad_norm": 12.459898948669434, "learning_rate": 5.9366e-06, "loss": 1.1391, "step": 20330 }, { "epoch": 2.034, "grad_norm": 10.116827964782715, "learning_rate": 5.934600000000001e-06, "loss": 1.7443, "step": 20340 }, { "epoch": 2.035, "grad_norm": 14.605745315551758, "learning_rate": 5.932600000000001e-06, "loss": 1.0545, "step": 20350 }, { "epoch": 2.036, "grad_norm": 9.036723136901855, "learning_rate": 5.9306000000000004e-06, "loss": 1.2539, "step": 20360 }, { "epoch": 2.037, "grad_norm": 15.303418159484863, "learning_rate": 5.928600000000001e-06, "loss": 0.9421, "step": 20370 }, { "epoch": 2.038, "grad_norm": 16.792949676513672, "learning_rate": 5.9266e-06, "loss": 1.1034, "step": 20380 }, { "epoch": 2.039, "grad_norm": 19.234596252441406, "learning_rate": 5.9246e-06, "loss": 1.6186, "step": 20390 }, { "epoch": 2.04, "grad_norm": 6.712567329406738, "learning_rate": 5.922600000000001e-06, "loss": 1.3349, "step": 20400 }, { "epoch": 2.041, "grad_norm": 20.359098434448242, "learning_rate": 5.9206000000000005e-06, "loss": 1.3055, "step": 20410 }, { "epoch": 2.042, "grad_norm": 10.075861930847168, "learning_rate": 5.918600000000001e-06, "loss": 1.4169, "step": 20420 }, { "epoch": 2.043, "grad_norm": 
13.075138092041016, "learning_rate": 5.9166e-06, "loss": 1.3647, "step": 20430 }, { "epoch": 2.044, "grad_norm": 11.073700904846191, "learning_rate": 5.9146e-06, "loss": 1.2261, "step": 20440 }, { "epoch": 2.045, "grad_norm": 18.306596755981445, "learning_rate": 5.912600000000001e-06, "loss": 1.1817, "step": 20450 }, { "epoch": 2.046, "grad_norm": 13.988372802734375, "learning_rate": 5.910600000000001e-06, "loss": 1.1849, "step": 20460 }, { "epoch": 2.047, "grad_norm": 14.16682243347168, "learning_rate": 5.908600000000001e-06, "loss": 1.2059, "step": 20470 }, { "epoch": 2.048, "grad_norm": 11.316349983215332, "learning_rate": 5.9066e-06, "loss": 1.5074, "step": 20480 }, { "epoch": 2.049, "grad_norm": 10.561089515686035, "learning_rate": 5.9046e-06, "loss": 1.3633, "step": 20490 }, { "epoch": 2.05, "grad_norm": 13.897974967956543, "learning_rate": 5.902600000000001e-06, "loss": 1.0759, "step": 20500 }, { "epoch": 2.051, "grad_norm": 18.02426528930664, "learning_rate": 5.900600000000001e-06, "loss": 1.3124, "step": 20510 }, { "epoch": 2.052, "grad_norm": 18.582725524902344, "learning_rate": 5.898600000000001e-06, "loss": 1.391, "step": 20520 }, { "epoch": 2.053, "grad_norm": 14.320472717285156, "learning_rate": 5.8966000000000004e-06, "loss": 1.1819, "step": 20530 }, { "epoch": 2.054, "grad_norm": 8.395525932312012, "learning_rate": 5.8946e-06, "loss": 0.9906, "step": 20540 }, { "epoch": 2.055, "grad_norm": 10.70889663696289, "learning_rate": 5.8926e-06, "loss": 1.2421, "step": 20550 }, { "epoch": 2.056, "grad_norm": 16.526103973388672, "learning_rate": 5.890600000000001e-06, "loss": 1.3955, "step": 20560 }, { "epoch": 2.057, "grad_norm": 17.614309310913086, "learning_rate": 5.888600000000001e-06, "loss": 1.0806, "step": 20570 }, { "epoch": 2.058, "grad_norm": 16.932554244995117, "learning_rate": 5.8866e-06, "loss": 1.2914, "step": 20580 }, { "epoch": 2.059, "grad_norm": 11.455682754516602, "learning_rate": 5.8846e-06, "loss": 1.2873, "step": 20590 }, { "epoch": 
2.06, "grad_norm": 5.983027458190918, "learning_rate": 5.8826e-06, "loss": 1.0193, "step": 20600 }, { "epoch": 2.061, "grad_norm": 13.491589546203613, "learning_rate": 5.880600000000001e-06, "loss": 1.2566, "step": 20610 }, { "epoch": 2.062, "grad_norm": 16.717126846313477, "learning_rate": 5.878600000000001e-06, "loss": 1.0073, "step": 20620 }, { "epoch": 2.063, "grad_norm": 17.26605224609375, "learning_rate": 5.8766e-06, "loss": 1.3205, "step": 20630 }, { "epoch": 2.064, "grad_norm": 2.3089957237243652, "learning_rate": 5.8746000000000005e-06, "loss": 1.0909, "step": 20640 }, { "epoch": 2.065, "grad_norm": 16.260528564453125, "learning_rate": 5.8726e-06, "loss": 1.2346, "step": 20650 }, { "epoch": 2.066, "grad_norm": 15.177936553955078, "learning_rate": 5.870600000000001e-06, "loss": 1.2497, "step": 20660 }, { "epoch": 2.067, "grad_norm": 8.730908393859863, "learning_rate": 5.868600000000001e-06, "loss": 1.0046, "step": 20670 }, { "epoch": 2.068, "grad_norm": 13.222381591796875, "learning_rate": 5.8666e-06, "loss": 0.9502, "step": 20680 }, { "epoch": 2.069, "grad_norm": 17.92753791809082, "learning_rate": 5.8646000000000006e-06, "loss": 1.2222, "step": 20690 }, { "epoch": 2.07, "grad_norm": 7.362749099731445, "learning_rate": 5.8626e-06, "loss": 1.2039, "step": 20700 }, { "epoch": 2.071, "grad_norm": 15.276329040527344, "learning_rate": 5.860600000000001e-06, "loss": 1.4616, "step": 20710 }, { "epoch": 2.072, "grad_norm": 16.765426635742188, "learning_rate": 5.858600000000001e-06, "loss": 1.2141, "step": 20720 }, { "epoch": 2.073, "grad_norm": 9.752132415771484, "learning_rate": 5.8566e-06, "loss": 1.2949, "step": 20730 }, { "epoch": 2.074, "grad_norm": 9.63822078704834, "learning_rate": 5.854600000000001e-06, "loss": 1.0549, "step": 20740 }, { "epoch": 2.075, "grad_norm": 15.761144638061523, "learning_rate": 5.8526000000000005e-06, "loss": 1.308, "step": 20750 }, { "epoch": 2.076, "grad_norm": 16.825395584106445, "learning_rate": 5.8506e-06, "loss": 1.2491, 
"step": 20760 }, { "epoch": 2.077, "grad_norm": 11.0346097946167, "learning_rate": 5.848600000000001e-06, "loss": 1.2129, "step": 20770 }, { "epoch": 2.078, "grad_norm": 15.61067008972168, "learning_rate": 5.8466e-06, "loss": 1.6085, "step": 20780 }, { "epoch": 2.079, "grad_norm": 11.953408241271973, "learning_rate": 5.8446e-06, "loss": 1.3122, "step": 20790 }, { "epoch": 2.08, "grad_norm": 18.49625015258789, "learning_rate": 5.842600000000001e-06, "loss": 1.1254, "step": 20800 }, { "epoch": 2.081, "grad_norm": 11.650029182434082, "learning_rate": 5.8406000000000005e-06, "loss": 1.1764, "step": 20810 }, { "epoch": 2.082, "grad_norm": 10.5491943359375, "learning_rate": 5.838600000000001e-06, "loss": 1.3375, "step": 20820 }, { "epoch": 2.083, "grad_norm": 14.041765213012695, "learning_rate": 5.8366e-06, "loss": 1.0253, "step": 20830 }, { "epoch": 2.084, "grad_norm": 11.3214111328125, "learning_rate": 5.8346e-06, "loss": 1.0934, "step": 20840 }, { "epoch": 2.085, "grad_norm": 7.57493782043457, "learning_rate": 5.832600000000001e-06, "loss": 1.4174, "step": 20850 }, { "epoch": 2.086, "grad_norm": 5.070793628692627, "learning_rate": 5.8306000000000006e-06, "loss": 1.0444, "step": 20860 }, { "epoch": 2.087, "grad_norm": 9.336221694946289, "learning_rate": 5.828600000000001e-06, "loss": 1.1773, "step": 20870 }, { "epoch": 2.088, "grad_norm": 10.87009334564209, "learning_rate": 5.8266e-06, "loss": 1.3793, "step": 20880 }, { "epoch": 2.089, "grad_norm": 4.716193199157715, "learning_rate": 5.8246e-06, "loss": 1.3541, "step": 20890 }, { "epoch": 2.09, "grad_norm": 9.604031562805176, "learning_rate": 5.822600000000001e-06, "loss": 1.2471, "step": 20900 }, { "epoch": 2.091, "grad_norm": 10.2149019241333, "learning_rate": 5.820600000000001e-06, "loss": 1.3652, "step": 20910 }, { "epoch": 2.092, "grad_norm": 12.557777404785156, "learning_rate": 5.8186000000000005e-06, "loss": 1.1488, "step": 20920 }, { "epoch": 2.093, "grad_norm": 14.4224271774292, "learning_rate": 5.8166e-06, 
"loss": 1.1052, "step": 20930 }, { "epoch": 2.094, "grad_norm": 5.627710819244385, "learning_rate": 5.8146e-06, "loss": 1.134, "step": 20940 }, { "epoch": 2.095, "grad_norm": 8.86668586730957, "learning_rate": 5.8126e-06, "loss": 1.228, "step": 20950 }, { "epoch": 2.096, "grad_norm": 10.26710033416748, "learning_rate": 5.810600000000001e-06, "loss": 1.2947, "step": 20960 }, { "epoch": 2.097, "grad_norm": 6.951776504516602, "learning_rate": 5.808600000000001e-06, "loss": 1.3079, "step": 20970 }, { "epoch": 2.098, "grad_norm": 6.9181694984436035, "learning_rate": 5.8066e-06, "loss": 1.4363, "step": 20980 }, { "epoch": 2.099, "grad_norm": 12.030346870422363, "learning_rate": 5.8046e-06, "loss": 1.25, "step": 20990 }, { "epoch": 2.1, "grad_norm": 8.107671737670898, "learning_rate": 5.8026e-06, "loss": 1.0488, "step": 21000 }, { "epoch": 2.101, "grad_norm": 9.09518051147461, "learning_rate": 5.800600000000001e-06, "loss": 1.0411, "step": 21010 }, { "epoch": 2.102, "grad_norm": 9.358942031860352, "learning_rate": 5.798600000000001e-06, "loss": 1.2609, "step": 21020 }, { "epoch": 2.103, "grad_norm": 11.110774040222168, "learning_rate": 5.7966e-06, "loss": 1.5306, "step": 21030 }, { "epoch": 2.104, "grad_norm": 6.599695682525635, "learning_rate": 5.7946e-06, "loss": 1.5228, "step": 21040 }, { "epoch": 2.105, "grad_norm": 13.844576835632324, "learning_rate": 5.7926e-06, "loss": 1.2118, "step": 21050 }, { "epoch": 2.106, "grad_norm": 9.188498497009277, "learning_rate": 5.790600000000001e-06, "loss": 1.1999, "step": 21060 }, { "epoch": 2.107, "grad_norm": 14.822678565979004, "learning_rate": 5.788600000000001e-06, "loss": 1.1082, "step": 21070 }, { "epoch": 2.108, "grad_norm": 9.091185569763184, "learning_rate": 5.7866e-06, "loss": 1.4289, "step": 21080 }, { "epoch": 2.109, "grad_norm": 10.617810249328613, "learning_rate": 5.7846000000000005e-06, "loss": 0.8513, "step": 21090 }, { "epoch": 2.11, "grad_norm": 9.231242179870605, "learning_rate": 5.7826e-06, "loss": 1.1476, 
"step": 21100 }, { "epoch": 2.111, "grad_norm": 10.921043395996094, "learning_rate": 5.780600000000001e-06, "loss": 0.926, "step": 21110 }, { "epoch": 2.112, "grad_norm": 10.603200912475586, "learning_rate": 5.778600000000001e-06, "loss": 1.4924, "step": 21120 }, { "epoch": 2.113, "grad_norm": 10.499160766601562, "learning_rate": 5.7766e-06, "loss": 1.2843, "step": 21130 }, { "epoch": 2.114, "grad_norm": 5.243667125701904, "learning_rate": 5.774600000000001e-06, "loss": 1.1077, "step": 21140 }, { "epoch": 2.115, "grad_norm": 19.752384185791016, "learning_rate": 5.7726000000000004e-06, "loss": 1.5747, "step": 21150 }, { "epoch": 2.116, "grad_norm": 11.274402618408203, "learning_rate": 5.7706e-06, "loss": 1.1264, "step": 21160 }, { "epoch": 2.117, "grad_norm": 10.811861991882324, "learning_rate": 5.768600000000001e-06, "loss": 1.5061, "step": 21170 }, { "epoch": 2.118, "grad_norm": 8.00974178314209, "learning_rate": 5.7666e-06, "loss": 1.2202, "step": 21180 }, { "epoch": 2.1189999999999998, "grad_norm": 8.496709823608398, "learning_rate": 5.7646e-06, "loss": 1.2213, "step": 21190 }, { "epoch": 2.12, "grad_norm": 12.080475807189941, "learning_rate": 5.7626000000000005e-06, "loss": 1.0192, "step": 21200 }, { "epoch": 2.121, "grad_norm": 7.472439289093018, "learning_rate": 5.7606e-06, "loss": 1.1941, "step": 21210 }, { "epoch": 2.122, "grad_norm": 10.069131851196289, "learning_rate": 5.758600000000001e-06, "loss": 1.3131, "step": 21220 }, { "epoch": 2.123, "grad_norm": 10.276605606079102, "learning_rate": 5.7566e-06, "loss": 1.2206, "step": 21230 }, { "epoch": 2.124, "grad_norm": 14.540628433227539, "learning_rate": 5.7546e-06, "loss": 1.3218, "step": 21240 }, { "epoch": 2.125, "grad_norm": 10.035784721374512, "learning_rate": 5.752600000000001e-06, "loss": 1.2425, "step": 21250 }, { "epoch": 2.126, "grad_norm": 13.00879192352295, "learning_rate": 5.7506000000000005e-06, "loss": 1.3007, "step": 21260 }, { "epoch": 2.127, "grad_norm": 12.644988059997559, "learning_rate": 
5.748600000000001e-06, "loss": 1.0665, "step": 21270 }, { "epoch": 2.128, "grad_norm": 10.764633178710938, "learning_rate": 5.7466e-06, "loss": 1.3305, "step": 21280 }, { "epoch": 2.129, "grad_norm": 8.611163139343262, "learning_rate": 5.7446e-06, "loss": 1.094, "step": 21290 }, { "epoch": 2.13, "grad_norm": 15.174383163452148, "learning_rate": 5.742600000000001e-06, "loss": 1.352, "step": 21300 }, { "epoch": 2.1310000000000002, "grad_norm": 16.43441390991211, "learning_rate": 5.7406000000000006e-06, "loss": 1.2479, "step": 21310 }, { "epoch": 2.132, "grad_norm": 12.190568923950195, "learning_rate": 5.7386e-06, "loss": 1.3707, "step": 21320 }, { "epoch": 2.133, "grad_norm": 17.884214401245117, "learning_rate": 5.7366e-06, "loss": 1.4834, "step": 21330 }, { "epoch": 2.134, "grad_norm": 13.847634315490723, "learning_rate": 5.7346e-06, "loss": 1.4357, "step": 21340 }, { "epoch": 2.135, "grad_norm": 5.876624584197998, "learning_rate": 5.7326e-06, "loss": 1.1141, "step": 21350 }, { "epoch": 2.136, "grad_norm": 4.68764066696167, "learning_rate": 5.730600000000001e-06, "loss": 1.167, "step": 21360 }, { "epoch": 2.137, "grad_norm": 6.869556903839111, "learning_rate": 5.7286000000000005e-06, "loss": 1.2313, "step": 21370 }, { "epoch": 2.138, "grad_norm": 11.006397247314453, "learning_rate": 5.7265999999999995e-06, "loss": 1.0951, "step": 21380 }, { "epoch": 2.1390000000000002, "grad_norm": 13.15217113494873, "learning_rate": 5.7246e-06, "loss": 1.142, "step": 21390 }, { "epoch": 2.14, "grad_norm": 8.05618667602539, "learning_rate": 5.7226e-06, "loss": 1.2576, "step": 21400 }, { "epoch": 2.141, "grad_norm": 12.617327690124512, "learning_rate": 5.720600000000001e-06, "loss": 1.5833, "step": 21410 }, { "epoch": 2.142, "grad_norm": 10.060392379760742, "learning_rate": 5.718600000000001e-06, "loss": 1.1448, "step": 21420 }, { "epoch": 2.143, "grad_norm": 10.407938003540039, "learning_rate": 5.7166e-06, "loss": 1.2441, "step": 21430 }, { "epoch": 2.144, "grad_norm": 
7.878199577331543, "learning_rate": 5.7146e-06, "loss": 1.3795, "step": 21440 }, { "epoch": 2.145, "grad_norm": 6.762939929962158, "learning_rate": 5.7126e-06, "loss": 1.3003, "step": 21450 }, { "epoch": 2.146, "grad_norm": 8.068944931030273, "learning_rate": 5.710600000000001e-06, "loss": 1.4448, "step": 21460 }, { "epoch": 2.147, "grad_norm": 3.0987188816070557, "learning_rate": 5.708600000000001e-06, "loss": 1.3578, "step": 21470 }, { "epoch": 2.148, "grad_norm": 17.207706451416016, "learning_rate": 5.7066e-06, "loss": 1.2269, "step": 21480 }, { "epoch": 2.149, "grad_norm": 10.210413932800293, "learning_rate": 5.7046e-06, "loss": 1.2357, "step": 21490 }, { "epoch": 2.15, "grad_norm": 13.217605590820312, "learning_rate": 5.7026e-06, "loss": 1.0738, "step": 21500 }, { "epoch": 2.151, "grad_norm": 10.913657188415527, "learning_rate": 5.700600000000001e-06, "loss": 1.1518, "step": 21510 }, { "epoch": 2.152, "grad_norm": 6.151442527770996, "learning_rate": 5.698600000000001e-06, "loss": 0.8677, "step": 21520 }, { "epoch": 2.153, "grad_norm": 19.356172561645508, "learning_rate": 5.696600000000001e-06, "loss": 1.2562, "step": 21530 }, { "epoch": 2.154, "grad_norm": 12.904923439025879, "learning_rate": 5.6946000000000005e-06, "loss": 1.084, "step": 21540 }, { "epoch": 2.155, "grad_norm": 18.40146827697754, "learning_rate": 5.6926e-06, "loss": 1.4468, "step": 21550 }, { "epoch": 2.156, "grad_norm": 13.06874942779541, "learning_rate": 5.6906e-06, "loss": 1.0757, "step": 21560 }, { "epoch": 2.157, "grad_norm": 22.4668025970459, "learning_rate": 5.688600000000001e-06, "loss": 1.2014, "step": 21570 }, { "epoch": 2.158, "grad_norm": 17.596046447753906, "learning_rate": 5.686600000000001e-06, "loss": 1.0823, "step": 21580 }, { "epoch": 2.159, "grad_norm": 13.486132621765137, "learning_rate": 5.6846e-06, "loss": 0.8484, "step": 21590 }, { "epoch": 2.16, "grad_norm": 15.269599914550781, "learning_rate": 5.6826000000000004e-06, "loss": 1.411, "step": 21600 }, { "epoch": 2.161, 
"grad_norm": 13.218903541564941, "learning_rate": 5.6806e-06, "loss": 1.5654, "step": 21610 }, { "epoch": 2.162, "grad_norm": 11.5575532913208, "learning_rate": 5.678600000000001e-06, "loss": 1.1833, "step": 21620 }, { "epoch": 2.163, "grad_norm": 13.407923698425293, "learning_rate": 5.676600000000001e-06, "loss": 1.4647, "step": 21630 }, { "epoch": 2.164, "grad_norm": 9.270468711853027, "learning_rate": 5.6746e-06, "loss": 1.5015, "step": 21640 }, { "epoch": 2.165, "grad_norm": 7.586433410644531, "learning_rate": 5.6726000000000005e-06, "loss": 1.3256, "step": 21650 }, { "epoch": 2.166, "grad_norm": 7.8579254150390625, "learning_rate": 5.6706e-06, "loss": 1.3053, "step": 21660 }, { "epoch": 2.167, "grad_norm": 10.299223899841309, "learning_rate": 5.668600000000001e-06, "loss": 1.0897, "step": 21670 }, { "epoch": 2.168, "grad_norm": 11.130850791931152, "learning_rate": 5.666600000000001e-06, "loss": 1.2698, "step": 21680 }, { "epoch": 2.169, "grad_norm": 10.24389362335205, "learning_rate": 5.6646e-06, "loss": 1.2051, "step": 21690 }, { "epoch": 2.17, "grad_norm": 8.64181900024414, "learning_rate": 5.662600000000001e-06, "loss": 1.4917, "step": 21700 }, { "epoch": 2.171, "grad_norm": 10.065075874328613, "learning_rate": 5.6606000000000005e-06, "loss": 1.2284, "step": 21710 }, { "epoch": 2.172, "grad_norm": 9.187067985534668, "learning_rate": 5.6586e-06, "loss": 1.3018, "step": 21720 }, { "epoch": 2.173, "grad_norm": 8.424951553344727, "learning_rate": 5.656600000000001e-06, "loss": 1.1876, "step": 21730 }, { "epoch": 2.174, "grad_norm": 7.235924243927002, "learning_rate": 5.6546e-06, "loss": 1.3295, "step": 21740 }, { "epoch": 2.175, "grad_norm": 11.853707313537598, "learning_rate": 5.6526e-06, "loss": 1.3529, "step": 21750 }, { "epoch": 2.176, "grad_norm": 9.206238746643066, "learning_rate": 5.650600000000001e-06, "loss": 1.0752, "step": 21760 }, { "epoch": 2.177, "grad_norm": 8.951179504394531, "learning_rate": 5.6486000000000004e-06, "loss": 1.377, "step": 21770 
}, { "epoch": 2.178, "grad_norm": 12.429231643676758, "learning_rate": 5.646600000000001e-06, "loss": 1.3217, "step": 21780 }, { "epoch": 2.179, "grad_norm": 10.023266792297363, "learning_rate": 5.6446e-06, "loss": 1.4005, "step": 21790 }, { "epoch": 2.18, "grad_norm": 11.489121437072754, "learning_rate": 5.6426e-06, "loss": 1.1198, "step": 21800 }, { "epoch": 2.181, "grad_norm": 14.550138473510742, "learning_rate": 5.640600000000001e-06, "loss": 1.3565, "step": 21810 }, { "epoch": 2.182, "grad_norm": 14.225072860717773, "learning_rate": 5.6386000000000005e-06, "loss": 1.2437, "step": 21820 }, { "epoch": 2.183, "grad_norm": 11.032149314880371, "learning_rate": 5.636600000000001e-06, "loss": 1.2676, "step": 21830 }, { "epoch": 2.184, "grad_norm": 12.206518173217773, "learning_rate": 5.6346e-06, "loss": 1.2763, "step": 21840 }, { "epoch": 2.185, "grad_norm": 9.846009254455566, "learning_rate": 5.6326e-06, "loss": 1.3996, "step": 21850 }, { "epoch": 2.186, "grad_norm": 12.982499122619629, "learning_rate": 5.630600000000001e-06, "loss": 1.5355, "step": 21860 }, { "epoch": 2.187, "grad_norm": 10.666458129882812, "learning_rate": 5.628600000000001e-06, "loss": 1.2678, "step": 21870 }, { "epoch": 2.188, "grad_norm": 4.582517147064209, "learning_rate": 5.626600000000001e-06, "loss": 1.0917, "step": 21880 }, { "epoch": 2.189, "grad_norm": 5.828147888183594, "learning_rate": 5.6246e-06, "loss": 1.2607, "step": 21890 }, { "epoch": 2.19, "grad_norm": 5.398689270019531, "learning_rate": 5.6226e-06, "loss": 1.2599, "step": 21900 }, { "epoch": 2.191, "grad_norm": 10.559182167053223, "learning_rate": 5.620600000000001e-06, "loss": 1.3167, "step": 21910 }, { "epoch": 2.192, "grad_norm": 9.884818077087402, "learning_rate": 5.618600000000001e-06, "loss": 1.3074, "step": 21920 }, { "epoch": 2.193, "grad_norm": 4.861186504364014, "learning_rate": 5.6166000000000006e-06, "loss": 1.108, "step": 21930 }, { "epoch": 2.194, "grad_norm": 8.665057182312012, "learning_rate": 5.6146e-06, 
"loss": 1.026, "step": 21940 }, { "epoch": 2.195, "grad_norm": 13.082036972045898, "learning_rate": 5.6126e-06, "loss": 0.929, "step": 21950 }, { "epoch": 2.196, "grad_norm": 8.794427871704102, "learning_rate": 5.6106e-06, "loss": 1.1623, "step": 21960 }, { "epoch": 2.197, "grad_norm": 7.82203483581543, "learning_rate": 5.608600000000001e-06, "loss": 1.5323, "step": 21970 }, { "epoch": 2.198, "grad_norm": 11.010884284973145, "learning_rate": 5.606600000000001e-06, "loss": 1.4551, "step": 21980 }, { "epoch": 2.199, "grad_norm": 8.048137664794922, "learning_rate": 5.6046e-06, "loss": 1.1592, "step": 21990 }, { "epoch": 2.2, "grad_norm": 10.88292407989502, "learning_rate": 5.6026e-06, "loss": 1.1465, "step": 22000 }, { "epoch": 2.201, "grad_norm": 10.111478805541992, "learning_rate": 5.6006e-06, "loss": 1.4093, "step": 22010 }, { "epoch": 2.202, "grad_norm": 8.807930946350098, "learning_rate": 5.598600000000001e-06, "loss": 1.4189, "step": 22020 }, { "epoch": 2.203, "grad_norm": 9.214773178100586, "learning_rate": 5.596600000000001e-06, "loss": 1.1952, "step": 22030 }, { "epoch": 2.204, "grad_norm": 5.243673324584961, "learning_rate": 5.5946e-06, "loss": 1.3199, "step": 22040 }, { "epoch": 2.205, "grad_norm": 10.847339630126953, "learning_rate": 5.5926000000000005e-06, "loss": 1.4612, "step": 22050 }, { "epoch": 2.206, "grad_norm": 10.307455062866211, "learning_rate": 5.5906e-06, "loss": 1.1696, "step": 22060 }, { "epoch": 2.207, "grad_norm": 9.691621780395508, "learning_rate": 5.588600000000001e-06, "loss": 1.2542, "step": 22070 }, { "epoch": 2.208, "grad_norm": 8.657464027404785, "learning_rate": 5.586600000000001e-06, "loss": 1.1465, "step": 22080 }, { "epoch": 2.209, "grad_norm": 6.987508296966553, "learning_rate": 5.5846e-06, "loss": 1.4316, "step": 22090 }, { "epoch": 2.21, "grad_norm": 8.247391700744629, "learning_rate": 5.5826000000000006e-06, "loss": 1.4881, "step": 22100 }, { "epoch": 2.211, "grad_norm": 28.656522750854492, "learning_rate": 5.5806e-06, 
"loss": 1.2171, "step": 22110 }, { "epoch": 2.212, "grad_norm": 7.986879348754883, "learning_rate": 5.578600000000001e-06, "loss": 1.3549, "step": 22120 }, { "epoch": 2.213, "grad_norm": 5.683054447174072, "learning_rate": 5.576600000000001e-06, "loss": 1.1965, "step": 22130 }, { "epoch": 2.214, "grad_norm": 7.406133651733398, "learning_rate": 5.5746e-06, "loss": 1.424, "step": 22140 }, { "epoch": 2.215, "grad_norm": 5.793790817260742, "learning_rate": 5.572600000000001e-06, "loss": 1.1521, "step": 22150 }, { "epoch": 2.216, "grad_norm": 6.853982925415039, "learning_rate": 5.5706000000000005e-06, "loss": 1.4042, "step": 22160 }, { "epoch": 2.217, "grad_norm": 6.123567581176758, "learning_rate": 5.5686e-06, "loss": 1.2953, "step": 22170 }, { "epoch": 2.218, "grad_norm": 10.586806297302246, "learning_rate": 5.566600000000001e-06, "loss": 1.0775, "step": 22180 }, { "epoch": 2.219, "grad_norm": 6.696011543273926, "learning_rate": 5.5646e-06, "loss": 0.9103, "step": 22190 }, { "epoch": 2.22, "grad_norm": 8.582566261291504, "learning_rate": 5.5626e-06, "loss": 1.292, "step": 22200 }, { "epoch": 2.221, "grad_norm": 13.480894088745117, "learning_rate": 5.560600000000001e-06, "loss": 1.0996, "step": 22210 }, { "epoch": 2.222, "grad_norm": 6.7279839515686035, "learning_rate": 5.5586000000000004e-06, "loss": 1.3479, "step": 22220 }, { "epoch": 2.223, "grad_norm": 13.075544357299805, "learning_rate": 5.556600000000001e-06, "loss": 1.1347, "step": 22230 }, { "epoch": 2.224, "grad_norm": 8.447406768798828, "learning_rate": 5.5546e-06, "loss": 1.1557, "step": 22240 }, { "epoch": 2.225, "grad_norm": 9.735660552978516, "learning_rate": 5.5526e-06, "loss": 1.4598, "step": 22250 }, { "epoch": 2.226, "grad_norm": 11.862610816955566, "learning_rate": 5.550600000000001e-06, "loss": 1.0291, "step": 22260 }, { "epoch": 2.227, "grad_norm": 8.65831184387207, "learning_rate": 5.5486000000000005e-06, "loss": 1.7534, "step": 22270 }, { "epoch": 2.228, "grad_norm": 13.28524398803711, 
"learning_rate": 5.546600000000001e-06, "loss": 1.1519, "step": 22280 }, { "epoch": 2.229, "grad_norm": 5.044071674346924, "learning_rate": 5.5446e-06, "loss": 1.0504, "step": 22290 }, { "epoch": 2.23, "grad_norm": 5.706343173980713, "learning_rate": 5.5426e-06, "loss": 1.0858, "step": 22300 }, { "epoch": 2.231, "grad_norm": 9.080405235290527, "learning_rate": 5.540600000000001e-06, "loss": 1.3628, "step": 22310 }, { "epoch": 2.232, "grad_norm": 9.303692817687988, "learning_rate": 5.538600000000001e-06, "loss": 1.2831, "step": 22320 }, { "epoch": 2.233, "grad_norm": 20.242961883544922, "learning_rate": 5.5366000000000005e-06, "loss": 1.3622, "step": 22330 }, { "epoch": 2.234, "grad_norm": 8.471404075622559, "learning_rate": 5.5346e-06, "loss": 1.1554, "step": 22340 }, { "epoch": 2.235, "grad_norm": 8.011940002441406, "learning_rate": 5.5326e-06, "loss": 1.0231, "step": 22350 }, { "epoch": 2.2359999999999998, "grad_norm": 13.45334529876709, "learning_rate": 5.5306e-06, "loss": 1.1945, "step": 22360 }, { "epoch": 2.237, "grad_norm": 8.338071823120117, "learning_rate": 5.528600000000001e-06, "loss": 1.2589, "step": 22370 }, { "epoch": 2.238, "grad_norm": 8.586181640625, "learning_rate": 5.526600000000001e-06, "loss": 1.129, "step": 22380 }, { "epoch": 2.239, "grad_norm": 8.471375465393066, "learning_rate": 5.5246e-06, "loss": 1.3513, "step": 22390 }, { "epoch": 2.24, "grad_norm": 7.68324089050293, "learning_rate": 5.5226e-06, "loss": 1.089, "step": 22400 }, { "epoch": 2.241, "grad_norm": 12.676066398620605, "learning_rate": 5.5206e-06, "loss": 1.175, "step": 22410 }, { "epoch": 2.242, "grad_norm": 7.4438557624816895, "learning_rate": 5.518600000000001e-06, "loss": 1.1331, "step": 22420 }, { "epoch": 2.243, "grad_norm": 12.723482131958008, "learning_rate": 5.516600000000001e-06, "loss": 1.5297, "step": 22430 }, { "epoch": 2.2439999999999998, "grad_norm": 19.047794342041016, "learning_rate": 5.5146e-06, "loss": 1.4948, "step": 22440 }, { "epoch": 2.245, "grad_norm": 
14.310566902160645, "learning_rate": 5.5126e-06, "loss": 1.4248, "step": 22450 }, { "epoch": 2.246, "grad_norm": 10.241689682006836, "learning_rate": 5.5106e-06, "loss": 1.3848, "step": 22460 }, { "epoch": 2.247, "grad_norm": 16.529415130615234, "learning_rate": 5.508600000000001e-06, "loss": 0.9871, "step": 22470 }, { "epoch": 2.248, "grad_norm": 10.888535499572754, "learning_rate": 5.506600000000001e-06, "loss": 1.175, "step": 22480 }, { "epoch": 2.249, "grad_norm": 11.143275260925293, "learning_rate": 5.5046e-06, "loss": 1.0882, "step": 22490 }, { "epoch": 2.25, "grad_norm": 10.507320404052734, "learning_rate": 5.5026000000000005e-06, "loss": 1.2492, "step": 22500 }, { "epoch": 2.251, "grad_norm": 14.305598258972168, "learning_rate": 5.5006e-06, "loss": 1.0122, "step": 22510 }, { "epoch": 2.252, "grad_norm": 13.53376579284668, "learning_rate": 5.498600000000001e-06, "loss": 1.4048, "step": 22520 }, { "epoch": 2.253, "grad_norm": 9.51317310333252, "learning_rate": 5.496600000000001e-06, "loss": 1.279, "step": 22530 }, { "epoch": 2.254, "grad_norm": 8.208245277404785, "learning_rate": 5.4946e-06, "loss": 0.9466, "step": 22540 }, { "epoch": 2.255, "grad_norm": 26.110170364379883, "learning_rate": 5.4926000000000006e-06, "loss": 1.2996, "step": 22550 }, { "epoch": 2.2560000000000002, "grad_norm": 11.034186363220215, "learning_rate": 5.4906e-06, "loss": 1.1808, "step": 22560 }, { "epoch": 2.257, "grad_norm": 19.216848373413086, "learning_rate": 5.4886e-06, "loss": 1.4543, "step": 22570 }, { "epoch": 2.258, "grad_norm": 13.166850090026855, "learning_rate": 5.486600000000001e-06, "loss": 1.3837, "step": 22580 }, { "epoch": 2.259, "grad_norm": 11.04210090637207, "learning_rate": 5.4846e-06, "loss": 1.0819, "step": 22590 }, { "epoch": 2.26, "grad_norm": 8.344645500183105, "learning_rate": 5.4826e-06, "loss": 1.1535, "step": 22600 }, { "epoch": 2.261, "grad_norm": 17.270158767700195, "learning_rate": 5.4806000000000005e-06, "loss": 1.4068, "step": 22610 }, { "epoch": 
2.262, "grad_norm": 8.952864646911621, "learning_rate": 5.4786e-06, "loss": 1.2565, "step": 22620 }, { "epoch": 2.263, "grad_norm": 12.726444244384766, "learning_rate": 5.476600000000001e-06, "loss": 1.6248, "step": 22630 }, { "epoch": 2.2640000000000002, "grad_norm": 8.026226997375488, "learning_rate": 5.4746e-06, "loss": 1.3197, "step": 22640 }, { "epoch": 2.265, "grad_norm": 13.65671157836914, "learning_rate": 5.4726e-06, "loss": 1.4576, "step": 22650 }, { "epoch": 2.266, "grad_norm": 15.084198951721191, "learning_rate": 5.470600000000001e-06, "loss": 1.4266, "step": 22660 }, { "epoch": 2.267, "grad_norm": 7.077334880828857, "learning_rate": 5.4686000000000005e-06, "loss": 1.1332, "step": 22670 }, { "epoch": 2.268, "grad_norm": 7.264120578765869, "learning_rate": 5.466600000000001e-06, "loss": 0.87, "step": 22680 }, { "epoch": 2.269, "grad_norm": 6.307496547698975, "learning_rate": 5.4646e-06, "loss": 1.3693, "step": 22690 }, { "epoch": 2.27, "grad_norm": 9.293829917907715, "learning_rate": 5.4626e-06, "loss": 1.2385, "step": 22700 }, { "epoch": 2.271, "grad_norm": 9.18850040435791, "learning_rate": 5.460600000000001e-06, "loss": 1.3172, "step": 22710 }, { "epoch": 2.2720000000000002, "grad_norm": 10.206730842590332, "learning_rate": 5.4586000000000006e-06, "loss": 1.3253, "step": 22720 }, { "epoch": 2.273, "grad_norm": 12.961861610412598, "learning_rate": 5.4566e-06, "loss": 1.1689, "step": 22730 }, { "epoch": 2.274, "grad_norm": 8.449858665466309, "learning_rate": 5.4546e-06, "loss": 0.9109, "step": 22740 }, { "epoch": 2.275, "grad_norm": 10.943973541259766, "learning_rate": 5.4526e-06, "loss": 1.1515, "step": 22750 }, { "epoch": 2.276, "grad_norm": 16.4788761138916, "learning_rate": 5.4506e-06, "loss": 1.2697, "step": 22760 }, { "epoch": 2.277, "grad_norm": 10.368868827819824, "learning_rate": 5.448600000000001e-06, "loss": 1.1936, "step": 22770 }, { "epoch": 2.278, "grad_norm": 8.941060066223145, "learning_rate": 5.4466000000000005e-06, "loss": 1.2329, 
"step": 22780 }, { "epoch": 2.279, "grad_norm": 10.879658699035645, "learning_rate": 5.444600000000001e-06, "loss": 1.3362, "step": 22790 }, { "epoch": 2.2800000000000002, "grad_norm": 8.639434814453125, "learning_rate": 5.4426e-06, "loss": 1.1566, "step": 22800 }, { "epoch": 2.281, "grad_norm": 10.775321960449219, "learning_rate": 5.4406e-06, "loss": 1.0648, "step": 22810 }, { "epoch": 2.282, "grad_norm": 9.480378150939941, "learning_rate": 5.438600000000001e-06, "loss": 1.0926, "step": 22820 }, { "epoch": 2.283, "grad_norm": 10.367137908935547, "learning_rate": 5.436600000000001e-06, "loss": 1.5026, "step": 22830 }, { "epoch": 2.284, "grad_norm": 3.5830249786376953, "learning_rate": 5.434600000000001e-06, "loss": 1.1268, "step": 22840 }, { "epoch": 2.285, "grad_norm": 11.447843551635742, "learning_rate": 5.4326e-06, "loss": 1.2587, "step": 22850 }, { "epoch": 2.286, "grad_norm": 17.20610237121582, "learning_rate": 5.4306e-06, "loss": 1.5133, "step": 22860 }, { "epoch": 2.287, "grad_norm": 7.152506351470947, "learning_rate": 5.428600000000001e-06, "loss": 1.2009, "step": 22870 }, { "epoch": 2.288, "grad_norm": 11.042107582092285, "learning_rate": 5.426600000000001e-06, "loss": 0.995, "step": 22880 }, { "epoch": 2.289, "grad_norm": 13.691304206848145, "learning_rate": 5.424600000000001e-06, "loss": 0.9361, "step": 22890 }, { "epoch": 2.29, "grad_norm": 12.811963081359863, "learning_rate": 5.4226e-06, "loss": 1.3763, "step": 22900 }, { "epoch": 2.291, "grad_norm": 9.776972770690918, "learning_rate": 5.4206e-06, "loss": 1.1546, "step": 22910 }, { "epoch": 2.292, "grad_norm": 11.64928913116455, "learning_rate": 5.418600000000001e-06, "loss": 1.1234, "step": 22920 }, { "epoch": 2.293, "grad_norm": 13.304095268249512, "learning_rate": 5.416600000000001e-06, "loss": 0.9275, "step": 22930 }, { "epoch": 2.294, "grad_norm": 17.260528564453125, "learning_rate": 5.414600000000001e-06, "loss": 1.4321, "step": 22940 }, { "epoch": 2.295, "grad_norm": 13.52609920501709, 
"learning_rate": 5.4126000000000005e-06, "loss": 1.7417, "step": 22950 }, { "epoch": 2.296, "grad_norm": 12.572790145874023, "learning_rate": 5.4106e-06, "loss": 1.0007, "step": 22960 }, { "epoch": 2.297, "grad_norm": 8.41349983215332, "learning_rate": 5.4086e-06, "loss": 1.2643, "step": 22970 }, { "epoch": 2.298, "grad_norm": 14.40434455871582, "learning_rate": 5.406600000000001e-06, "loss": 1.6647, "step": 22980 }, { "epoch": 2.299, "grad_norm": 10.715378761291504, "learning_rate": 5.404600000000001e-06, "loss": 1.0832, "step": 22990 }, { "epoch": 2.3, "grad_norm": 18.76213264465332, "learning_rate": 5.4026e-06, "loss": 1.3152, "step": 23000 }, { "epoch": 2.301, "grad_norm": 18.19464683532715, "learning_rate": 5.4006000000000004e-06, "loss": 1.3884, "step": 23010 }, { "epoch": 2.302, "grad_norm": 14.572473526000977, "learning_rate": 5.3986e-06, "loss": 1.2597, "step": 23020 }, { "epoch": 2.303, "grad_norm": 15.225605010986328, "learning_rate": 5.396600000000001e-06, "loss": 1.0289, "step": 23030 }, { "epoch": 2.304, "grad_norm": 14.167146682739258, "learning_rate": 5.394600000000001e-06, "loss": 1.4605, "step": 23040 }, { "epoch": 2.305, "grad_norm": 22.412216186523438, "learning_rate": 5.3926e-06, "loss": 1.3453, "step": 23050 }, { "epoch": 2.306, "grad_norm": 11.043853759765625, "learning_rate": 5.3906000000000005e-06, "loss": 1.278, "step": 23060 }, { "epoch": 2.307, "grad_norm": 14.686028480529785, "learning_rate": 5.3886e-06, "loss": 1.2475, "step": 23070 }, { "epoch": 2.308, "grad_norm": 20.43958282470703, "learning_rate": 5.386600000000001e-06, "loss": 1.3043, "step": 23080 }, { "epoch": 2.309, "grad_norm": 6.865183353424072, "learning_rate": 5.384600000000001e-06, "loss": 1.0491, "step": 23090 }, { "epoch": 2.31, "grad_norm": 12.705528259277344, "learning_rate": 5.3826e-06, "loss": 1.2093, "step": 23100 }, { "epoch": 2.311, "grad_norm": 5.040121555328369, "learning_rate": 5.3808e-06, "loss": 0.7982, "step": 23110 }, { "epoch": 2.312, "grad_norm": 
9.07545280456543, "learning_rate": 5.378800000000001e-06, "loss": 1.2325, "step": 23120 }, { "epoch": 2.313, "grad_norm": 15.09417724609375, "learning_rate": 5.376800000000001e-06, "loss": 1.2077, "step": 23130 }, { "epoch": 2.314, "grad_norm": 15.135221481323242, "learning_rate": 5.3748e-06, "loss": 1.4887, "step": 23140 }, { "epoch": 2.315, "grad_norm": 14.838386535644531, "learning_rate": 5.3728000000000005e-06, "loss": 1.7409, "step": 23150 }, { "epoch": 2.316, "grad_norm": 15.028022766113281, "learning_rate": 5.3708e-06, "loss": 0.9885, "step": 23160 }, { "epoch": 2.317, "grad_norm": 7.591363430023193, "learning_rate": 5.368800000000001e-06, "loss": 1.1306, "step": 23170 }, { "epoch": 2.318, "grad_norm": 13.363445281982422, "learning_rate": 5.366800000000001e-06, "loss": 1.2976, "step": 23180 }, { "epoch": 2.319, "grad_norm": 40.85466003417969, "learning_rate": 5.3648e-06, "loss": 1.3988, "step": 23190 }, { "epoch": 2.32, "grad_norm": 14.200333595275879, "learning_rate": 5.3628000000000006e-06, "loss": 1.175, "step": 23200 }, { "epoch": 2.321, "grad_norm": 15.561279296875, "learning_rate": 5.3608e-06, "loss": 0.9477, "step": 23210 }, { "epoch": 2.322, "grad_norm": 2.5632102489471436, "learning_rate": 5.3588e-06, "loss": 1.2427, "step": 23220 }, { "epoch": 2.323, "grad_norm": 4.13681697845459, "learning_rate": 5.356800000000001e-06, "loss": 1.2345, "step": 23230 }, { "epoch": 2.324, "grad_norm": 16.059484481811523, "learning_rate": 5.355e-06, "loss": 1.247, "step": 23240 }, { "epoch": 2.325, "grad_norm": 9.758614540100098, "learning_rate": 5.353e-06, "loss": 1.3734, "step": 23250 }, { "epoch": 2.326, "grad_norm": 17.958019256591797, "learning_rate": 5.351000000000001e-06, "loss": 1.0631, "step": 23260 }, { "epoch": 2.327, "grad_norm": 16.632963180541992, "learning_rate": 5.349000000000001e-06, "loss": 1.2471, "step": 23270 }, { "epoch": 2.328, "grad_norm": 11.678964614868164, "learning_rate": 5.347e-06, "loss": 1.0634, "step": 23280 }, { "epoch": 2.329, 
"grad_norm": 10.8238525390625, "learning_rate": 5.3450000000000005e-06, "loss": 1.4298, "step": 23290 }, { "epoch": 2.33, "grad_norm": 9.760405540466309, "learning_rate": 5.343e-06, "loss": 1.2593, "step": 23300 }, { "epoch": 2.331, "grad_norm": 16.072694778442383, "learning_rate": 5.341000000000001e-06, "loss": 1.0543, "step": 23310 }, { "epoch": 2.332, "grad_norm": 18.025001525878906, "learning_rate": 5.339000000000001e-06, "loss": 1.2941, "step": 23320 }, { "epoch": 2.333, "grad_norm": 9.332871437072754, "learning_rate": 5.337e-06, "loss": 1.0489, "step": 23330 }, { "epoch": 2.334, "grad_norm": 9.917584419250488, "learning_rate": 5.335000000000001e-06, "loss": 1.5029, "step": 23340 }, { "epoch": 2.335, "grad_norm": 14.89936351776123, "learning_rate": 5.3330000000000004e-06, "loss": 1.1345, "step": 23350 }, { "epoch": 2.336, "grad_norm": 9.956517219543457, "learning_rate": 5.331e-06, "loss": 1.0849, "step": 23360 }, { "epoch": 2.337, "grad_norm": 12.565289497375488, "learning_rate": 5.329000000000001e-06, "loss": 1.0988, "step": 23370 }, { "epoch": 2.338, "grad_norm": 5.757551670074463, "learning_rate": 5.327e-06, "loss": 0.9565, "step": 23380 }, { "epoch": 2.339, "grad_norm": 12.883478164672852, "learning_rate": 5.325e-06, "loss": 1.3874, "step": 23390 }, { "epoch": 2.34, "grad_norm": 9.466730117797852, "learning_rate": 5.3230000000000005e-06, "loss": 1.0231, "step": 23400 }, { "epoch": 2.341, "grad_norm": 19.090383529663086, "learning_rate": 5.321e-06, "loss": 1.3984, "step": 23410 }, { "epoch": 2.342, "grad_norm": 9.475041389465332, "learning_rate": 5.319000000000001e-06, "loss": 1.0983, "step": 23420 }, { "epoch": 2.343, "grad_norm": 11.125555992126465, "learning_rate": 5.317e-06, "loss": 1.2932, "step": 23430 }, { "epoch": 2.344, "grad_norm": 4.500854015350342, "learning_rate": 5.315e-06, "loss": 1.138, "step": 23440 }, { "epoch": 2.3449999999999998, "grad_norm": 12.107810020446777, "learning_rate": 5.313000000000001e-06, "loss": 1.1493, "step": 23450 }, { 
"epoch": 2.346, "grad_norm": 9.1337890625, "learning_rate": 5.3110000000000005e-06, "loss": 1.2079, "step": 23460 }, { "epoch": 2.347, "grad_norm": 12.877277374267578, "learning_rate": 5.309000000000001e-06, "loss": 1.2121, "step": 23470 }, { "epoch": 2.348, "grad_norm": 13.884976387023926, "learning_rate": 5.307e-06, "loss": 1.5234, "step": 23480 }, { "epoch": 2.349, "grad_norm": 7.24168586730957, "learning_rate": 5.305e-06, "loss": 1.357, "step": 23490 }, { "epoch": 2.35, "grad_norm": 12.99600601196289, "learning_rate": 5.303000000000001e-06, "loss": 1.2899, "step": 23500 }, { "epoch": 2.351, "grad_norm": 9.401188850402832, "learning_rate": 5.3010000000000006e-06, "loss": 1.0223, "step": 23510 }, { "epoch": 2.352, "grad_norm": 14.33315372467041, "learning_rate": 5.2990000000000004e-06, "loss": 1.2943, "step": 23520 }, { "epoch": 2.3529999999999998, "grad_norm": 25.65981674194336, "learning_rate": 5.297e-06, "loss": 1.4906, "step": 23530 }, { "epoch": 2.354, "grad_norm": 4.736303329467773, "learning_rate": 5.295e-06, "loss": 1.015, "step": 23540 }, { "epoch": 2.355, "grad_norm": 12.778898239135742, "learning_rate": 5.293e-06, "loss": 1.5305, "step": 23550 }, { "epoch": 2.356, "grad_norm": 11.301053047180176, "learning_rate": 5.291000000000001e-06, "loss": 1.5079, "step": 23560 }, { "epoch": 2.357, "grad_norm": 11.399186134338379, "learning_rate": 5.2890000000000005e-06, "loss": 1.2815, "step": 23570 }, { "epoch": 2.358, "grad_norm": 3.763278007507324, "learning_rate": 5.2869999999999995e-06, "loss": 1.1386, "step": 23580 }, { "epoch": 2.359, "grad_norm": 9.968294143676758, "learning_rate": 5.285e-06, "loss": 1.1293, "step": 23590 }, { "epoch": 2.36, "grad_norm": 8.61181354522705, "learning_rate": 5.283e-06, "loss": 1.4135, "step": 23600 }, { "epoch": 2.3609999999999998, "grad_norm": 10.00186824798584, "learning_rate": 5.281000000000001e-06, "loss": 1.3307, "step": 23610 }, { "epoch": 2.362, "grad_norm": 6.54819393157959, "learning_rate": 5.279000000000001e-06, 
"loss": 1.0753, "step": 23620 }, { "epoch": 2.363, "grad_norm": 7.2237114906311035, "learning_rate": 5.277e-06, "loss": 0.9287, "step": 23630 }, { "epoch": 2.364, "grad_norm": 9.761820793151855, "learning_rate": 5.275e-06, "loss": 1.6771, "step": 23640 }, { "epoch": 2.365, "grad_norm": 7.802646160125732, "learning_rate": 5.273e-06, "loss": 1.3068, "step": 23650 }, { "epoch": 2.366, "grad_norm": 11.063340187072754, "learning_rate": 5.271000000000001e-06, "loss": 1.3893, "step": 23660 }, { "epoch": 2.367, "grad_norm": 9.857451438903809, "learning_rate": 5.269000000000001e-06, "loss": 1.2879, "step": 23670 }, { "epoch": 2.368, "grad_norm": 5.861509799957275, "learning_rate": 5.267e-06, "loss": 1.2032, "step": 23680 }, { "epoch": 2.3689999999999998, "grad_norm": 14.61808967590332, "learning_rate": 5.265e-06, "loss": 1.3297, "step": 23690 }, { "epoch": 2.37, "grad_norm": 12.182709693908691, "learning_rate": 5.263e-06, "loss": 1.1614, "step": 23700 }, { "epoch": 2.371, "grad_norm": 10.071157455444336, "learning_rate": 5.261000000000001e-06, "loss": 1.1501, "step": 23710 }, { "epoch": 2.372, "grad_norm": 11.942830085754395, "learning_rate": 5.259000000000001e-06, "loss": 1.524, "step": 23720 }, { "epoch": 2.373, "grad_norm": 12.14952564239502, "learning_rate": 5.257e-06, "loss": 1.1549, "step": 23730 }, { "epoch": 2.374, "grad_norm": 12.79142951965332, "learning_rate": 5.2550000000000005e-06, "loss": 1.3113, "step": 23740 }, { "epoch": 2.375, "grad_norm": 13.655325889587402, "learning_rate": 5.253e-06, "loss": 1.2593, "step": 23750 }, { "epoch": 2.376, "grad_norm": 6.805649280548096, "learning_rate": 5.251e-06, "loss": 1.1454, "step": 23760 }, { "epoch": 2.377, "grad_norm": 15.38922119140625, "learning_rate": 5.249000000000001e-06, "loss": 1.2722, "step": 23770 }, { "epoch": 2.378, "grad_norm": 7.095585346221924, "learning_rate": 5.247000000000001e-06, "loss": 0.9897, "step": 23780 }, { "epoch": 2.379, "grad_norm": 8.356823921203613, "learning_rate": 5.245e-06, "loss": 
1.1524, "step": 23790 }, { "epoch": 2.38, "grad_norm": 13.172138214111328, "learning_rate": 5.2430000000000005e-06, "loss": 0.9835, "step": 23800 }, { "epoch": 2.3810000000000002, "grad_norm": 9.999981880187988, "learning_rate": 5.241e-06, "loss": 1.1563, "step": 23810 }, { "epoch": 2.382, "grad_norm": 8.065911293029785, "learning_rate": 5.239000000000001e-06, "loss": 1.1181, "step": 23820 }, { "epoch": 2.383, "grad_norm": 18.793659210205078, "learning_rate": 5.237000000000001e-06, "loss": 1.0101, "step": 23830 }, { "epoch": 2.384, "grad_norm": 13.889545440673828, "learning_rate": 5.235e-06, "loss": 0.9996, "step": 23840 }, { "epoch": 2.385, "grad_norm": 11.8282470703125, "learning_rate": 5.2330000000000005e-06, "loss": 1.3268, "step": 23850 }, { "epoch": 2.386, "grad_norm": 20.201583862304688, "learning_rate": 5.231e-06, "loss": 1.0372, "step": 23860 }, { "epoch": 2.387, "grad_norm": 11.244546890258789, "learning_rate": 5.229000000000001e-06, "loss": 1.4243, "step": 23870 }, { "epoch": 2.388, "grad_norm": 10.571415901184082, "learning_rate": 5.227000000000001e-06, "loss": 1.471, "step": 23880 }, { "epoch": 2.3890000000000002, "grad_norm": 7.211154460906982, "learning_rate": 5.225e-06, "loss": 1.2841, "step": 23890 }, { "epoch": 2.39, "grad_norm": 13.82706356048584, "learning_rate": 5.223000000000001e-06, "loss": 1.5018, "step": 23900 }, { "epoch": 2.391, "grad_norm": 7.877462863922119, "learning_rate": 5.2210000000000005e-06, "loss": 1.024, "step": 23910 }, { "epoch": 2.392, "grad_norm": 7.223837852478027, "learning_rate": 5.219e-06, "loss": 1.3082, "step": 23920 }, { "epoch": 2.393, "grad_norm": 8.009160995483398, "learning_rate": 5.217000000000001e-06, "loss": 1.1259, "step": 23930 }, { "epoch": 2.394, "grad_norm": 15.286426544189453, "learning_rate": 5.215e-06, "loss": 1.4169, "step": 23940 }, { "epoch": 2.395, "grad_norm": 9.313706398010254, "learning_rate": 5.213e-06, "loss": 1.2741, "step": 23950 }, { "epoch": 2.396, "grad_norm": 7.3609299659729, 
"learning_rate": 5.211000000000001e-06, "loss": 0.8381, "step": 23960 }, { "epoch": 2.3970000000000002, "grad_norm": 9.206737518310547, "learning_rate": 5.2090000000000004e-06, "loss": 1.0569, "step": 23970 }, { "epoch": 2.398, "grad_norm": 13.066150665283203, "learning_rate": 5.207000000000001e-06, "loss": 1.3572, "step": 23980 }, { "epoch": 2.399, "grad_norm": 10.896574974060059, "learning_rate": 5.205e-06, "loss": 1.3516, "step": 23990 }, { "epoch": 2.4, "grad_norm": 12.532232284545898, "learning_rate": 5.203e-06, "loss": 1.1421, "step": 24000 }, { "epoch": 2.401, "grad_norm": 9.322113990783691, "learning_rate": 5.201000000000001e-06, "loss": 1.0456, "step": 24010 }, { "epoch": 2.402, "grad_norm": 16.16337013244629, "learning_rate": 5.1990000000000005e-06, "loss": 1.1896, "step": 24020 }, { "epoch": 2.403, "grad_norm": 11.655999183654785, "learning_rate": 5.197000000000001e-06, "loss": 1.0523, "step": 24030 }, { "epoch": 2.404, "grad_norm": 9.204489707946777, "learning_rate": 5.195e-06, "loss": 1.4828, "step": 24040 }, { "epoch": 2.4050000000000002, "grad_norm": 6.11624813079834, "learning_rate": 5.193e-06, "loss": 1.2332, "step": 24050 }, { "epoch": 2.406, "grad_norm": 8.199590682983398, "learning_rate": 5.191000000000001e-06, "loss": 1.0405, "step": 24060 }, { "epoch": 2.407, "grad_norm": 16.67262840270996, "learning_rate": 5.189000000000001e-06, "loss": 1.525, "step": 24070 }, { "epoch": 2.408, "grad_norm": 18.354745864868164, "learning_rate": 5.187000000000001e-06, "loss": 1.1378, "step": 24080 }, { "epoch": 2.409, "grad_norm": 14.36613655090332, "learning_rate": 5.185e-06, "loss": 1.1876, "step": 24090 }, { "epoch": 2.41, "grad_norm": 11.032489776611328, "learning_rate": 5.183e-06, "loss": 1.4759, "step": 24100 }, { "epoch": 2.411, "grad_norm": 15.60702133178711, "learning_rate": 5.181000000000001e-06, "loss": 1.2272, "step": 24110 }, { "epoch": 2.412, "grad_norm": 13.59268856048584, "learning_rate": 5.179000000000001e-06, "loss": 1.3718, "step": 24120 }, { 
"epoch": 2.413, "grad_norm": 14.630642890930176, "learning_rate": 5.177000000000001e-06, "loss": 1.1899, "step": 24130 }, { "epoch": 2.414, "grad_norm": 8.775884628295898, "learning_rate": 5.1750000000000004e-06, "loss": 0.9302, "step": 24140 }, { "epoch": 2.415, "grad_norm": 8.916512489318848, "learning_rate": 5.173e-06, "loss": 1.0955, "step": 24150 }, { "epoch": 2.416, "grad_norm": 5.961061477661133, "learning_rate": 5.171e-06, "loss": 1.6022, "step": 24160 }, { "epoch": 2.417, "grad_norm": 12.301736831665039, "learning_rate": 5.169000000000001e-06, "loss": 1.3805, "step": 24170 }, { "epoch": 2.418, "grad_norm": 14.681328773498535, "learning_rate": 5.167000000000001e-06, "loss": 0.9842, "step": 24180 }, { "epoch": 2.419, "grad_norm": 16.97111701965332, "learning_rate": 5.165e-06, "loss": 1.3266, "step": 24190 }, { "epoch": 2.42, "grad_norm": 9.471613883972168, "learning_rate": 5.163e-06, "loss": 0.9993, "step": 24200 }, { "epoch": 2.421, "grad_norm": 22.498395919799805, "learning_rate": 5.161e-06, "loss": 1.7415, "step": 24210 }, { "epoch": 2.422, "grad_norm": 15.253742218017578, "learning_rate": 5.159000000000001e-06, "loss": 1.4886, "step": 24220 }, { "epoch": 2.423, "grad_norm": 15.189777374267578, "learning_rate": 5.157000000000001e-06, "loss": 1.1121, "step": 24230 }, { "epoch": 2.424, "grad_norm": 13.42461109161377, "learning_rate": 5.155e-06, "loss": 1.6515, "step": 24240 }, { "epoch": 2.425, "grad_norm": 7.6108503341674805, "learning_rate": 5.1530000000000005e-06, "loss": 1.2482, "step": 24250 }, { "epoch": 2.426, "grad_norm": 12.12304973602295, "learning_rate": 5.151e-06, "loss": 1.393, "step": 24260 }, { "epoch": 2.427, "grad_norm": 5.6547088623046875, "learning_rate": 5.149000000000001e-06, "loss": 1.2924, "step": 24270 }, { "epoch": 2.428, "grad_norm": 9.837066650390625, "learning_rate": 5.147000000000001e-06, "loss": 1.1194, "step": 24280 }, { "epoch": 2.429, "grad_norm": 8.39636516571045, "learning_rate": 5.145e-06, "loss": 1.1021, "step": 24290 }, 
{ "epoch": 2.43, "grad_norm": 13.06580638885498, "learning_rate": 5.1430000000000006e-06, "loss": 1.1997, "step": 24300 }, { "epoch": 2.431, "grad_norm": 10.818870544433594, "learning_rate": 5.141e-06, "loss": 1.2876, "step": 24310 }, { "epoch": 2.432, "grad_norm": 11.100872993469238, "learning_rate": 5.139e-06, "loss": 1.2004, "step": 24320 }, { "epoch": 2.433, "grad_norm": 11.12897777557373, "learning_rate": 5.137000000000001e-06, "loss": 1.1815, "step": 24330 }, { "epoch": 2.434, "grad_norm": 11.122133255004883, "learning_rate": 5.135e-06, "loss": 1.0263, "step": 24340 }, { "epoch": 2.435, "grad_norm": 12.251120567321777, "learning_rate": 5.133e-06, "loss": 1.2534, "step": 24350 }, { "epoch": 2.436, "grad_norm": 9.940631866455078, "learning_rate": 5.1310000000000005e-06, "loss": 1.4686, "step": 24360 }, { "epoch": 2.437, "grad_norm": 9.03281021118164, "learning_rate": 5.129e-06, "loss": 1.1916, "step": 24370 }, { "epoch": 2.438, "grad_norm": 7.199070930480957, "learning_rate": 5.127000000000001e-06, "loss": 1.1182, "step": 24380 }, { "epoch": 2.439, "grad_norm": 19.38608741760254, "learning_rate": 5.125e-06, "loss": 1.4333, "step": 24390 }, { "epoch": 2.44, "grad_norm": 15.471979141235352, "learning_rate": 5.123e-06, "loss": 1.3505, "step": 24400 }, { "epoch": 2.441, "grad_norm": 8.3968505859375, "learning_rate": 5.121000000000001e-06, "loss": 1.2305, "step": 24410 }, { "epoch": 2.442, "grad_norm": 8.903448104858398, "learning_rate": 5.1190000000000005e-06, "loss": 1.5862, "step": 24420 }, { "epoch": 2.443, "grad_norm": 9.338850975036621, "learning_rate": 5.117000000000001e-06, "loss": 1.143, "step": 24430 }, { "epoch": 2.444, "grad_norm": 8.315506935119629, "learning_rate": 5.115e-06, "loss": 1.0614, "step": 24440 }, { "epoch": 2.445, "grad_norm": 9.64361572265625, "learning_rate": 5.113e-06, "loss": 1.009, "step": 24450 }, { "epoch": 2.446, "grad_norm": 15.084073066711426, "learning_rate": 5.111000000000001e-06, "loss": 1.2788, "step": 24460 }, { "epoch": 
2.447, "grad_norm": 8.576889038085938, "learning_rate": 5.1090000000000006e-06, "loss": 0.8396, "step": 24470 }, { "epoch": 2.448, "grad_norm": 10.776226997375488, "learning_rate": 5.107000000000001e-06, "loss": 1.2495, "step": 24480 }, { "epoch": 2.449, "grad_norm": 15.37048625946045, "learning_rate": 5.105e-06, "loss": 1.1894, "step": 24490 }, { "epoch": 2.45, "grad_norm": 12.534494400024414, "learning_rate": 5.103e-06, "loss": 0.9862, "step": 24500 }, { "epoch": 2.451, "grad_norm": 17.807559967041016, "learning_rate": 5.101000000000001e-06, "loss": 1.5625, "step": 24510 }, { "epoch": 2.452, "grad_norm": 17.98372459411621, "learning_rate": 5.099000000000001e-06, "loss": 1.4896, "step": 24520 }, { "epoch": 2.453, "grad_norm": 8.372767448425293, "learning_rate": 5.0970000000000005e-06, "loss": 1.3094, "step": 24530 }, { "epoch": 2.454, "grad_norm": 9.983580589294434, "learning_rate": 5.095e-06, "loss": 1.2774, "step": 24540 }, { "epoch": 2.455, "grad_norm": 6.735473155975342, "learning_rate": 5.093e-06, "loss": 1.3247, "step": 24550 }, { "epoch": 2.456, "grad_norm": 7.080766677856445, "learning_rate": 5.091e-06, "loss": 1.0852, "step": 24560 }, { "epoch": 2.457, "grad_norm": 10.762736320495605, "learning_rate": 5.089000000000001e-06, "loss": 1.3521, "step": 24570 }, { "epoch": 2.458, "grad_norm": 7.101816654205322, "learning_rate": 5.087000000000001e-06, "loss": 1.0287, "step": 24580 }, { "epoch": 2.459, "grad_norm": 11.656147003173828, "learning_rate": 5.085e-06, "loss": 1.0968, "step": 24590 }, { "epoch": 2.46, "grad_norm": 7.463164806365967, "learning_rate": 5.083e-06, "loss": 1.171, "step": 24600 }, { "epoch": 2.461, "grad_norm": 6.540295124053955, "learning_rate": 5.081e-06, "loss": 1.3163, "step": 24610 }, { "epoch": 2.462, "grad_norm": 11.730252265930176, "learning_rate": 5.079000000000001e-06, "loss": 1.3014, "step": 24620 }, { "epoch": 2.463, "grad_norm": 11.771647453308105, "learning_rate": 5.077000000000001e-06, "loss": 1.0723, "step": 24630 }, { 
"epoch": 2.464, "grad_norm": 8.913616180419922, "learning_rate": 5.075e-06, "loss": 1.1711, "step": 24640 }, { "epoch": 2.465, "grad_norm": 6.837693214416504, "learning_rate": 5.073e-06, "loss": 1.0737, "step": 24650 }, { "epoch": 2.466, "grad_norm": 9.476807594299316, "learning_rate": 5.071e-06, "loss": 1.4946, "step": 24660 }, { "epoch": 2.467, "grad_norm": 3.5682055950164795, "learning_rate": 5.069000000000001e-06, "loss": 1.2866, "step": 24670 }, { "epoch": 2.468, "grad_norm": 7.187636852264404, "learning_rate": 5.067000000000001e-06, "loss": 1.1046, "step": 24680 }, { "epoch": 2.469, "grad_norm": 9.186333656311035, "learning_rate": 5.065e-06, "loss": 1.2625, "step": 24690 }, { "epoch": 2.4699999999999998, "grad_norm": 20.119373321533203, "learning_rate": 5.0630000000000005e-06, "loss": 1.1955, "step": 24700 }, { "epoch": 2.471, "grad_norm": 11.67829418182373, "learning_rate": 5.061e-06, "loss": 1.2156, "step": 24710 }, { "epoch": 2.472, "grad_norm": 6.792496681213379, "learning_rate": 5.059e-06, "loss": 1.3602, "step": 24720 }, { "epoch": 2.473, "grad_norm": 11.983834266662598, "learning_rate": 5.057000000000001e-06, "loss": 1.4964, "step": 24730 }, { "epoch": 2.474, "grad_norm": 11.775550842285156, "learning_rate": 5.055e-06, "loss": 1.2672, "step": 24740 }, { "epoch": 2.475, "grad_norm": 14.719745635986328, "learning_rate": 5.053e-06, "loss": 1.3043, "step": 24750 }, { "epoch": 2.476, "grad_norm": 11.976553916931152, "learning_rate": 5.0510000000000004e-06, "loss": 0.9475, "step": 24760 }, { "epoch": 2.477, "grad_norm": 10.892024993896484, "learning_rate": 5.049e-06, "loss": 1.3257, "step": 24770 }, { "epoch": 2.4779999999999998, "grad_norm": 8.432369232177734, "learning_rate": 5.047000000000001e-06, "loss": 1.1718, "step": 24780 }, { "epoch": 2.479, "grad_norm": 8.4509916305542, "learning_rate": 5.045e-06, "loss": 1.1574, "step": 24790 }, { "epoch": 2.48, "grad_norm": 8.522741317749023, "learning_rate": 5.043e-06, "loss": 0.9383, "step": 24800 }, { "epoch": 
2.481, "grad_norm": 12.17015266418457, "learning_rate": 5.0410000000000005e-06, "loss": 0.8669, "step": 24810 }, { "epoch": 2.482, "grad_norm": 9.889249801635742, "learning_rate": 5.039e-06, "loss": 1.2073, "step": 24820 }, { "epoch": 2.483, "grad_norm": 17.451194763183594, "learning_rate": 5.037000000000001e-06, "loss": 0.9985, "step": 24830 }, { "epoch": 2.484, "grad_norm": 11.744634628295898, "learning_rate": 5.035e-06, "loss": 1.1974, "step": 24840 }, { "epoch": 2.485, "grad_norm": 15.490137100219727, "learning_rate": 5.033e-06, "loss": 1.4591, "step": 24850 }, { "epoch": 2.4859999999999998, "grad_norm": 14.294090270996094, "learning_rate": 5.031000000000001e-06, "loss": 1.2102, "step": 24860 }, { "epoch": 2.487, "grad_norm": 16.38015365600586, "learning_rate": 5.0290000000000005e-06, "loss": 1.0734, "step": 24870 }, { "epoch": 2.488, "grad_norm": 12.732451438903809, "learning_rate": 5.027000000000001e-06, "loss": 1.1332, "step": 24880 }, { "epoch": 2.489, "grad_norm": 20.034570693969727, "learning_rate": 5.025e-06, "loss": 1.3036, "step": 24890 }, { "epoch": 2.49, "grad_norm": 6.090413570404053, "learning_rate": 5.023e-06, "loss": 1.0751, "step": 24900 }, { "epoch": 2.491, "grad_norm": 8.177401542663574, "learning_rate": 5.021000000000001e-06, "loss": 1.6145, "step": 24910 }, { "epoch": 2.492, "grad_norm": 13.899701118469238, "learning_rate": 5.0190000000000006e-06, "loss": 1.4446, "step": 24920 }, { "epoch": 2.493, "grad_norm": 13.760589599609375, "learning_rate": 5.017e-06, "loss": 1.4236, "step": 24930 }, { "epoch": 2.4939999999999998, "grad_norm": 7.740062713623047, "learning_rate": 5.015e-06, "loss": 1.0533, "step": 24940 }, { "epoch": 2.495, "grad_norm": 5.959550380706787, "learning_rate": 5.013e-06, "loss": 1.3672, "step": 24950 }, { "epoch": 2.496, "grad_norm": 7.89862585067749, "learning_rate": 5.011e-06, "loss": 1.0519, "step": 24960 }, { "epoch": 2.497, "grad_norm": 13.841778755187988, "learning_rate": 5.009000000000001e-06, "loss": 1.1921, "step": 
24970 }, { "epoch": 2.498, "grad_norm": 12.801328659057617, "learning_rate": 5.0070000000000005e-06, "loss": 0.87, "step": 24980 }, { "epoch": 2.499, "grad_norm": 24.86789321899414, "learning_rate": 5.0049999999999995e-06, "loss": 1.5257, "step": 24990 }, { "epoch": 2.5, "grad_norm": 7.720008373260498, "learning_rate": 5.003e-06, "loss": 1.3248, "step": 25000 }, { "epoch": 2.501, "grad_norm": 15.345523834228516, "learning_rate": 5.001e-06, "loss": 1.4427, "step": 25010 }, { "epoch": 2.502, "grad_norm": 10.080619812011719, "learning_rate": 4.999000000000001e-06, "loss": 1.3019, "step": 25020 }, { "epoch": 2.503, "grad_norm": 14.245401382446289, "learning_rate": 4.997000000000001e-06, "loss": 1.3882, "step": 25030 }, { "epoch": 2.504, "grad_norm": 14.883753776550293, "learning_rate": 4.9950000000000005e-06, "loss": 1.2462, "step": 25040 }, { "epoch": 2.505, "grad_norm": 8.885193824768066, "learning_rate": 4.993e-06, "loss": 1.3583, "step": 25050 }, { "epoch": 2.5060000000000002, "grad_norm": 9.735286712646484, "learning_rate": 4.991e-06, "loss": 0.9712, "step": 25060 }, { "epoch": 2.507, "grad_norm": 7.8614959716796875, "learning_rate": 4.989000000000001e-06, "loss": 1.0366, "step": 25070 }, { "epoch": 2.508, "grad_norm": 11.43816089630127, "learning_rate": 4.987e-06, "loss": 1.2967, "step": 25080 }, { "epoch": 2.509, "grad_norm": 16.647661209106445, "learning_rate": 4.9850000000000006e-06, "loss": 1.1818, "step": 25090 }, { "epoch": 2.51, "grad_norm": 9.492711067199707, "learning_rate": 4.983e-06, "loss": 1.3037, "step": 25100 }, { "epoch": 2.511, "grad_norm": 7.692164897918701, "learning_rate": 4.981e-06, "loss": 1.2377, "step": 25110 }, { "epoch": 2.512, "grad_norm": 12.17497444152832, "learning_rate": 4.979e-06, "loss": 1.3742, "step": 25120 }, { "epoch": 2.513, "grad_norm": 11.467120170593262, "learning_rate": 4.977e-06, "loss": 1.3241, "step": 25130 }, { "epoch": 2.5140000000000002, "grad_norm": 10.841195106506348, "learning_rate": 4.975000000000001e-06, 
"loss": 1.5505, "step": 25140 }, { "epoch": 2.515, "grad_norm": 8.942300796508789, "learning_rate": 4.9730000000000005e-06, "loss": 1.2481, "step": 25150 }, { "epoch": 2.516, "grad_norm": 10.203912734985352, "learning_rate": 4.971e-06, "loss": 1.2863, "step": 25160 }, { "epoch": 2.517, "grad_norm": 7.29220724105835, "learning_rate": 4.969e-06, "loss": 1.1825, "step": 25170 }, { "epoch": 2.518, "grad_norm": 6.429839611053467, "learning_rate": 4.967e-06, "loss": 1.0452, "step": 25180 }, { "epoch": 2.519, "grad_norm": 12.162367820739746, "learning_rate": 4.965000000000001e-06, "loss": 1.3665, "step": 25190 }, { "epoch": 2.52, "grad_norm": 5.209897041320801, "learning_rate": 4.963000000000001e-06, "loss": 1.181, "step": 25200 }, { "epoch": 2.521, "grad_norm": 8.346477508544922, "learning_rate": 4.9610000000000004e-06, "loss": 1.2568, "step": 25210 }, { "epoch": 2.5220000000000002, "grad_norm": 10.70533275604248, "learning_rate": 4.959e-06, "loss": 1.154, "step": 25220 }, { "epoch": 2.523, "grad_norm": 4.948952674865723, "learning_rate": 4.957e-06, "loss": 0.8296, "step": 25230 }, { "epoch": 2.524, "grad_norm": 11.4292631149292, "learning_rate": 4.955e-06, "loss": 1.422, "step": 25240 }, { "epoch": 2.525, "grad_norm": 13.320700645446777, "learning_rate": 4.953000000000001e-06, "loss": 1.0497, "step": 25250 }, { "epoch": 2.526, "grad_norm": 13.019256591796875, "learning_rate": 4.9510000000000005e-06, "loss": 1.1571, "step": 25260 }, { "epoch": 2.527, "grad_norm": 15.313298225402832, "learning_rate": 4.949e-06, "loss": 0.9357, "step": 25270 }, { "epoch": 2.528, "grad_norm": 11.603731155395508, "learning_rate": 4.947e-06, "loss": 1.5463, "step": 25280 }, { "epoch": 2.529, "grad_norm": 12.498459815979004, "learning_rate": 4.945e-06, "loss": 1.3405, "step": 25290 }, { "epoch": 2.5300000000000002, "grad_norm": 8.238338470458984, "learning_rate": 4.943000000000001e-06, "loss": 1.3568, "step": 25300 }, { "epoch": 2.531, "grad_norm": 14.322158813476562, "learning_rate": 
4.941000000000001e-06, "loss": 1.3303, "step": 25310 }, { "epoch": 2.532, "grad_norm": 12.228023529052734, "learning_rate": 4.9390000000000005e-06, "loss": 1.4199, "step": 25320 }, { "epoch": 2.533, "grad_norm": 10.042830467224121, "learning_rate": 4.937e-06, "loss": 1.3111, "step": 25330 }, { "epoch": 2.534, "grad_norm": 9.31440258026123, "learning_rate": 4.935e-06, "loss": 1.3725, "step": 25340 }, { "epoch": 2.535, "grad_norm": 13.399893760681152, "learning_rate": 4.933000000000001e-06, "loss": 1.1597, "step": 25350 }, { "epoch": 2.536, "grad_norm": 11.934523582458496, "learning_rate": 4.931e-06, "loss": 1.3943, "step": 25360 }, { "epoch": 2.537, "grad_norm": 6.322368621826172, "learning_rate": 4.929000000000001e-06, "loss": 1.2908, "step": 25370 }, { "epoch": 2.5380000000000003, "grad_norm": 16.470020294189453, "learning_rate": 4.9270000000000004e-06, "loss": 1.3329, "step": 25380 }, { "epoch": 2.539, "grad_norm": 4.128391742706299, "learning_rate": 4.925e-06, "loss": 1.0898, "step": 25390 }, { "epoch": 2.54, "grad_norm": 8.682218551635742, "learning_rate": 4.923000000000001e-06, "loss": 1.4055, "step": 25400 }, { "epoch": 2.541, "grad_norm": 5.133202075958252, "learning_rate": 4.921e-06, "loss": 1.2289, "step": 25410 }, { "epoch": 2.542, "grad_norm": 6.550359725952148, "learning_rate": 4.919000000000001e-06, "loss": 1.0912, "step": 25420 }, { "epoch": 2.543, "grad_norm": 13.211549758911133, "learning_rate": 4.9170000000000005e-06, "loss": 1.6193, "step": 25430 }, { "epoch": 2.544, "grad_norm": 9.031594276428223, "learning_rate": 4.915e-06, "loss": 1.4027, "step": 25440 }, { "epoch": 2.545, "grad_norm": 8.984578132629395, "learning_rate": 4.913e-06, "loss": 1.2187, "step": 25450 }, { "epoch": 2.5460000000000003, "grad_norm": 13.293842315673828, "learning_rate": 4.911e-06, "loss": 1.5042, "step": 25460 }, { "epoch": 2.547, "grad_norm": 8.034464836120605, "learning_rate": 4.909000000000001e-06, "loss": 1.0389, "step": 25470 }, { "epoch": 2.548, "grad_norm": 
5.235793113708496, "learning_rate": 4.907000000000001e-06, "loss": 0.9791, "step": 25480 }, { "epoch": 2.549, "grad_norm": 6.596670150756836, "learning_rate": 4.9050000000000005e-06, "loss": 1.0969, "step": 25490 }, { "epoch": 2.55, "grad_norm": 5.380162239074707, "learning_rate": 4.903e-06, "loss": 0.9389, "step": 25500 }, { "epoch": 2.551, "grad_norm": 10.392274856567383, "learning_rate": 4.901e-06, "loss": 1.4905, "step": 25510 }, { "epoch": 2.552, "grad_norm": 10.611968994140625, "learning_rate": 4.899e-06, "loss": 1.1442, "step": 25520 }, { "epoch": 2.553, "grad_norm": 13.596555709838867, "learning_rate": 4.897000000000001e-06, "loss": 1.6295, "step": 25530 }, { "epoch": 2.5540000000000003, "grad_norm": 12.75092887878418, "learning_rate": 4.8950000000000006e-06, "loss": 1.0914, "step": 25540 }, { "epoch": 2.555, "grad_norm": 10.796504974365234, "learning_rate": 4.893e-06, "loss": 1.6198, "step": 25550 }, { "epoch": 2.556, "grad_norm": 14.74598503112793, "learning_rate": 4.891e-06, "loss": 1.1922, "step": 25560 }, { "epoch": 2.557, "grad_norm": 7.4982733726501465, "learning_rate": 4.889e-06, "loss": 1.2047, "step": 25570 }, { "epoch": 2.558, "grad_norm": 8.198343276977539, "learning_rate": 4.887000000000001e-06, "loss": 1.3659, "step": 25580 }, { "epoch": 2.559, "grad_norm": 3.060401678085327, "learning_rate": 4.885000000000001e-06, "loss": 1.0155, "step": 25590 }, { "epoch": 2.56, "grad_norm": 6.742977142333984, "learning_rate": 4.8830000000000005e-06, "loss": 1.3511, "step": 25600 }, { "epoch": 2.561, "grad_norm": 19.35199546813965, "learning_rate": 4.881e-06, "loss": 1.1283, "step": 25610 }, { "epoch": 2.5620000000000003, "grad_norm": 7.197390556335449, "learning_rate": 4.879e-06, "loss": 1.1932, "step": 25620 }, { "epoch": 2.5629999999999997, "grad_norm": 19.396137237548828, "learning_rate": 4.877000000000001e-06, "loss": 1.3161, "step": 25630 }, { "epoch": 2.564, "grad_norm": 8.057609558105469, "learning_rate": 4.875e-06, "loss": 1.1845, "step": 25640 }, { 
"epoch": 2.565, "grad_norm": 10.1448974609375, "learning_rate": 4.873000000000001e-06, "loss": 1.0903, "step": 25650 }, { "epoch": 2.566, "grad_norm": 12.81747817993164, "learning_rate": 4.8710000000000005e-06, "loss": 1.482, "step": 25660 }, { "epoch": 2.567, "grad_norm": 14.84013843536377, "learning_rate": 4.869e-06, "loss": 1.3081, "step": 25670 }, { "epoch": 2.568, "grad_norm": 7.044368743896484, "learning_rate": 4.867000000000001e-06, "loss": 1.1896, "step": 25680 }, { "epoch": 2.569, "grad_norm": 7.157289505004883, "learning_rate": 4.865e-06, "loss": 1.3086, "step": 25690 }, { "epoch": 2.57, "grad_norm": 11.085453987121582, "learning_rate": 4.863000000000001e-06, "loss": 1.3573, "step": 25700 }, { "epoch": 2.5709999999999997, "grad_norm": 11.509424209594727, "learning_rate": 4.8610000000000006e-06, "loss": 1.1029, "step": 25710 }, { "epoch": 2.572, "grad_norm": 5.13484001159668, "learning_rate": 4.859e-06, "loss": 1.0824, "step": 25720 }, { "epoch": 2.573, "grad_norm": 12.935089111328125, "learning_rate": 4.857e-06, "loss": 1.3368, "step": 25730 }, { "epoch": 2.574, "grad_norm": 11.456624031066895, "learning_rate": 4.855e-06, "loss": 1.4018, "step": 25740 }, { "epoch": 2.575, "grad_norm": 10.057692527770996, "learning_rate": 4.853000000000001e-06, "loss": 1.5307, "step": 25750 }, { "epoch": 2.576, "grad_norm": 7.123587131500244, "learning_rate": 4.851e-06, "loss": 1.3565, "step": 25760 }, { "epoch": 2.577, "grad_norm": 11.030284881591797, "learning_rate": 4.8490000000000005e-06, "loss": 1.1617, "step": 25770 }, { "epoch": 2.578, "grad_norm": 5.462807655334473, "learning_rate": 4.847e-06, "loss": 1.0779, "step": 25780 }, { "epoch": 2.5789999999999997, "grad_norm": 7.859730243682861, "learning_rate": 4.845e-06, "loss": 1.3789, "step": 25790 }, { "epoch": 2.58, "grad_norm": 11.208824157714844, "learning_rate": 4.843000000000001e-06, "loss": 1.4797, "step": 25800 }, { "epoch": 2.581, "grad_norm": 10.176225662231445, "learning_rate": 4.841e-06, "loss": 1.2174, 
"step": 25810 }, { "epoch": 2.582, "grad_norm": 11.604830741882324, "learning_rate": 4.839000000000001e-06, "loss": 1.2563, "step": 25820 }, { "epoch": 2.583, "grad_norm": 6.560490608215332, "learning_rate": 4.8370000000000004e-06, "loss": 1.1948, "step": 25830 }, { "epoch": 2.584, "grad_norm": 8.994233131408691, "learning_rate": 4.835e-06, "loss": 1.0174, "step": 25840 }, { "epoch": 2.585, "grad_norm": 6.133030891418457, "learning_rate": 4.833e-06, "loss": 1.2223, "step": 25850 }, { "epoch": 2.586, "grad_norm": 8.370345115661621, "learning_rate": 4.831e-06, "loss": 1.2866, "step": 25860 }, { "epoch": 2.5869999999999997, "grad_norm": 10.667707443237305, "learning_rate": 4.829000000000001e-06, "loss": 1.3542, "step": 25870 }, { "epoch": 2.588, "grad_norm": 13.363102912902832, "learning_rate": 4.8270000000000005e-06, "loss": 1.5625, "step": 25880 }, { "epoch": 2.589, "grad_norm": 9.463552474975586, "learning_rate": 4.825e-06, "loss": 1.3249, "step": 25890 }, { "epoch": 2.59, "grad_norm": 10.113780975341797, "learning_rate": 4.823e-06, "loss": 1.2496, "step": 25900 }, { "epoch": 2.591, "grad_norm": 3.6320672035217285, "learning_rate": 4.821e-06, "loss": 1.2655, "step": 25910 }, { "epoch": 2.592, "grad_norm": 12.241517066955566, "learning_rate": 4.819e-06, "loss": 1.2073, "step": 25920 }, { "epoch": 2.593, "grad_norm": 9.718478202819824, "learning_rate": 4.817000000000001e-06, "loss": 1.3205, "step": 25930 }, { "epoch": 2.594, "grad_norm": 9.223180770874023, "learning_rate": 4.8150000000000005e-06, "loss": 1.0735, "step": 25940 }, { "epoch": 2.5949999999999998, "grad_norm": 7.450496673583984, "learning_rate": 4.813e-06, "loss": 0.9463, "step": 25950 }, { "epoch": 2.596, "grad_norm": 10.048382759094238, "learning_rate": 4.811000000000001e-06, "loss": 1.0738, "step": 25960 }, { "epoch": 2.597, "grad_norm": 9.352211952209473, "learning_rate": 4.809e-06, "loss": 1.0032, "step": 25970 }, { "epoch": 2.598, "grad_norm": 5.309989929199219, "learning_rate": 
4.807000000000001e-06, "loss": 0.9776, "step": 25980 }, { "epoch": 2.599, "grad_norm": 16.22735023498535, "learning_rate": 4.805000000000001e-06, "loss": 0.9952, "step": 25990 }, { "epoch": 2.6, "grad_norm": 26.991445541381836, "learning_rate": 4.8030000000000004e-06, "loss": 1.5169, "step": 26000 }, { "epoch": 2.601, "grad_norm": 6.5229034423828125, "learning_rate": 4.801e-06, "loss": 1.5254, "step": 26010 }, { "epoch": 2.602, "grad_norm": 10.04957103729248, "learning_rate": 4.799e-06, "loss": 1.2772, "step": 26020 }, { "epoch": 2.6029999999999998, "grad_norm": 2.6091344356536865, "learning_rate": 4.797000000000001e-06, "loss": 1.1965, "step": 26030 }, { "epoch": 2.604, "grad_norm": 18.193498611450195, "learning_rate": 4.795e-06, "loss": 1.3206, "step": 26040 }, { "epoch": 2.605, "grad_norm": 7.127922058105469, "learning_rate": 4.7930000000000005e-06, "loss": 1.1143, "step": 26050 }, { "epoch": 2.606, "grad_norm": 5.348570823669434, "learning_rate": 4.791e-06, "loss": 0.866, "step": 26060 }, { "epoch": 2.607, "grad_norm": 12.911650657653809, "learning_rate": 4.789e-06, "loss": 1.079, "step": 26070 }, { "epoch": 2.608, "grad_norm": 8.899086952209473, "learning_rate": 4.787000000000001e-06, "loss": 1.4836, "step": 26080 }, { "epoch": 2.609, "grad_norm": 9.376489639282227, "learning_rate": 4.785e-06, "loss": 1.3028, "step": 26090 }, { "epoch": 2.61, "grad_norm": 8.030030250549316, "learning_rate": 4.783000000000001e-06, "loss": 1.095, "step": 26100 }, { "epoch": 2.6109999999999998, "grad_norm": 23.64223289489746, "learning_rate": 4.7810000000000005e-06, "loss": 1.4464, "step": 26110 }, { "epoch": 2.612, "grad_norm": 18.664932250976562, "learning_rate": 4.779e-06, "loss": 1.5965, "step": 26120 }, { "epoch": 2.613, "grad_norm": 13.562324523925781, "learning_rate": 4.777e-06, "loss": 1.3286, "step": 26130 }, { "epoch": 2.614, "grad_norm": 9.1472749710083, "learning_rate": 4.775e-06, "loss": 1.1693, "step": 26140 }, { "epoch": 2.615, "grad_norm": 11.029375076293945, 
"learning_rate": 4.773000000000001e-06, "loss": 1.3019, "step": 26150 }, { "epoch": 2.616, "grad_norm": 10.083722114562988, "learning_rate": 4.7710000000000006e-06, "loss": 0.9964, "step": 26160 }, { "epoch": 2.617, "grad_norm": 16.661026000976562, "learning_rate": 4.769e-06, "loss": 1.2263, "step": 26170 }, { "epoch": 2.618, "grad_norm": 9.473404884338379, "learning_rate": 4.767e-06, "loss": 1.2907, "step": 26180 }, { "epoch": 2.6189999999999998, "grad_norm": 14.172446250915527, "learning_rate": 4.765e-06, "loss": 1.0556, "step": 26190 }, { "epoch": 2.62, "grad_norm": 6.923572540283203, "learning_rate": 4.763000000000001e-06, "loss": 1.2667, "step": 26200 }, { "epoch": 2.621, "grad_norm": 27.346187591552734, "learning_rate": 4.761000000000001e-06, "loss": 1.1474, "step": 26210 }, { "epoch": 2.622, "grad_norm": 9.219493865966797, "learning_rate": 4.7590000000000005e-06, "loss": 1.3691, "step": 26220 }, { "epoch": 2.623, "grad_norm": 16.144615173339844, "learning_rate": 4.757e-06, "loss": 0.9929, "step": 26230 }, { "epoch": 2.624, "grad_norm": 11.472280502319336, "learning_rate": 4.755e-06, "loss": 1.2388, "step": 26240 }, { "epoch": 2.625, "grad_norm": 19.197513580322266, "learning_rate": 4.753e-06, "loss": 1.4861, "step": 26250 }, { "epoch": 2.626, "grad_norm": 18.671621322631836, "learning_rate": 4.751000000000001e-06, "loss": 1.2143, "step": 26260 }, { "epoch": 2.627, "grad_norm": 12.738090515136719, "learning_rate": 4.749000000000001e-06, "loss": 1.4203, "step": 26270 }, { "epoch": 2.628, "grad_norm": 10.31644058227539, "learning_rate": 4.7470000000000005e-06, "loss": 1.4357, "step": 26280 }, { "epoch": 2.629, "grad_norm": 13.442634582519531, "learning_rate": 4.745e-06, "loss": 1.2694, "step": 26290 }, { "epoch": 2.63, "grad_norm": 4.789092540740967, "learning_rate": 4.743e-06, "loss": 1.1184, "step": 26300 }, { "epoch": 2.6310000000000002, "grad_norm": 9.319384574890137, "learning_rate": 4.741000000000001e-06, "loss": 1.1107, "step": 26310 }, { "epoch": 2.632, 
"grad_norm": 10.924176216125488, "learning_rate": 4.739e-06, "loss": 1.1989, "step": 26320 }, { "epoch": 2.633, "grad_norm": 11.36841869354248, "learning_rate": 4.7370000000000006e-06, "loss": 1.2313, "step": 26330 }, { "epoch": 2.634, "grad_norm": 7.689656734466553, "learning_rate": 4.735e-06, "loss": 1.3506, "step": 26340 }, { "epoch": 2.635, "grad_norm": 6.9337615966796875, "learning_rate": 4.733e-06, "loss": 1.2938, "step": 26350 }, { "epoch": 2.636, "grad_norm": 9.652322769165039, "learning_rate": 4.731000000000001e-06, "loss": 1.3206, "step": 26360 }, { "epoch": 2.637, "grad_norm": 9.131610870361328, "learning_rate": 4.729e-06, "loss": 1.3155, "step": 26370 }, { "epoch": 2.638, "grad_norm": 8.75113582611084, "learning_rate": 4.727000000000001e-06, "loss": 1.1633, "step": 26380 }, { "epoch": 2.6390000000000002, "grad_norm": 8.681668281555176, "learning_rate": 4.7250000000000005e-06, "loss": 1.3196, "step": 26390 }, { "epoch": 2.64, "grad_norm": 9.447102546691895, "learning_rate": 4.723e-06, "loss": 1.0491, "step": 26400 }, { "epoch": 2.641, "grad_norm": 10.83787727355957, "learning_rate": 4.721e-06, "loss": 0.9296, "step": 26410 }, { "epoch": 2.642, "grad_norm": 13.144947052001953, "learning_rate": 4.719e-06, "loss": 1.4103, "step": 26420 }, { "epoch": 2.643, "grad_norm": 11.414794921875, "learning_rate": 4.717000000000001e-06, "loss": 1.252, "step": 26430 }, { "epoch": 2.644, "grad_norm": 11.831024169921875, "learning_rate": 4.715e-06, "loss": 1.4681, "step": 26440 }, { "epoch": 2.645, "grad_norm": 9.7584867477417, "learning_rate": 4.7130000000000004e-06, "loss": 1.2719, "step": 26450 }, { "epoch": 2.646, "grad_norm": 11.30492877960205, "learning_rate": 4.711e-06, "loss": 1.6659, "step": 26460 }, { "epoch": 2.6470000000000002, "grad_norm": 10.472000122070312, "learning_rate": 4.709e-06, "loss": 1.1753, "step": 26470 }, { "epoch": 2.648, "grad_norm": 9.543447494506836, "learning_rate": 4.707000000000001e-06, "loss": 1.3994, "step": 26480 }, { "epoch": 2.649, 
"grad_norm": 12.89002513885498, "learning_rate": 4.705e-06, "loss": 1.3252, "step": 26490 }, { "epoch": 2.65, "grad_norm": 12.548370361328125, "learning_rate": 4.7030000000000005e-06, "loss": 1.5421, "step": 26500 }, { "epoch": 2.651, "grad_norm": 6.604366302490234, "learning_rate": 4.701e-06, "loss": 1.0686, "step": 26510 }, { "epoch": 2.652, "grad_norm": 7.722643852233887, "learning_rate": 4.699e-06, "loss": 0.9194, "step": 26520 }, { "epoch": 2.653, "grad_norm": 7.2298455238342285, "learning_rate": 4.697e-06, "loss": 1.0095, "step": 26530 }, { "epoch": 2.654, "grad_norm": 13.385978698730469, "learning_rate": 4.695e-06, "loss": 1.4402, "step": 26540 }, { "epoch": 2.6550000000000002, "grad_norm": 16.399097442626953, "learning_rate": 4.693000000000001e-06, "loss": 1.2924, "step": 26550 }, { "epoch": 2.656, "grad_norm": 10.525229454040527, "learning_rate": 4.6910000000000005e-06, "loss": 1.2452, "step": 26560 }, { "epoch": 2.657, "grad_norm": 8.278346061706543, "learning_rate": 4.689e-06, "loss": 1.2384, "step": 26570 }, { "epoch": 2.658, "grad_norm": 7.807348251342773, "learning_rate": 4.687e-06, "loss": 1.0564, "step": 26580 }, { "epoch": 2.659, "grad_norm": 10.60027027130127, "learning_rate": 4.685000000000001e-06, "loss": 1.2073, "step": 26590 }, { "epoch": 2.66, "grad_norm": 5.6771240234375, "learning_rate": 4.683000000000001e-06, "loss": 1.0055, "step": 26600 }, { "epoch": 2.661, "grad_norm": 11.969417572021484, "learning_rate": 4.681000000000001e-06, "loss": 1.4215, "step": 26610 }, { "epoch": 2.662, "grad_norm": 12.573792457580566, "learning_rate": 4.6790000000000004e-06, "loss": 1.1644, "step": 26620 }, { "epoch": 2.6630000000000003, "grad_norm": 15.218626022338867, "learning_rate": 4.677e-06, "loss": 1.3725, "step": 26630 }, { "epoch": 2.664, "grad_norm": 9.412232398986816, "learning_rate": 4.675000000000001e-06, "loss": 1.2399, "step": 26640 }, { "epoch": 2.665, "grad_norm": 5.982435703277588, "learning_rate": 4.673e-06, "loss": 1.1142, "step": 26650 }, { 
"epoch": 2.666, "grad_norm": 11.966681480407715, "learning_rate": 4.671000000000001e-06, "loss": 1.2253, "step": 26660 }, { "epoch": 2.667, "grad_norm": 11.43183422088623, "learning_rate": 4.6690000000000005e-06, "loss": 1.5589, "step": 26670 }, { "epoch": 2.668, "grad_norm": 10.430108070373535, "learning_rate": 4.667e-06, "loss": 1.1282, "step": 26680 }, { "epoch": 2.669, "grad_norm": 7.460301876068115, "learning_rate": 4.665e-06, "loss": 1.2238, "step": 26690 }, { "epoch": 2.67, "grad_norm": 14.373332977294922, "learning_rate": 4.663e-06, "loss": 1.3281, "step": 26700 }, { "epoch": 2.6710000000000003, "grad_norm": 12.55157470703125, "learning_rate": 4.661000000000001e-06, "loss": 1.5208, "step": 26710 }, { "epoch": 2.672, "grad_norm": 7.130515098571777, "learning_rate": 4.659e-06, "loss": 1.1701, "step": 26720 }, { "epoch": 2.673, "grad_norm": 7.142449378967285, "learning_rate": 4.6570000000000005e-06, "loss": 1.1118, "step": 26730 }, { "epoch": 2.674, "grad_norm": 9.152042388916016, "learning_rate": 4.655e-06, "loss": 1.44, "step": 26740 }, { "epoch": 2.675, "grad_norm": 15.04335880279541, "learning_rate": 4.653e-06, "loss": 1.3362, "step": 26750 }, { "epoch": 2.676, "grad_norm": 13.294509887695312, "learning_rate": 4.651000000000001e-06, "loss": 1.2667, "step": 26760 }, { "epoch": 2.677, "grad_norm": 5.947910785675049, "learning_rate": 4.649e-06, "loss": 1.3602, "step": 26770 }, { "epoch": 2.678, "grad_norm": 10.367618560791016, "learning_rate": 4.6470000000000006e-06, "loss": 1.3463, "step": 26780 }, { "epoch": 2.6790000000000003, "grad_norm": 8.124629020690918, "learning_rate": 4.645e-06, "loss": 1.3154, "step": 26790 }, { "epoch": 2.68, "grad_norm": 10.032644271850586, "learning_rate": 4.643e-06, "loss": 1.3324, "step": 26800 }, { "epoch": 2.681, "grad_norm": 11.967508316040039, "learning_rate": 4.641e-06, "loss": 1.0967, "step": 26810 }, { "epoch": 2.682, "grad_norm": 11.274967193603516, "learning_rate": 4.639e-06, "loss": 1.0953, "step": 26820 }, { 
"epoch": 2.683, "grad_norm": 10.751545906066895, "learning_rate": 4.637000000000001e-06, "loss": 1.1377, "step": 26830 }, { "epoch": 2.684, "grad_norm": 12.683024406433105, "learning_rate": 4.6350000000000005e-06, "loss": 1.4662, "step": 26840 }, { "epoch": 2.685, "grad_norm": 5.929715633392334, "learning_rate": 4.633e-06, "loss": 1.2446, "step": 26850 }, { "epoch": 2.686, "grad_norm": 7.272774696350098, "learning_rate": 4.631e-06, "loss": 1.0924, "step": 26860 }, { "epoch": 2.6870000000000003, "grad_norm": 8.574056625366211, "learning_rate": 4.629e-06, "loss": 1.2384, "step": 26870 }, { "epoch": 2.6879999999999997, "grad_norm": 9.3987398147583, "learning_rate": 4.627000000000001e-06, "loss": 1.0662, "step": 26880 }, { "epoch": 2.689, "grad_norm": 11.280902862548828, "learning_rate": 4.625000000000001e-06, "loss": 1.3434, "step": 26890 }, { "epoch": 2.69, "grad_norm": 9.289864540100098, "learning_rate": 4.6230000000000005e-06, "loss": 1.22, "step": 26900 }, { "epoch": 2.691, "grad_norm": 11.578208923339844, "learning_rate": 4.621e-06, "loss": 1.1021, "step": 26910 }, { "epoch": 2.692, "grad_norm": 14.280811309814453, "learning_rate": 4.619e-06, "loss": 1.2812, "step": 26920 }, { "epoch": 2.693, "grad_norm": 12.243753433227539, "learning_rate": 4.617e-06, "loss": 1.2799, "step": 26930 }, { "epoch": 2.694, "grad_norm": 25.44944953918457, "learning_rate": 4.615000000000001e-06, "loss": 1.3998, "step": 26940 }, { "epoch": 2.695, "grad_norm": 13.645099639892578, "learning_rate": 4.6130000000000006e-06, "loss": 1.0764, "step": 26950 }, { "epoch": 2.6959999999999997, "grad_norm": 16.16865348815918, "learning_rate": 4.611e-06, "loss": 1.1492, "step": 26960 }, { "epoch": 2.697, "grad_norm": 14.500459671020508, "learning_rate": 4.609e-06, "loss": 1.2908, "step": 26970 }, { "epoch": 2.698, "grad_norm": 8.940522193908691, "learning_rate": 4.607e-06, "loss": 1.0041, "step": 26980 }, { "epoch": 2.699, "grad_norm": 8.997739791870117, "learning_rate": 4.605000000000001e-06, 
"loss": 1.2535, "step": 26990 }, { "epoch": 2.7, "grad_norm": 12.771944046020508, "learning_rate": 4.603000000000001e-06, "loss": 1.2124, "step": 27000 }, { "epoch": 2.701, "grad_norm": 26.312469482421875, "learning_rate": 4.6010000000000005e-06, "loss": 1.3646, "step": 27010 }, { "epoch": 2.702, "grad_norm": 14.173383712768555, "learning_rate": 4.599e-06, "loss": 1.2588, "step": 27020 }, { "epoch": 2.703, "grad_norm": 8.262104988098145, "learning_rate": 4.597e-06, "loss": 1.4555, "step": 27030 }, { "epoch": 2.7039999999999997, "grad_norm": 9.5214204788208, "learning_rate": 4.595000000000001e-06, "loss": 1.1892, "step": 27040 }, { "epoch": 2.705, "grad_norm": 8.4497709274292, "learning_rate": 4.593e-06, "loss": 1.1133, "step": 27050 }, { "epoch": 2.706, "grad_norm": 7.358310222625732, "learning_rate": 4.591000000000001e-06, "loss": 1.1579, "step": 27060 }, { "epoch": 2.707, "grad_norm": 5.321040153503418, "learning_rate": 4.5890000000000004e-06, "loss": 1.0372, "step": 27070 }, { "epoch": 2.708, "grad_norm": 11.205801963806152, "learning_rate": 4.587e-06, "loss": 1.0711, "step": 27080 }, { "epoch": 2.709, "grad_norm": 12.505595207214355, "learning_rate": 4.585e-06, "loss": 1.3812, "step": 27090 }, { "epoch": 2.71, "grad_norm": 12.984840393066406, "learning_rate": 4.583e-06, "loss": 1.0914, "step": 27100 }, { "epoch": 2.711, "grad_norm": 15.475408554077148, "learning_rate": 4.581000000000001e-06, "loss": 1.437, "step": 27110 }, { "epoch": 2.7119999999999997, "grad_norm": 19.339683532714844, "learning_rate": 4.579e-06, "loss": 1.2219, "step": 27120 }, { "epoch": 2.713, "grad_norm": 10.133574485778809, "learning_rate": 4.577e-06, "loss": 1.1068, "step": 27130 }, { "epoch": 2.714, "grad_norm": 5.451807498931885, "learning_rate": 4.575e-06, "loss": 0.8961, "step": 27140 }, { "epoch": 2.715, "grad_norm": 13.879626274108887, "learning_rate": 4.573e-06, "loss": 1.1955, "step": 27150 }, { "epoch": 2.716, "grad_norm": 17.83310890197754, "learning_rate": 
4.571000000000001e-06, "loss": 1.4792, "step": 27160 }, { "epoch": 2.717, "grad_norm": 12.177143096923828, "learning_rate": 4.569e-06, "loss": 1.5278, "step": 27170 }, { "epoch": 2.718, "grad_norm": 18.39118003845215, "learning_rate": 4.5670000000000005e-06, "loss": 1.6774, "step": 27180 }, { "epoch": 2.719, "grad_norm": 9.37175464630127, "learning_rate": 4.565e-06, "loss": 1.3039, "step": 27190 }, { "epoch": 2.7199999999999998, "grad_norm": 12.88314437866211, "learning_rate": 4.563e-06, "loss": 1.1662, "step": 27200 }, { "epoch": 2.721, "grad_norm": 5.249074935913086, "learning_rate": 4.561e-06, "loss": 1.5159, "step": 27210 }, { "epoch": 2.722, "grad_norm": 6.209475040435791, "learning_rate": 4.559000000000001e-06, "loss": 1.0245, "step": 27220 }, { "epoch": 2.723, "grad_norm": 10.597536087036133, "learning_rate": 4.557000000000001e-06, "loss": 1.5545, "step": 27230 }, { "epoch": 2.724, "grad_norm": 9.719051361083984, "learning_rate": 4.5552e-06, "loss": 0.9715, "step": 27240 }, { "epoch": 2.725, "grad_norm": 7.649946212768555, "learning_rate": 4.553200000000001e-06, "loss": 1.0094, "step": 27250 }, { "epoch": 2.726, "grad_norm": 36.46924591064453, "learning_rate": 4.5512000000000006e-06, "loss": 1.5695, "step": 27260 }, { "epoch": 2.727, "grad_norm": 10.225464820861816, "learning_rate": 4.5492e-06, "loss": 1.2064, "step": 27270 }, { "epoch": 2.7279999999999998, "grad_norm": 11.404330253601074, "learning_rate": 4.5472e-06, "loss": 1.2016, "step": 27280 }, { "epoch": 2.729, "grad_norm": 13.81934642791748, "learning_rate": 4.5452e-06, "loss": 1.281, "step": 27290 }, { "epoch": 2.73, "grad_norm": 4.1190361976623535, "learning_rate": 4.5432e-06, "loss": 1.1359, "step": 27300 }, { "epoch": 2.731, "grad_norm": 11.372203826904297, "learning_rate": 4.541200000000001e-06, "loss": 1.2631, "step": 27310 }, { "epoch": 2.732, "grad_norm": 5.8521728515625, "learning_rate": 4.5392000000000005e-06, "loss": 1.1554, "step": 27320 }, { "epoch": 2.733, "grad_norm": 
8.973188400268555, "learning_rate": 4.5372e-06, "loss": 1.2489, "step": 27330 }, { "epoch": 2.734, "grad_norm": 13.418599128723145, "learning_rate": 4.5352e-06, "loss": 1.2681, "step": 27340 }, { "epoch": 2.735, "grad_norm": 12.3944673538208, "learning_rate": 4.5332e-06, "loss": 1.6205, "step": 27350 }, { "epoch": 2.7359999999999998, "grad_norm": 16.795387268066406, "learning_rate": 4.531200000000001e-06, "loss": 1.2915, "step": 27360 }, { "epoch": 2.737, "grad_norm": 11.032565116882324, "learning_rate": 4.529200000000001e-06, "loss": 1.4337, "step": 27370 }, { "epoch": 2.738, "grad_norm": 6.078767776489258, "learning_rate": 4.5272000000000005e-06, "loss": 1.3812, "step": 27380 }, { "epoch": 2.739, "grad_norm": 7.59435510635376, "learning_rate": 4.5252e-06, "loss": 1.2787, "step": 27390 }, { "epoch": 2.74, "grad_norm": 8.870795249938965, "learning_rate": 4.5232e-06, "loss": 1.2942, "step": 27400 }, { "epoch": 2.741, "grad_norm": 7.041812419891357, "learning_rate": 4.521200000000001e-06, "loss": 1.3039, "step": 27410 }, { "epoch": 2.742, "grad_norm": 7.892660617828369, "learning_rate": 4.5192e-06, "loss": 1.1432, "step": 27420 }, { "epoch": 2.743, "grad_norm": 16.688968658447266, "learning_rate": 4.5172000000000006e-06, "loss": 1.1663, "step": 27430 }, { "epoch": 2.7439999999999998, "grad_norm": 7.6263108253479, "learning_rate": 4.5152e-06, "loss": 1.2131, "step": 27440 }, { "epoch": 2.745, "grad_norm": 11.467942237854004, "learning_rate": 4.5132e-06, "loss": 1.1913, "step": 27450 }, { "epoch": 2.746, "grad_norm": 11.104551315307617, "learning_rate": 4.511200000000001e-06, "loss": 1.2166, "step": 27460 }, { "epoch": 2.747, "grad_norm": 9.436031341552734, "learning_rate": 4.5092e-06, "loss": 1.0558, "step": 27470 }, { "epoch": 2.748, "grad_norm": 6.731126308441162, "learning_rate": 4.507200000000001e-06, "loss": 1.2248, "step": 27480 }, { "epoch": 2.749, "grad_norm": 10.219453811645508, "learning_rate": 4.5052000000000005e-06, "loss": 1.2185, "step": 27490 }, { 
"epoch": 2.75, "grad_norm": 9.223464012145996, "learning_rate": 4.5032e-06, "loss": 0.9611, "step": 27500 }, { "epoch": 2.751, "grad_norm": 10.569936752319336, "learning_rate": 4.5012e-06, "loss": 1.4572, "step": 27510 }, { "epoch": 2.752, "grad_norm": 4.6022491455078125, "learning_rate": 4.4992e-06, "loss": 1.2579, "step": 27520 }, { "epoch": 2.753, "grad_norm": 9.147900581359863, "learning_rate": 4.497200000000001e-06, "loss": 1.2259, "step": 27530 }, { "epoch": 2.754, "grad_norm": 11.9164457321167, "learning_rate": 4.4952e-06, "loss": 1.1863, "step": 27540 }, { "epoch": 2.755, "grad_norm": 9.15384292602539, "learning_rate": 4.4932000000000005e-06, "loss": 1.233, "step": 27550 }, { "epoch": 2.7560000000000002, "grad_norm": 12.440742492675781, "learning_rate": 4.4912e-06, "loss": 1.3557, "step": 27560 }, { "epoch": 2.757, "grad_norm": 10.166177749633789, "learning_rate": 4.4892e-06, "loss": 1.3221, "step": 27570 }, { "epoch": 2.758, "grad_norm": 11.141758918762207, "learning_rate": 4.487200000000001e-06, "loss": 1.1085, "step": 27580 }, { "epoch": 2.759, "grad_norm": 7.708592414855957, "learning_rate": 4.4852e-06, "loss": 1.1943, "step": 27590 }, { "epoch": 2.76, "grad_norm": 18.95536231994629, "learning_rate": 4.4832000000000005e-06, "loss": 1.5089, "step": 27600 }, { "epoch": 2.761, "grad_norm": 28.337188720703125, "learning_rate": 4.4812e-06, "loss": 1.2865, "step": 27610 }, { "epoch": 2.762, "grad_norm": 15.606249809265137, "learning_rate": 4.4792e-06, "loss": 1.4922, "step": 27620 }, { "epoch": 2.763, "grad_norm": 14.185953140258789, "learning_rate": 4.4772e-06, "loss": 1.3172, "step": 27630 }, { "epoch": 2.7640000000000002, "grad_norm": 14.517483711242676, "learning_rate": 4.4752e-06, "loss": 1.3086, "step": 27640 }, { "epoch": 2.765, "grad_norm": 10.955769538879395, "learning_rate": 4.473200000000001e-06, "loss": 1.0893, "step": 27650 }, { "epoch": 2.766, "grad_norm": 13.020503044128418, "learning_rate": 4.4712000000000005e-06, "loss": 1.2418, "step": 27660 
}, { "epoch": 2.767, "grad_norm": 9.835814476013184, "learning_rate": 4.4692e-06, "loss": 1.153, "step": 27670 }, { "epoch": 2.768, "grad_norm": 12.4320068359375, "learning_rate": 4.4672e-06, "loss": 1.0933, "step": 27680 }, { "epoch": 2.769, "grad_norm": 12.99984073638916, "learning_rate": 4.4652e-06, "loss": 1.447, "step": 27690 }, { "epoch": 2.77, "grad_norm": 3.4129481315612793, "learning_rate": 4.4632e-06, "loss": 1.4131, "step": 27700 }, { "epoch": 2.771, "grad_norm": 7.3676981925964355, "learning_rate": 4.461200000000001e-06, "loss": 1.2516, "step": 27710 }, { "epoch": 2.7720000000000002, "grad_norm": 27.299226760864258, "learning_rate": 4.4592000000000004e-06, "loss": 1.0941, "step": 27720 }, { "epoch": 2.773, "grad_norm": 15.56152057647705, "learning_rate": 4.4572e-06, "loss": 1.361, "step": 27730 }, { "epoch": 2.774, "grad_norm": 8.988409996032715, "learning_rate": 4.455200000000001e-06, "loss": 1.2053, "step": 27740 }, { "epoch": 2.775, "grad_norm": 6.986486911773682, "learning_rate": 4.4532e-06, "loss": 1.1982, "step": 27750 }, { "epoch": 2.776, "grad_norm": 9.51746654510498, "learning_rate": 4.451200000000001e-06, "loss": 1.1153, "step": 27760 }, { "epoch": 2.777, "grad_norm": 15.146944999694824, "learning_rate": 4.4492000000000005e-06, "loss": 1.0904, "step": 27770 }, { "epoch": 2.778, "grad_norm": 7.820342063903809, "learning_rate": 4.4472e-06, "loss": 1.3126, "step": 27780 }, { "epoch": 2.779, "grad_norm": 10.594219207763672, "learning_rate": 4.4452e-06, "loss": 1.4638, "step": 27790 }, { "epoch": 2.7800000000000002, "grad_norm": 11.066313743591309, "learning_rate": 4.4432e-06, "loss": 1.3394, "step": 27800 }, { "epoch": 2.781, "grad_norm": 11.49566650390625, "learning_rate": 4.441200000000001e-06, "loss": 1.2803, "step": 27810 }, { "epoch": 2.782, "grad_norm": 14.474987983703613, "learning_rate": 4.4392e-06, "loss": 1.5068, "step": 27820 }, { "epoch": 2.783, "grad_norm": 12.786811828613281, "learning_rate": 4.4372000000000005e-06, "loss": 1.1315, 
"step": 27830 }, { "epoch": 2.784, "grad_norm": 9.150422096252441, "learning_rate": 4.4352e-06, "loss": 1.275, "step": 27840 }, { "epoch": 2.785, "grad_norm": 13.220783233642578, "learning_rate": 4.4332e-06, "loss": 0.9721, "step": 27850 }, { "epoch": 2.786, "grad_norm": 11.27572250366211, "learning_rate": 4.431200000000001e-06, "loss": 1.3375, "step": 27860 }, { "epoch": 2.787, "grad_norm": 12.031418800354004, "learning_rate": 4.4292e-06, "loss": 1.1549, "step": 27870 }, { "epoch": 2.7880000000000003, "grad_norm": 11.848184585571289, "learning_rate": 4.4272000000000006e-06, "loss": 1.0158, "step": 27880 }, { "epoch": 2.789, "grad_norm": 18.599966049194336, "learning_rate": 4.4252000000000004e-06, "loss": 1.2044, "step": 27890 }, { "epoch": 2.79, "grad_norm": 8.868123054504395, "learning_rate": 4.4232e-06, "loss": 1.2063, "step": 27900 }, { "epoch": 2.791, "grad_norm": 7.2801008224487305, "learning_rate": 4.4212e-06, "loss": 1.0785, "step": 27910 }, { "epoch": 2.792, "grad_norm": 6.970773696899414, "learning_rate": 4.4192e-06, "loss": 1.4054, "step": 27920 }, { "epoch": 2.793, "grad_norm": 5.805993556976318, "learning_rate": 4.417200000000001e-06, "loss": 0.9688, "step": 27930 }, { "epoch": 2.794, "grad_norm": 10.535320281982422, "learning_rate": 4.4152000000000005e-06, "loss": 1.0839, "step": 27940 }, { "epoch": 2.795, "grad_norm": 16.748455047607422, "learning_rate": 4.4132e-06, "loss": 1.0149, "step": 27950 }, { "epoch": 2.7960000000000003, "grad_norm": 17.600894927978516, "learning_rate": 4.4112e-06, "loss": 1.1299, "step": 27960 }, { "epoch": 2.797, "grad_norm": 19.349349975585938, "learning_rate": 4.4092e-06, "loss": 1.532, "step": 27970 }, { "epoch": 2.798, "grad_norm": 14.593655586242676, "learning_rate": 4.407200000000001e-06, "loss": 1.0577, "step": 27980 }, { "epoch": 2.799, "grad_norm": 11.048125267028809, "learning_rate": 4.405200000000001e-06, "loss": 1.2791, "step": 27990 }, { "epoch": 2.8, "grad_norm": 13.676397323608398, "learning_rate": 
4.4032000000000005e-06, "loss": 1.4034, "step": 28000 }, { "epoch": 2.801, "grad_norm": 12.322213172912598, "learning_rate": 4.4012e-06, "loss": 1.1741, "step": 28010 }, { "epoch": 2.802, "grad_norm": 13.067313194274902, "learning_rate": 4.3992e-06, "loss": 1.293, "step": 28020 }, { "epoch": 2.803, "grad_norm": 7.767413139343262, "learning_rate": 4.3972e-06, "loss": 0.8584, "step": 28030 }, { "epoch": 2.8040000000000003, "grad_norm": 14.64617919921875, "learning_rate": 4.395200000000001e-06, "loss": 1.2161, "step": 28040 }, { "epoch": 2.805, "grad_norm": 5.752593517303467, "learning_rate": 4.3932000000000006e-06, "loss": 1.0044, "step": 28050 }, { "epoch": 2.806, "grad_norm": 10.72436237335205, "learning_rate": 4.3912e-06, "loss": 1.087, "step": 28060 }, { "epoch": 2.807, "grad_norm": 9.92006778717041, "learning_rate": 4.3892e-06, "loss": 1.2796, "step": 28070 }, { "epoch": 2.808, "grad_norm": 3.7616310119628906, "learning_rate": 4.3872e-06, "loss": 0.8576, "step": 28080 }, { "epoch": 2.809, "grad_norm": 37.09492492675781, "learning_rate": 4.385200000000001e-06, "loss": 1.6485, "step": 28090 }, { "epoch": 2.81, "grad_norm": 15.517118453979492, "learning_rate": 4.3832e-06, "loss": 1.091, "step": 28100 }, { "epoch": 2.811, "grad_norm": 9.6930513381958, "learning_rate": 4.3812000000000005e-06, "loss": 0.9867, "step": 28110 }, { "epoch": 2.8120000000000003, "grad_norm": 8.094300270080566, "learning_rate": 4.3792e-06, "loss": 0.9502, "step": 28120 }, { "epoch": 2.8129999999999997, "grad_norm": 26.39850425720215, "learning_rate": 4.3772e-06, "loss": 1.205, "step": 28130 }, { "epoch": 2.814, "grad_norm": 14.603551864624023, "learning_rate": 4.375200000000001e-06, "loss": 1.4212, "step": 28140 }, { "epoch": 2.815, "grad_norm": 7.616761684417725, "learning_rate": 4.3732e-06, "loss": 0.7262, "step": 28150 }, { "epoch": 2.816, "grad_norm": 27.312965393066406, "learning_rate": 4.371200000000001e-06, "loss": 1.4117, "step": 28160 }, { "epoch": 2.817, "grad_norm": 
11.388627052307129, "learning_rate": 4.3692000000000005e-06, "loss": 0.9729, "step": 28170 }, { "epoch": 2.818, "grad_norm": 10.912339210510254, "learning_rate": 4.3672e-06, "loss": 1.2495, "step": 28180 }, { "epoch": 2.819, "grad_norm": 11.842402458190918, "learning_rate": 4.3652e-06, "loss": 1.432, "step": 28190 }, { "epoch": 2.82, "grad_norm": 12.59762191772461, "learning_rate": 4.3632e-06, "loss": 1.5546, "step": 28200 }, { "epoch": 2.8209999999999997, "grad_norm": 8.438789367675781, "learning_rate": 4.361200000000001e-06, "loss": 1.094, "step": 28210 }, { "epoch": 2.822, "grad_norm": 6.766376972198486, "learning_rate": 4.3592e-06, "loss": 0.7606, "step": 28220 }, { "epoch": 2.823, "grad_norm": 13.738387107849121, "learning_rate": 4.3572e-06, "loss": 1.3159, "step": 28230 }, { "epoch": 2.824, "grad_norm": 11.020387649536133, "learning_rate": 4.3552e-06, "loss": 1.0957, "step": 28240 }, { "epoch": 2.825, "grad_norm": 9.831596374511719, "learning_rate": 4.3532e-06, "loss": 0.9881, "step": 28250 }, { "epoch": 2.826, "grad_norm": 11.902912139892578, "learning_rate": 4.351200000000001e-06, "loss": 0.6875, "step": 28260 }, { "epoch": 2.827, "grad_norm": 13.82536506652832, "learning_rate": 4.3492e-06, "loss": 1.4256, "step": 28270 }, { "epoch": 2.828, "grad_norm": 10.56479263305664, "learning_rate": 4.3472000000000005e-06, "loss": 1.1458, "step": 28280 }, { "epoch": 2.8289999999999997, "grad_norm": 7.628909111022949, "learning_rate": 4.3452e-06, "loss": 0.881, "step": 28290 }, { "epoch": 2.83, "grad_norm": 12.140775680541992, "learning_rate": 4.3432e-06, "loss": 1.4978, "step": 28300 }, { "epoch": 2.831, "grad_norm": 7.6505513191223145, "learning_rate": 4.3412e-06, "loss": 1.1876, "step": 28310 }, { "epoch": 2.832, "grad_norm": 12.143505096435547, "learning_rate": 4.3392e-06, "loss": 1.3192, "step": 28320 }, { "epoch": 2.833, "grad_norm": 18.640975952148438, "learning_rate": 4.337200000000001e-06, "loss": 1.2208, "step": 28330 }, { "epoch": 2.834, "grad_norm": 
12.209779739379883, "learning_rate": 4.3352000000000004e-06, "loss": 1.0665, "step": 28340 }, { "epoch": 2.835, "grad_norm": 4.95745849609375, "learning_rate": 4.3332e-06, "loss": 1.2124, "step": 28350 }, { "epoch": 2.836, "grad_norm": 5.988442897796631, "learning_rate": 4.3312e-06, "loss": 0.7705, "step": 28360 }, { "epoch": 2.8369999999999997, "grad_norm": 9.552483558654785, "learning_rate": 4.329200000000001e-06, "loss": 1.0105, "step": 28370 }, { "epoch": 2.838, "grad_norm": 19.005388259887695, "learning_rate": 4.327200000000001e-06, "loss": 0.8, "step": 28380 }, { "epoch": 2.839, "grad_norm": 9.338611602783203, "learning_rate": 4.3252000000000005e-06, "loss": 1.4191, "step": 28390 }, { "epoch": 2.84, "grad_norm": 1.0884543657302856, "learning_rate": 4.3232e-06, "loss": 1.2973, "step": 28400 }, { "epoch": 2.841, "grad_norm": 10.295809745788574, "learning_rate": 4.3212e-06, "loss": 1.2408, "step": 28410 }, { "epoch": 2.842, "grad_norm": 10.498838424682617, "learning_rate": 4.319200000000001e-06, "loss": 1.2065, "step": 28420 }, { "epoch": 2.843, "grad_norm": 12.474020004272461, "learning_rate": 4.3172e-06, "loss": 1.2878, "step": 28430 }, { "epoch": 2.844, "grad_norm": 14.29616928100586, "learning_rate": 4.315200000000001e-06, "loss": 1.11, "step": 28440 }, { "epoch": 2.8449999999999998, "grad_norm": 252.46121215820312, "learning_rate": 4.3132000000000005e-06, "loss": 1.1592, "step": 28450 }, { "epoch": 2.846, "grad_norm": 17.339956283569336, "learning_rate": 4.3112e-06, "loss": 1.181, "step": 28460 }, { "epoch": 2.847, "grad_norm": 14.4572114944458, "learning_rate": 4.309200000000001e-06, "loss": 1.0817, "step": 28470 }, { "epoch": 2.848, "grad_norm": 7.2814106941223145, "learning_rate": 4.3072e-06, "loss": 1.6011, "step": 28480 }, { "epoch": 2.849, "grad_norm": 16.073135375976562, "learning_rate": 4.305200000000001e-06, "loss": 0.9683, "step": 28490 }, { "epoch": 2.85, "grad_norm": 10.381219863891602, "learning_rate": 4.3032000000000006e-06, "loss": 1.5079, 
"step": 28500 }, { "epoch": 2.851, "grad_norm": 12.608407020568848, "learning_rate": 4.3012000000000004e-06, "loss": 1.2247, "step": 28510 }, { "epoch": 2.852, "grad_norm": 16.885282516479492, "learning_rate": 4.2992e-06, "loss": 1.351, "step": 28520 }, { "epoch": 2.8529999999999998, "grad_norm": 18.70602798461914, "learning_rate": 4.2972e-06, "loss": 1.2875, "step": 28530 }, { "epoch": 2.854, "grad_norm": 7.358452320098877, "learning_rate": 4.295200000000001e-06, "loss": 1.3524, "step": 28540 }, { "epoch": 2.855, "grad_norm": 6.4514665603637695, "learning_rate": 4.2932e-06, "loss": 1.0301, "step": 28550 }, { "epoch": 2.856, "grad_norm": 12.2024564743042, "learning_rate": 4.2912000000000005e-06, "loss": 1.249, "step": 28560 }, { "epoch": 2.857, "grad_norm": 14.770137786865234, "learning_rate": 4.2892e-06, "loss": 1.0809, "step": 28570 }, { "epoch": 2.858, "grad_norm": 13.098846435546875, "learning_rate": 4.2872e-06, "loss": 1.1276, "step": 28580 }, { "epoch": 2.859, "grad_norm": 6.500767707824707, "learning_rate": 4.2852e-06, "loss": 1.3661, "step": 28590 }, { "epoch": 2.86, "grad_norm": 9.041402816772461, "learning_rate": 4.2832e-06, "loss": 1.0491, "step": 28600 }, { "epoch": 2.8609999999999998, "grad_norm": 16.984638214111328, "learning_rate": 4.281200000000001e-06, "loss": 1.2452, "step": 28610 }, { "epoch": 2.862, "grad_norm": 12.612082481384277, "learning_rate": 4.2792000000000005e-06, "loss": 1.8306, "step": 28620 }, { "epoch": 2.863, "grad_norm": 11.838570594787598, "learning_rate": 4.2772e-06, "loss": 1.3565, "step": 28630 }, { "epoch": 2.864, "grad_norm": 8.251659393310547, "learning_rate": 4.2752e-06, "loss": 0.9963, "step": 28640 }, { "epoch": 2.865, "grad_norm": 8.283576011657715, "learning_rate": 4.2732e-06, "loss": 1.2303, "step": 28650 }, { "epoch": 2.866, "grad_norm": 6.326951503753662, "learning_rate": 4.271200000000001e-06, "loss": 1.1413, "step": 28660 }, { "epoch": 2.867, "grad_norm": 13.360471725463867, "learning_rate": 4.2692000000000006e-06, 
"loss": 1.0576, "step": 28670 }, { "epoch": 2.868, "grad_norm": 11.91942024230957, "learning_rate": 4.2672e-06, "loss": 1.2114, "step": 28680 }, { "epoch": 2.8689999999999998, "grad_norm": 6.352290153503418, "learning_rate": 4.2652e-06, "loss": 1.2793, "step": 28690 }, { "epoch": 2.87, "grad_norm": 15.19814682006836, "learning_rate": 4.2632e-06, "loss": 0.9239, "step": 28700 }, { "epoch": 2.871, "grad_norm": 11.402700424194336, "learning_rate": 4.2612e-06, "loss": 1.1869, "step": 28710 }, { "epoch": 2.872, "grad_norm": 11.78938102722168, "learning_rate": 4.259200000000001e-06, "loss": 1.0111, "step": 28720 }, { "epoch": 2.873, "grad_norm": 12.902175903320312, "learning_rate": 4.2572000000000005e-06, "loss": 1.3875, "step": 28730 }, { "epoch": 2.874, "grad_norm": 11.656749725341797, "learning_rate": 4.2552e-06, "loss": 1.3063, "step": 28740 }, { "epoch": 2.875, "grad_norm": 32.06140899658203, "learning_rate": 4.2532e-06, "loss": 1.2446, "step": 28750 }, { "epoch": 2.876, "grad_norm": 12.36316967010498, "learning_rate": 4.2512e-06, "loss": 1.5132, "step": 28760 }, { "epoch": 2.877, "grad_norm": 11.510587692260742, "learning_rate": 4.249200000000001e-06, "loss": 1.3978, "step": 28770 }, { "epoch": 2.878, "grad_norm": 9.1486234664917, "learning_rate": 4.247200000000001e-06, "loss": 1.3712, "step": 28780 }, { "epoch": 2.879, "grad_norm": 5.921765327453613, "learning_rate": 4.2452000000000005e-06, "loss": 1.2636, "step": 28790 }, { "epoch": 2.88, "grad_norm": 11.80471134185791, "learning_rate": 4.2432e-06, "loss": 1.051, "step": 28800 }, { "epoch": 2.8810000000000002, "grad_norm": 14.716862678527832, "learning_rate": 4.2412e-06, "loss": 0.8862, "step": 28810 }, { "epoch": 2.882, "grad_norm": 11.905674934387207, "learning_rate": 4.239200000000001e-06, "loss": 1.3379, "step": 28820 }, { "epoch": 2.883, "grad_norm": 17.299793243408203, "learning_rate": 4.2372e-06, "loss": 1.3575, "step": 28830 }, { "epoch": 2.884, "grad_norm": 15.264360427856445, "learning_rate": 
4.2352000000000005e-06, "loss": 1.472, "step": 28840 }, { "epoch": 2.885, "grad_norm": 6.041256427764893, "learning_rate": 4.2332e-06, "loss": 0.8824, "step": 28850 }, { "epoch": 2.886, "grad_norm": 14.772128105163574, "learning_rate": 4.2312e-06, "loss": 1.4356, "step": 28860 }, { "epoch": 2.887, "grad_norm": 22.43916893005371, "learning_rate": 4.229200000000001e-06, "loss": 1.284, "step": 28870 }, { "epoch": 2.888, "grad_norm": 14.912202835083008, "learning_rate": 4.2272e-06, "loss": 1.313, "step": 28880 }, { "epoch": 2.8890000000000002, "grad_norm": 12.070676803588867, "learning_rate": 4.225200000000001e-06, "loss": 1.027, "step": 28890 }, { "epoch": 2.89, "grad_norm": 16.022184371948242, "learning_rate": 4.2232000000000005e-06, "loss": 1.2464, "step": 28900 }, { "epoch": 2.891, "grad_norm": 13.290488243103027, "learning_rate": 4.2212e-06, "loss": 1.2933, "step": 28910 }, { "epoch": 2.892, "grad_norm": 9.641236305236816, "learning_rate": 4.2192e-06, "loss": 1.2236, "step": 28920 }, { "epoch": 2.893, "grad_norm": 14.699100494384766, "learning_rate": 4.2172e-06, "loss": 1.3147, "step": 28930 }, { "epoch": 2.894, "grad_norm": 4.339374542236328, "learning_rate": 4.215200000000001e-06, "loss": 1.1452, "step": 28940 }, { "epoch": 2.895, "grad_norm": 11.521485328674316, "learning_rate": 4.2132e-06, "loss": 1.378, "step": 28950 }, { "epoch": 2.896, "grad_norm": 10.155696868896484, "learning_rate": 4.2112000000000004e-06, "loss": 1.2072, "step": 28960 }, { "epoch": 2.8970000000000002, "grad_norm": 10.700362205505371, "learning_rate": 4.2092e-06, "loss": 1.0271, "step": 28970 }, { "epoch": 2.898, "grad_norm": 11.94865894317627, "learning_rate": 4.2072e-06, "loss": 1.4238, "step": 28980 }, { "epoch": 2.899, "grad_norm": 13.171172142028809, "learning_rate": 4.2052e-06, "loss": 1.1322, "step": 28990 }, { "epoch": 2.9, "grad_norm": 17.820798873901367, "learning_rate": 4.203200000000001e-06, "loss": 0.9846, "step": 29000 }, { "epoch": 2.901, "grad_norm": 5.406218528747559, 
"learning_rate": 4.2012000000000005e-06, "loss": 1.4581, "step": 29010 }, { "epoch": 2.902, "grad_norm": 14.824155807495117, "learning_rate": 4.1992e-06, "loss": 1.597, "step": 29020 }, { "epoch": 2.903, "grad_norm": 12.917391777038574, "learning_rate": 4.1972e-06, "loss": 1.1654, "step": 29030 }, { "epoch": 2.904, "grad_norm": 15.35782241821289, "learning_rate": 4.1952e-06, "loss": 1.1696, "step": 29040 }, { "epoch": 2.9050000000000002, "grad_norm": 5.833251476287842, "learning_rate": 4.193200000000001e-06, "loss": 1.243, "step": 29050 }, { "epoch": 2.906, "grad_norm": 7.592819690704346, "learning_rate": 4.191200000000001e-06, "loss": 1.5457, "step": 29060 }, { "epoch": 2.907, "grad_norm": 11.38972282409668, "learning_rate": 4.1892000000000005e-06, "loss": 1.4526, "step": 29070 }, { "epoch": 2.908, "grad_norm": 8.685243606567383, "learning_rate": 4.1872e-06, "loss": 1.4054, "step": 29080 }, { "epoch": 2.909, "grad_norm": 7.060324668884277, "learning_rate": 4.1852e-06, "loss": 1.3415, "step": 29090 }, { "epoch": 2.91, "grad_norm": 8.534814834594727, "learning_rate": 4.183200000000001e-06, "loss": 0.9971, "step": 29100 }, { "epoch": 2.911, "grad_norm": 8.408522605895996, "learning_rate": 4.1812e-06, "loss": 1.2417, "step": 29110 }, { "epoch": 2.912, "grad_norm": 9.102620124816895, "learning_rate": 4.179200000000001e-06, "loss": 1.0141, "step": 29120 }, { "epoch": 2.9130000000000003, "grad_norm": 10.356851577758789, "learning_rate": 4.1772000000000004e-06, "loss": 1.0363, "step": 29130 }, { "epoch": 2.914, "grad_norm": 10.104267120361328, "learning_rate": 4.1752e-06, "loss": 1.4358, "step": 29140 }, { "epoch": 2.915, "grad_norm": 14.787339210510254, "learning_rate": 4.173200000000001e-06, "loss": 1.2991, "step": 29150 }, { "epoch": 2.916, "grad_norm": 4.247357368469238, "learning_rate": 4.1712e-06, "loss": 1.2894, "step": 29160 }, { "epoch": 2.917, "grad_norm": 7.715616226196289, "learning_rate": 4.169200000000001e-06, "loss": 1.246, "step": 29170 }, { "epoch": 
2.918, "grad_norm": 3.9938862323760986, "learning_rate": 4.1672000000000005e-06, "loss": 1.3862, "step": 29180 }, { "epoch": 2.919, "grad_norm": 8.835389137268066, "learning_rate": 4.1652e-06, "loss": 1.1072, "step": 29190 }, { "epoch": 2.92, "grad_norm": 55.97272872924805, "learning_rate": 4.1632e-06, "loss": 1.1969, "step": 29200 }, { "epoch": 2.9210000000000003, "grad_norm": 9.522720336914062, "learning_rate": 4.1612e-06, "loss": 1.1262, "step": 29210 }, { "epoch": 2.922, "grad_norm": 11.86079216003418, "learning_rate": 4.159200000000001e-06, "loss": 1.2596, "step": 29220 }, { "epoch": 2.923, "grad_norm": 9.110761642456055, "learning_rate": 4.1572e-06, "loss": 1.1716, "step": 29230 }, { "epoch": 2.924, "grad_norm": 8.488015174865723, "learning_rate": 4.1552000000000005e-06, "loss": 1.302, "step": 29240 }, { "epoch": 2.925, "grad_norm": 7.6231584548950195, "learning_rate": 4.1534e-06, "loss": 0.9356, "step": 29250 }, { "epoch": 2.926, "grad_norm": 10.305044174194336, "learning_rate": 4.151400000000001e-06, "loss": 1.4584, "step": 29260 }, { "epoch": 2.927, "grad_norm": 5.680695533752441, "learning_rate": 4.1494000000000005e-06, "loss": 0.9708, "step": 29270 }, { "epoch": 2.928, "grad_norm": 12.216536521911621, "learning_rate": 4.1474e-06, "loss": 1.3233, "step": 29280 }, { "epoch": 2.9290000000000003, "grad_norm": 6.043252468109131, "learning_rate": 4.1454e-06, "loss": 1.361, "step": 29290 }, { "epoch": 2.93, "grad_norm": 16.41413116455078, "learning_rate": 4.1434e-06, "loss": 1.5806, "step": 29300 }, { "epoch": 2.931, "grad_norm": 6.061256408691406, "learning_rate": 4.141400000000001e-06, "loss": 1.0996, "step": 29310 }, { "epoch": 2.932, "grad_norm": 11.931539535522461, "learning_rate": 4.1394e-06, "loss": 1.1617, "step": 29320 }, { "epoch": 2.933, "grad_norm": 9.317411422729492, "learning_rate": 4.1374e-06, "loss": 1.3681, "step": 29330 }, { "epoch": 2.934, "grad_norm": 10.915534973144531, "learning_rate": 4.1354e-06, "loss": 1.2109, "step": 29340 }, { 
"epoch": 2.935, "grad_norm": 11.37032699584961, "learning_rate": 4.1334e-06, "loss": 1.2453, "step": 29350 }, { "epoch": 2.936, "grad_norm": 7.190423965454102, "learning_rate": 4.131400000000001e-06, "loss": 1.1166, "step": 29360 }, { "epoch": 2.9370000000000003, "grad_norm": 11.334920883178711, "learning_rate": 4.1294e-06, "loss": 1.0684, "step": 29370 }, { "epoch": 2.9379999999999997, "grad_norm": 13.443899154663086, "learning_rate": 4.1274000000000005e-06, "loss": 1.1673, "step": 29380 }, { "epoch": 2.939, "grad_norm": 7.254450798034668, "learning_rate": 4.1254e-06, "loss": 1.4198, "step": 29390 }, { "epoch": 2.94, "grad_norm": 13.048331260681152, "learning_rate": 4.1234e-06, "loss": 1.117, "step": 29400 }, { "epoch": 2.941, "grad_norm": 11.057998657226562, "learning_rate": 4.1214e-06, "loss": 1.2477, "step": 29410 }, { "epoch": 2.942, "grad_norm": 13.331741333007812, "learning_rate": 4.1194e-06, "loss": 0.9843, "step": 29420 }, { "epoch": 2.943, "grad_norm": 10.3034086227417, "learning_rate": 4.117400000000001e-06, "loss": 1.3048, "step": 29430 }, { "epoch": 2.944, "grad_norm": 5.397353172302246, "learning_rate": 4.1154000000000004e-06, "loss": 1.1486, "step": 29440 }, { "epoch": 2.945, "grad_norm": 10.874167442321777, "learning_rate": 4.1134e-06, "loss": 1.095, "step": 29450 }, { "epoch": 2.9459999999999997, "grad_norm": 10.262704849243164, "learning_rate": 4.1114e-06, "loss": 1.4161, "step": 29460 }, { "epoch": 2.947, "grad_norm": 7.092738628387451, "learning_rate": 4.109400000000001e-06, "loss": 1.2468, "step": 29470 }, { "epoch": 2.948, "grad_norm": 18.848278045654297, "learning_rate": 4.107400000000001e-06, "loss": 0.9757, "step": 29480 }, { "epoch": 2.949, "grad_norm": 3.21994948387146, "learning_rate": 4.1054000000000005e-06, "loss": 0.9925, "step": 29490 }, { "epoch": 2.95, "grad_norm": 20.890731811523438, "learning_rate": 4.1034e-06, "loss": 1.6859, "step": 29500 }, { "epoch": 2.951, "grad_norm": 13.16463851928711, "learning_rate": 4.1014e-06, "loss": 
1.3108, "step": 29510 }, { "epoch": 2.952, "grad_norm": 5.010506629943848, "learning_rate": 4.099400000000001e-06, "loss": 1.4405, "step": 29520 }, { "epoch": 2.953, "grad_norm": 9.933984756469727, "learning_rate": 4.0974e-06, "loss": 1.4429, "step": 29530 }, { "epoch": 2.9539999999999997, "grad_norm": 7.415938377380371, "learning_rate": 4.095400000000001e-06, "loss": 1.43, "step": 29540 }, { "epoch": 2.955, "grad_norm": 9.318488121032715, "learning_rate": 4.0934000000000005e-06, "loss": 1.3288, "step": 29550 }, { "epoch": 2.956, "grad_norm": 10.898085594177246, "learning_rate": 4.0914e-06, "loss": 1.3513, "step": 29560 }, { "epoch": 2.957, "grad_norm": 4.022782325744629, "learning_rate": 4.0894e-06, "loss": 1.1828, "step": 29570 }, { "epoch": 2.958, "grad_norm": 8.18714714050293, "learning_rate": 4.0874e-06, "loss": 1.2674, "step": 29580 }, { "epoch": 2.959, "grad_norm": 7.173527717590332, "learning_rate": 4.085400000000001e-06, "loss": 1.2362, "step": 29590 }, { "epoch": 2.96, "grad_norm": 6.813322067260742, "learning_rate": 4.0834e-06, "loss": 1.2112, "step": 29600 }, { "epoch": 2.961, "grad_norm": 11.546833038330078, "learning_rate": 4.0814000000000004e-06, "loss": 1.524, "step": 29610 }, { "epoch": 2.9619999999999997, "grad_norm": 9.355443000793457, "learning_rate": 4.0794e-06, "loss": 1.251, "step": 29620 }, { "epoch": 2.963, "grad_norm": 12.773455619812012, "learning_rate": 4.0774e-06, "loss": 1.356, "step": 29630 }, { "epoch": 2.964, "grad_norm": 18.429052352905273, "learning_rate": 4.075400000000001e-06, "loss": 1.2614, "step": 29640 }, { "epoch": 2.965, "grad_norm": 8.4736328125, "learning_rate": 4.0734e-06, "loss": 0.8968, "step": 29650 }, { "epoch": 2.966, "grad_norm": 12.928428649902344, "learning_rate": 4.0714000000000005e-06, "loss": 0.9966, "step": 29660 }, { "epoch": 2.967, "grad_norm": 10.534080505371094, "learning_rate": 4.0694e-06, "loss": 1.1625, "step": 29670 }, { "epoch": 2.968, "grad_norm": 8.722935676574707, "learning_rate": 4.0674e-06, 
"loss": 1.3251, "step": 29680 }, { "epoch": 2.969, "grad_norm": 16.15669059753418, "learning_rate": 4.0654e-06, "loss": 1.5693, "step": 29690 }, { "epoch": 2.9699999999999998, "grad_norm": 11.93509578704834, "learning_rate": 4.0634e-06, "loss": 0.9767, "step": 29700 }, { "epoch": 2.971, "grad_norm": 7.1775593757629395, "learning_rate": 4.061400000000001e-06, "loss": 0.9992, "step": 29710 }, { "epoch": 2.972, "grad_norm": 9.036670684814453, "learning_rate": 4.0594000000000005e-06, "loss": 1.1933, "step": 29720 }, { "epoch": 2.973, "grad_norm": 10.021101951599121, "learning_rate": 4.0574e-06, "loss": 1.4988, "step": 29730 }, { "epoch": 2.974, "grad_norm": 9.240202903747559, "learning_rate": 4.0554e-06, "loss": 1.1809, "step": 29740 }, { "epoch": 2.975, "grad_norm": 14.692614555358887, "learning_rate": 4.0534e-06, "loss": 1.1742, "step": 29750 }, { "epoch": 2.976, "grad_norm": 14.450674057006836, "learning_rate": 4.051400000000001e-06, "loss": 1.1267, "step": 29760 }, { "epoch": 2.977, "grad_norm": 48.41706466674805, "learning_rate": 4.0494000000000006e-06, "loss": 1.5424, "step": 29770 }, { "epoch": 2.9779999999999998, "grad_norm": 6.186675548553467, "learning_rate": 4.0474e-06, "loss": 1.3876, "step": 29780 }, { "epoch": 2.979, "grad_norm": 14.09583854675293, "learning_rate": 4.0454e-06, "loss": 1.3073, "step": 29790 }, { "epoch": 2.98, "grad_norm": 9.412736892700195, "learning_rate": 4.0434e-06, "loss": 1.2185, "step": 29800 }, { "epoch": 2.981, "grad_norm": 9.077485084533691, "learning_rate": 4.0414e-06, "loss": 1.1111, "step": 29810 }, { "epoch": 2.982, "grad_norm": 10.901795387268066, "learning_rate": 4.039400000000001e-06, "loss": 1.0057, "step": 29820 }, { "epoch": 2.983, "grad_norm": 15.331480026245117, "learning_rate": 4.0374000000000005e-06, "loss": 1.5755, "step": 29830 }, { "epoch": 2.984, "grad_norm": 14.884889602661133, "learning_rate": 4.0354e-06, "loss": 1.3604, "step": 29840 }, { "epoch": 2.985, "grad_norm": 16.840450286865234, "learning_rate": 
4.0334e-06, "loss": 1.2024, "step": 29850 }, { "epoch": 2.9859999999999998, "grad_norm": 12.132843971252441, "learning_rate": 4.0314e-06, "loss": 1.2087, "step": 29860 }, { "epoch": 2.987, "grad_norm": 9.586185455322266, "learning_rate": 4.029400000000001e-06, "loss": 1.0832, "step": 29870 }, { "epoch": 2.988, "grad_norm": 10.214162826538086, "learning_rate": 4.027400000000001e-06, "loss": 0.9691, "step": 29880 }, { "epoch": 2.989, "grad_norm": 16.811994552612305, "learning_rate": 4.0254000000000005e-06, "loss": 1.255, "step": 29890 }, { "epoch": 2.99, "grad_norm": 16.852277755737305, "learning_rate": 4.0234e-06, "loss": 1.7808, "step": 29900 }, { "epoch": 2.991, "grad_norm": 12.565603256225586, "learning_rate": 4.0214e-06, "loss": 1.2461, "step": 29910 }, { "epoch": 2.992, "grad_norm": 7.334055423736572, "learning_rate": 4.019400000000001e-06, "loss": 1.1229, "step": 29920 }, { "epoch": 2.993, "grad_norm": 9.414421081542969, "learning_rate": 4.0174e-06, "loss": 1.1751, "step": 29930 }, { "epoch": 2.9939999999999998, "grad_norm": 7.478322982788086, "learning_rate": 4.0154000000000006e-06, "loss": 1.201, "step": 29940 }, { "epoch": 2.995, "grad_norm": 3.580517053604126, "learning_rate": 4.0134e-06, "loss": 0.7685, "step": 29950 }, { "epoch": 2.996, "grad_norm": 13.668438911437988, "learning_rate": 4.0114e-06, "loss": 1.3708, "step": 29960 }, { "epoch": 2.997, "grad_norm": 9.594085693359375, "learning_rate": 4.0094e-06, "loss": 1.3555, "step": 29970 }, { "epoch": 2.998, "grad_norm": 11.513078689575195, "learning_rate": 4.0074e-06, "loss": 1.1291, "step": 29980 }, { "epoch": 2.999, "grad_norm": 11.818098068237305, "learning_rate": 4.005400000000001e-06, "loss": 1.1003, "step": 29990 }, { "epoch": 3.0, "grad_norm": 12.673623085021973, "learning_rate": 4.0034e-06, "loss": 1.176, "step": 30000 }, { "epoch": 3.001, "grad_norm": 9.77951431274414, "learning_rate": 4.0014e-06, "loss": 1.1537, "step": 30010 }, { "epoch": 3.002, "grad_norm": 13.90412425994873, "learning_rate": 
3.9994e-06, "loss": 1.2987, "step": 30020 }, { "epoch": 3.003, "grad_norm": 14.884184837341309, "learning_rate": 3.9974e-06, "loss": 1.4274, "step": 30030 }, { "epoch": 3.004, "grad_norm": 8.348045349121094, "learning_rate": 3.995400000000001e-06, "loss": 0.9202, "step": 30040 }, { "epoch": 3.005, "grad_norm": 12.095112800598145, "learning_rate": 3.9934e-06, "loss": 1.1622, "step": 30050 }, { "epoch": 3.006, "grad_norm": 8.268014907836914, "learning_rate": 3.9914000000000004e-06, "loss": 1.2363, "step": 30060 }, { "epoch": 3.007, "grad_norm": 9.173174858093262, "learning_rate": 3.9894e-06, "loss": 1.8115, "step": 30070 }, { "epoch": 3.008, "grad_norm": 9.089729309082031, "learning_rate": 3.9874e-06, "loss": 1.0293, "step": 30080 }, { "epoch": 3.009, "grad_norm": 4.1526198387146, "learning_rate": 3.9854e-06, "loss": 1.4061, "step": 30090 }, { "epoch": 3.01, "grad_norm": 11.33251953125, "learning_rate": 3.983400000000001e-06, "loss": 1.5591, "step": 30100 }, { "epoch": 3.011, "grad_norm": 13.809500694274902, "learning_rate": 3.9814000000000005e-06, "loss": 1.2539, "step": 30110 }, { "epoch": 3.012, "grad_norm": 11.066414833068848, "learning_rate": 3.9794e-06, "loss": 1.3922, "step": 30120 }, { "epoch": 3.013, "grad_norm": 12.535942077636719, "learning_rate": 3.9774e-06, "loss": 1.3979, "step": 30130 }, { "epoch": 3.014, "grad_norm": 7.725063323974609, "learning_rate": 3.9754e-06, "loss": 1.0484, "step": 30140 }, { "epoch": 3.015, "grad_norm": 13.673731803894043, "learning_rate": 3.973400000000001e-06, "loss": 1.2839, "step": 30150 }, { "epoch": 3.016, "grad_norm": 10.987448692321777, "learning_rate": 3.971400000000001e-06, "loss": 0.9961, "step": 30160 }, { "epoch": 3.017, "grad_norm": 7.807530879974365, "learning_rate": 3.9694000000000005e-06, "loss": 1.051, "step": 30170 }, { "epoch": 3.018, "grad_norm": 10.573387145996094, "learning_rate": 3.9674e-06, "loss": 1.0155, "step": 30180 }, { "epoch": 3.019, "grad_norm": 15.26197338104248, "learning_rate": 3.9654e-06, 
"loss": 1.4705, "step": 30190 }, { "epoch": 3.02, "grad_norm": 8.380249977111816, "learning_rate": 3.963400000000001e-06, "loss": 0.7125, "step": 30200 }, { "epoch": 3.021, "grad_norm": 10.798430442810059, "learning_rate": 3.9614e-06, "loss": 1.1932, "step": 30210 }, { "epoch": 3.022, "grad_norm": 13.412765502929688, "learning_rate": 3.959400000000001e-06, "loss": 1.1382, "step": 30220 }, { "epoch": 3.023, "grad_norm": 20.461549758911133, "learning_rate": 3.9574000000000004e-06, "loss": 1.5721, "step": 30230 }, { "epoch": 3.024, "grad_norm": 6.921362400054932, "learning_rate": 3.9554e-06, "loss": 0.8691, "step": 30240 }, { "epoch": 3.025, "grad_norm": 6.150951385498047, "learning_rate": 3.953400000000001e-06, "loss": 1.3735, "step": 30250 }, { "epoch": 3.026, "grad_norm": 16.251026153564453, "learning_rate": 3.9514e-06, "loss": 1.287, "step": 30260 }, { "epoch": 3.027, "grad_norm": 8.724386215209961, "learning_rate": 3.949400000000001e-06, "loss": 1.3053, "step": 30270 }, { "epoch": 3.028, "grad_norm": 9.515460968017578, "learning_rate": 3.9474000000000005e-06, "loss": 1.4329, "step": 30280 }, { "epoch": 3.029, "grad_norm": 6.466248989105225, "learning_rate": 3.9454e-06, "loss": 1.2534, "step": 30290 }, { "epoch": 3.03, "grad_norm": 8.086331367492676, "learning_rate": 3.9434e-06, "loss": 1.1782, "step": 30300 }, { "epoch": 3.031, "grad_norm": 13.908480644226074, "learning_rate": 3.9414e-06, "loss": 1.3191, "step": 30310 }, { "epoch": 3.032, "grad_norm": 14.361651420593262, "learning_rate": 3.939400000000001e-06, "loss": 1.3844, "step": 30320 }, { "epoch": 3.033, "grad_norm": 14.469130516052246, "learning_rate": 3.9374e-06, "loss": 1.237, "step": 30330 }, { "epoch": 3.034, "grad_norm": 14.636515617370605, "learning_rate": 3.9354000000000005e-06, "loss": 1.345, "step": 30340 }, { "epoch": 3.035, "grad_norm": 7.846157073974609, "learning_rate": 3.9334e-06, "loss": 0.9253, "step": 30350 }, { "epoch": 3.036, "grad_norm": 6.3321661949157715, "learning_rate": 3.9314e-06, 
"loss": 0.9427, "step": 30360 }, { "epoch": 3.037, "grad_norm": 18.89450454711914, "learning_rate": 3.9294e-06, "loss": 1.5751, "step": 30370 }, { "epoch": 3.038, "grad_norm": 13.08353042602539, "learning_rate": 3.9274e-06, "loss": 1.5719, "step": 30380 }, { "epoch": 3.039, "grad_norm": 10.827966690063477, "learning_rate": 3.9254000000000006e-06, "loss": 1.236, "step": 30390 }, { "epoch": 3.04, "grad_norm": 11.178470611572266, "learning_rate": 3.9234e-06, "loss": 1.2985, "step": 30400 }, { "epoch": 3.041, "grad_norm": 9.897839546203613, "learning_rate": 3.9214e-06, "loss": 1.3769, "step": 30410 }, { "epoch": 3.042, "grad_norm": 8.8223237991333, "learning_rate": 3.9194e-06, "loss": 1.2525, "step": 30420 }, { "epoch": 3.043, "grad_norm": 13.288087844848633, "learning_rate": 3.9174e-06, "loss": 1.1957, "step": 30430 }, { "epoch": 3.044, "grad_norm": 5.486622333526611, "learning_rate": 3.915400000000001e-06, "loss": 0.9201, "step": 30440 }, { "epoch": 3.045, "grad_norm": 8.763372421264648, "learning_rate": 3.9134000000000005e-06, "loss": 1.1942, "step": 30450 }, { "epoch": 3.046, "grad_norm": 6.571597099304199, "learning_rate": 3.9114e-06, "loss": 1.011, "step": 30460 }, { "epoch": 3.047, "grad_norm": 8.210810661315918, "learning_rate": 3.9094e-06, "loss": 1.276, "step": 30470 }, { "epoch": 3.048, "grad_norm": 11.455232620239258, "learning_rate": 3.9074e-06, "loss": 1.2951, "step": 30480 }, { "epoch": 3.049, "grad_norm": 5.689100742340088, "learning_rate": 3.9054e-06, "loss": 1.116, "step": 30490 }, { "epoch": 3.05, "grad_norm": 7.130867004394531, "learning_rate": 3.903400000000001e-06, "loss": 1.0141, "step": 30500 }, { "epoch": 3.051, "grad_norm": 12.664045333862305, "learning_rate": 3.9014000000000005e-06, "loss": 1.2787, "step": 30510 }, { "epoch": 3.052, "grad_norm": 11.18001651763916, "learning_rate": 3.8994e-06, "loss": 1.4431, "step": 30520 }, { "epoch": 3.053, "grad_norm": 12.193371772766113, "learning_rate": 3.8974e-06, "loss": 1.2434, "step": 30530 }, { 
"epoch": 3.054, "grad_norm": 12.371489524841309, "learning_rate": 3.8954e-06, "loss": 1.2919, "step": 30540 }, { "epoch": 3.055, "grad_norm": 13.034032821655273, "learning_rate": 3.893400000000001e-06, "loss": 1.2323, "step": 30550 }, { "epoch": 3.056, "grad_norm": 11.013480186462402, "learning_rate": 3.8914000000000006e-06, "loss": 1.2132, "step": 30560 }, { "epoch": 3.057, "grad_norm": 8.45055866241455, "learning_rate": 3.8894e-06, "loss": 1.1617, "step": 30570 }, { "epoch": 3.058, "grad_norm": 15.520748138427734, "learning_rate": 3.8874e-06, "loss": 1.4879, "step": 30580 }, { "epoch": 3.059, "grad_norm": 12.595949172973633, "learning_rate": 3.8854e-06, "loss": 1.017, "step": 30590 }, { "epoch": 3.06, "grad_norm": 11.932159423828125, "learning_rate": 3.883400000000001e-06, "loss": 1.3441, "step": 30600 }, { "epoch": 3.061, "grad_norm": 9.354244232177734, "learning_rate": 3.8814e-06, "loss": 1.2884, "step": 30610 }, { "epoch": 3.062, "grad_norm": 14.507532119750977, "learning_rate": 3.8794000000000005e-06, "loss": 1.209, "step": 30620 }, { "epoch": 3.063, "grad_norm": 9.191494941711426, "learning_rate": 3.8774e-06, "loss": 1.5143, "step": 30630 }, { "epoch": 3.064, "grad_norm": 12.428238868713379, "learning_rate": 3.8754e-06, "loss": 1.1407, "step": 30640 }, { "epoch": 3.065, "grad_norm": 20.833141326904297, "learning_rate": 3.873400000000001e-06, "loss": 1.4652, "step": 30650 }, { "epoch": 3.066, "grad_norm": 13.220902442932129, "learning_rate": 3.8714e-06, "loss": 0.9282, "step": 30660 }, { "epoch": 3.067, "grad_norm": 10.596773147583008, "learning_rate": 3.869400000000001e-06, "loss": 1.3288, "step": 30670 }, { "epoch": 3.068, "grad_norm": 10.420060157775879, "learning_rate": 3.8674000000000004e-06, "loss": 1.1681, "step": 30680 }, { "epoch": 3.069, "grad_norm": 10.388282775878906, "learning_rate": 3.8654e-06, "loss": 1.1564, "step": 30690 }, { "epoch": 3.07, "grad_norm": 13.150054931640625, "learning_rate": 3.8634e-06, "loss": 1.3997, "step": 30700 }, { 
"epoch": 3.071, "grad_norm": 7.959721565246582, "learning_rate": 3.8614e-06, "loss": 1.0921, "step": 30710 }, { "epoch": 3.072, "grad_norm": 5.3867316246032715, "learning_rate": 3.859400000000001e-06, "loss": 1.5075, "step": 30720 }, { "epoch": 3.073, "grad_norm": 8.060280799865723, "learning_rate": 3.8574000000000005e-06, "loss": 1.3415, "step": 30730 }, { "epoch": 3.074, "grad_norm": 16.95601463317871, "learning_rate": 3.8554e-06, "loss": 1.5321, "step": 30740 }, { "epoch": 3.075, "grad_norm": 4.19404935836792, "learning_rate": 3.8534e-06, "loss": 1.3225, "step": 30750 }, { "epoch": 3.076, "grad_norm": 9.101255416870117, "learning_rate": 3.8514e-06, "loss": 1.237, "step": 30760 }, { "epoch": 3.077, "grad_norm": 8.753789901733398, "learning_rate": 3.8494e-06, "loss": 1.0992, "step": 30770 }, { "epoch": 3.078, "grad_norm": 8.989324569702148, "learning_rate": 3.847400000000001e-06, "loss": 1.0721, "step": 30780 }, { "epoch": 3.079, "grad_norm": 14.807908058166504, "learning_rate": 3.8454000000000005e-06, "loss": 1.2088, "step": 30790 }, { "epoch": 3.08, "grad_norm": 10.968939781188965, "learning_rate": 3.8434e-06, "loss": 1.3712, "step": 30800 }, { "epoch": 3.081, "grad_norm": 10.196441650390625, "learning_rate": 3.8414e-06, "loss": 1.4212, "step": 30810 }, { "epoch": 3.082, "grad_norm": 12.007573127746582, "learning_rate": 3.8394e-06, "loss": 1.117, "step": 30820 }, { "epoch": 3.083, "grad_norm": 7.497933864593506, "learning_rate": 3.837400000000001e-06, "loss": 1.0977, "step": 30830 }, { "epoch": 3.084, "grad_norm": 13.339070320129395, "learning_rate": 3.835400000000001e-06, "loss": 1.0178, "step": 30840 }, { "epoch": 3.085, "grad_norm": 7.9426398277282715, "learning_rate": 3.8334000000000004e-06, "loss": 1.1916, "step": 30850 }, { "epoch": 3.086, "grad_norm": 6.3370585441589355, "learning_rate": 3.8314e-06, "loss": 1.0496, "step": 30860 }, { "epoch": 3.087, "grad_norm": 7.302180767059326, "learning_rate": 3.8294e-06, "loss": 1.2404, "step": 30870 }, { "epoch": 
3.088, "grad_norm": 13.352715492248535, "learning_rate": 3.827400000000001e-06, "loss": 1.5919, "step": 30880 }, { "epoch": 3.089, "grad_norm": 9.597620010375977, "learning_rate": 3.8254e-06, "loss": 1.4385, "step": 30890 }, { "epoch": 3.09, "grad_norm": 9.860209465026855, "learning_rate": 3.8234000000000005e-06, "loss": 1.3926, "step": 30900 }, { "epoch": 3.091, "grad_norm": 10.86876392364502, "learning_rate": 3.8214e-06, "loss": 1.243, "step": 30910 }, { "epoch": 3.092, "grad_norm": 6.206463813781738, "learning_rate": 3.8194e-06, "loss": 1.3418, "step": 30920 }, { "epoch": 3.093, "grad_norm": 8.315264701843262, "learning_rate": 3.817400000000001e-06, "loss": 1.1857, "step": 30930 }, { "epoch": 3.094, "grad_norm": 5.195272445678711, "learning_rate": 3.8154e-06, "loss": 1.4426, "step": 30940 }, { "epoch": 3.095, "grad_norm": 12.07642936706543, "learning_rate": 3.8134000000000006e-06, "loss": 1.3925, "step": 30950 }, { "epoch": 3.096, "grad_norm": 13.630117416381836, "learning_rate": 3.8114e-06, "loss": 1.6373, "step": 30960 }, { "epoch": 3.097, "grad_norm": 8.460655212402344, "learning_rate": 3.8094000000000003e-06, "loss": 1.1984, "step": 30970 }, { "epoch": 3.098, "grad_norm": 7.277179718017578, "learning_rate": 3.8074000000000006e-06, "loss": 1.0849, "step": 30980 }, { "epoch": 3.099, "grad_norm": 13.961984634399414, "learning_rate": 3.8054e-06, "loss": 1.4661, "step": 30990 }, { "epoch": 3.1, "grad_norm": 10.759557723999023, "learning_rate": 3.8034000000000003e-06, "loss": 1.2151, "step": 31000 }, { "epoch": 3.101, "grad_norm": 6.320084571838379, "learning_rate": 3.8014e-06, "loss": 1.259, "step": 31010 }, { "epoch": 3.102, "grad_norm": 11.347484588623047, "learning_rate": 3.7994000000000004e-06, "loss": 1.0858, "step": 31020 }, { "epoch": 3.103, "grad_norm": 11.738511085510254, "learning_rate": 3.7974000000000007e-06, "loss": 1.4036, "step": 31030 }, { "epoch": 3.104, "grad_norm": 7.876824855804443, "learning_rate": 3.7954e-06, "loss": 1.2425, "step": 31040 }, 
{ "epoch": 3.105, "grad_norm": 6.670855522155762, "learning_rate": 3.7934000000000004e-06, "loss": 0.9751, "step": 31050 }, { "epoch": 3.106, "grad_norm": 6.283182621002197, "learning_rate": 3.7914000000000002e-06, "loss": 1.1553, "step": 31060 }, { "epoch": 3.107, "grad_norm": 10.735183715820312, "learning_rate": 3.7894e-06, "loss": 0.9739, "step": 31070 }, { "epoch": 3.108, "grad_norm": 13.306958198547363, "learning_rate": 3.7874000000000004e-06, "loss": 1.1078, "step": 31080 }, { "epoch": 3.109, "grad_norm": 10.373847007751465, "learning_rate": 3.7854000000000002e-06, "loss": 0.9577, "step": 31090 }, { "epoch": 3.11, "grad_norm": 21.815853118896484, "learning_rate": 3.7834000000000005e-06, "loss": 1.4627, "step": 31100 }, { "epoch": 3.111, "grad_norm": 3.9659087657928467, "learning_rate": 3.7814e-06, "loss": 0.8445, "step": 31110 }, { "epoch": 3.112, "grad_norm": 8.00013256072998, "learning_rate": 3.7794e-06, "loss": 1.1917, "step": 31120 }, { "epoch": 3.113, "grad_norm": 6.79742956161499, "learning_rate": 3.7774000000000005e-06, "loss": 1.0954, "step": 31130 }, { "epoch": 3.114, "grad_norm": 10.747549057006836, "learning_rate": 3.7754000000000003e-06, "loss": 1.311, "step": 31140 }, { "epoch": 3.115, "grad_norm": 12.122516632080078, "learning_rate": 3.7734000000000006e-06, "loss": 1.2866, "step": 31150 }, { "epoch": 3.116, "grad_norm": 20.06511878967285, "learning_rate": 3.7714e-06, "loss": 1.4261, "step": 31160 }, { "epoch": 3.117, "grad_norm": 13.303239822387695, "learning_rate": 3.7694000000000003e-06, "loss": 1.2013, "step": 31170 }, { "epoch": 3.118, "grad_norm": 8.388273239135742, "learning_rate": 3.7674000000000006e-06, "loss": 1.4465, "step": 31180 }, { "epoch": 3.1189999999999998, "grad_norm": 9.81425666809082, "learning_rate": 3.7654e-06, "loss": 1.2172, "step": 31190 }, { "epoch": 3.12, "grad_norm": 9.817520141601562, "learning_rate": 3.7634000000000003e-06, "loss": 1.1778, "step": 31200 }, { "epoch": 3.121, "grad_norm": 12.540703773498535, 
"learning_rate": 3.7614e-06, "loss": 1.0971, "step": 31210 }, { "epoch": 3.122, "grad_norm": 11.660219192504883, "learning_rate": 3.7594000000000004e-06, "loss": 1.1201, "step": 31220 }, { "epoch": 3.123, "grad_norm": 11.408303260803223, "learning_rate": 3.7574000000000007e-06, "loss": 1.4855, "step": 31230 }, { "epoch": 3.124, "grad_norm": 11.879040718078613, "learning_rate": 3.7554e-06, "loss": 1.2832, "step": 31240 }, { "epoch": 3.125, "grad_norm": 9.900970458984375, "learning_rate": 3.7536000000000004e-06, "loss": 1.127, "step": 31250 }, { "epoch": 3.126, "grad_norm": 13.693802833557129, "learning_rate": 3.7516000000000002e-06, "loss": 1.2949, "step": 31260 }, { "epoch": 3.127, "grad_norm": 9.12295913696289, "learning_rate": 3.7496000000000005e-06, "loss": 1.1653, "step": 31270 }, { "epoch": 3.128, "grad_norm": 5.375489711761475, "learning_rate": 3.7476000000000003e-06, "loss": 0.8878, "step": 31280 }, { "epoch": 3.129, "grad_norm": 12.173545837402344, "learning_rate": 3.7456e-06, "loss": 1.3439, "step": 31290 }, { "epoch": 3.13, "grad_norm": 7.345860958099365, "learning_rate": 3.7436000000000005e-06, "loss": 0.9743, "step": 31300 }, { "epoch": 3.1310000000000002, "grad_norm": 11.889986991882324, "learning_rate": 3.7416000000000003e-06, "loss": 1.1705, "step": 31310 }, { "epoch": 3.132, "grad_norm": 13.144277572631836, "learning_rate": 3.7396000000000006e-06, "loss": 1.0151, "step": 31320 }, { "epoch": 3.133, "grad_norm": 15.195657730102539, "learning_rate": 3.7376e-06, "loss": 1.1239, "step": 31330 }, { "epoch": 3.134, "grad_norm": 7.408246040344238, "learning_rate": 3.7356000000000003e-06, "loss": 1.0706, "step": 31340 }, { "epoch": 3.135, "grad_norm": 7.377401828765869, "learning_rate": 3.7336000000000006e-06, "loss": 1.4813, "step": 31350 }, { "epoch": 3.136, "grad_norm": 8.999589920043945, "learning_rate": 3.7316000000000004e-06, "loss": 1.1421, "step": 31360 }, { "epoch": 3.137, "grad_norm": 13.325326919555664, "learning_rate": 3.7296000000000003e-06, 
"loss": 1.2116, "step": 31370 }, { "epoch": 3.138, "grad_norm": 12.753479957580566, "learning_rate": 3.7276e-06, "loss": 1.2395, "step": 31380 }, { "epoch": 3.1390000000000002, "grad_norm": 12.680985450744629, "learning_rate": 3.7256000000000004e-06, "loss": 1.1814, "step": 31390 }, { "epoch": 3.14, "grad_norm": 11.808354377746582, "learning_rate": 3.7236000000000007e-06, "loss": 1.3253, "step": 31400 }, { "epoch": 3.141, "grad_norm": 17.803756713867188, "learning_rate": 3.7216e-06, "loss": 1.144, "step": 31410 }, { "epoch": 3.142, "grad_norm": 24.019948959350586, "learning_rate": 3.7196000000000004e-06, "loss": 1.334, "step": 31420 }, { "epoch": 3.143, "grad_norm": 9.516412734985352, "learning_rate": 3.7176e-06, "loss": 1.0559, "step": 31430 }, { "epoch": 3.144, "grad_norm": 18.44563865661621, "learning_rate": 3.7156000000000005e-06, "loss": 1.2091, "step": 31440 }, { "epoch": 3.145, "grad_norm": 10.469414710998535, "learning_rate": 3.7136000000000007e-06, "loss": 1.3252, "step": 31450 }, { "epoch": 3.146, "grad_norm": 14.363265037536621, "learning_rate": 3.7116e-06, "loss": 1.1258, "step": 31460 }, { "epoch": 3.147, "grad_norm": 15.487251281738281, "learning_rate": 3.7096000000000004e-06, "loss": 1.2603, "step": 31470 }, { "epoch": 3.148, "grad_norm": 12.375779151916504, "learning_rate": 3.7076000000000003e-06, "loss": 1.1469, "step": 31480 }, { "epoch": 3.149, "grad_norm": 11.901060104370117, "learning_rate": 3.7056e-06, "loss": 1.0338, "step": 31490 }, { "epoch": 3.15, "grad_norm": 11.400994300842285, "learning_rate": 3.7036000000000004e-06, "loss": 1.1734, "step": 31500 }, { "epoch": 3.151, "grad_norm": 12.097701072692871, "learning_rate": 3.7016000000000003e-06, "loss": 1.0351, "step": 31510 }, { "epoch": 3.152, "grad_norm": 13.098098754882812, "learning_rate": 3.6996000000000005e-06, "loss": 1.4554, "step": 31520 }, { "epoch": 3.153, "grad_norm": 15.969141960144043, "learning_rate": 3.6976e-06, "loss": 1.1541, "step": 31530 }, { "epoch": 3.154, "grad_norm": 
14.381691932678223, "learning_rate": 3.6956000000000002e-06, "loss": 1.2968, "step": 31540 }, { "epoch": 3.155, "grad_norm": 11.013089179992676, "learning_rate": 3.6936000000000005e-06, "loss": 1.2779, "step": 31550 }, { "epoch": 3.156, "grad_norm": 0.5838720798492432, "learning_rate": 3.6916000000000004e-06, "loss": 1.2053, "step": 31560 }, { "epoch": 3.157, "grad_norm": 16.136144638061523, "learning_rate": 3.6896000000000002e-06, "loss": 1.4727, "step": 31570 }, { "epoch": 3.158, "grad_norm": 16.310787200927734, "learning_rate": 3.6876e-06, "loss": 1.1412, "step": 31580 }, { "epoch": 3.159, "grad_norm": 7.520683288574219, "learning_rate": 3.6856000000000003e-06, "loss": 1.1155, "step": 31590 }, { "epoch": 3.16, "grad_norm": 13.775277137756348, "learning_rate": 3.6836000000000006e-06, "loss": 0.9914, "step": 31600 }, { "epoch": 3.161, "grad_norm": 9.952198028564453, "learning_rate": 3.6816e-06, "loss": 1.3161, "step": 31610 }, { "epoch": 3.162, "grad_norm": 17.171445846557617, "learning_rate": 3.6796000000000003e-06, "loss": 1.1587, "step": 31620 }, { "epoch": 3.163, "grad_norm": 15.518289566040039, "learning_rate": 3.6776e-06, "loss": 1.2436, "step": 31630 }, { "epoch": 3.164, "grad_norm": 13.055865287780762, "learning_rate": 3.6756000000000004e-06, "loss": 1.4013, "step": 31640 }, { "epoch": 3.165, "grad_norm": 13.091086387634277, "learning_rate": 3.6736000000000007e-06, "loss": 1.3567, "step": 31650 }, { "epoch": 3.166, "grad_norm": 11.561912536621094, "learning_rate": 3.6716e-06, "loss": 0.89, "step": 31660 }, { "epoch": 3.167, "grad_norm": 10.071240425109863, "learning_rate": 3.6696000000000004e-06, "loss": 1.3013, "step": 31670 }, { "epoch": 3.168, "grad_norm": 15.523853302001953, "learning_rate": 3.6676000000000003e-06, "loss": 1.2898, "step": 31680 }, { "epoch": 3.169, "grad_norm": 14.333702087402344, "learning_rate": 3.6656e-06, "loss": 1.4219, "step": 31690 }, { "epoch": 3.17, "grad_norm": 9.92656135559082, "learning_rate": 3.6636000000000004e-06, 
"loss": 1.2185, "step": 31700 }, { "epoch": 3.171, "grad_norm": 12.13388442993164, "learning_rate": 3.6616000000000002e-06, "loss": 1.3915, "step": 31710 }, { "epoch": 3.172, "grad_norm": 15.032325744628906, "learning_rate": 3.6596000000000005e-06, "loss": 1.4853, "step": 31720 }, { "epoch": 3.173, "grad_norm": 12.345745086669922, "learning_rate": 3.6576e-06, "loss": 1.169, "step": 31730 }, { "epoch": 3.174, "grad_norm": 14.099884986877441, "learning_rate": 3.6556e-06, "loss": 1.1628, "step": 31740 }, { "epoch": 3.175, "grad_norm": 9.100024223327637, "learning_rate": 3.6536000000000005e-06, "loss": 1.0634, "step": 31750 }, { "epoch": 3.176, "grad_norm": 8.760787963867188, "learning_rate": 3.6516000000000003e-06, "loss": 1.2315, "step": 31760 }, { "epoch": 3.177, "grad_norm": 13.519919395446777, "learning_rate": 3.6496e-06, "loss": 1.2855, "step": 31770 }, { "epoch": 3.178, "grad_norm": 11.733073234558105, "learning_rate": 3.6476e-06, "loss": 1.4069, "step": 31780 }, { "epoch": 3.179, "grad_norm": 12.60672664642334, "learning_rate": 3.6456000000000003e-06, "loss": 1.2564, "step": 31790 }, { "epoch": 3.18, "grad_norm": 13.48668098449707, "learning_rate": 3.6436000000000006e-06, "loss": 1.0412, "step": 31800 }, { "epoch": 3.181, "grad_norm": 10.041485786437988, "learning_rate": 3.6416e-06, "loss": 1.208, "step": 31810 }, { "epoch": 3.182, "grad_norm": 10.235231399536133, "learning_rate": 3.6396000000000003e-06, "loss": 1.5465, "step": 31820 }, { "epoch": 3.183, "grad_norm": 7.388559818267822, "learning_rate": 3.6376e-06, "loss": 1.3857, "step": 31830 }, { "epoch": 3.184, "grad_norm": 13.65880298614502, "learning_rate": 3.6356000000000004e-06, "loss": 1.415, "step": 31840 }, { "epoch": 3.185, "grad_norm": 17.73468017578125, "learning_rate": 3.6336000000000007e-06, "loss": 1.1945, "step": 31850 }, { "epoch": 3.186, "grad_norm": 9.782712936401367, "learning_rate": 3.6316e-06, "loss": 1.0108, "step": 31860 }, { "epoch": 3.187, "grad_norm": 13.31120777130127, 
"learning_rate": 3.6296000000000004e-06, "loss": 1.0682, "step": 31870 }, { "epoch": 3.188, "grad_norm": 8.320989608764648, "learning_rate": 3.6276000000000006e-06, "loss": 1.2398, "step": 31880 }, { "epoch": 3.189, "grad_norm": 7.5153985023498535, "learning_rate": 3.6256e-06, "loss": 1.1424, "step": 31890 }, { "epoch": 3.19, "grad_norm": 9.46658706665039, "learning_rate": 3.6236000000000003e-06, "loss": 1.3216, "step": 31900 }, { "epoch": 3.191, "grad_norm": 11.493831634521484, "learning_rate": 3.6216e-06, "loss": 1.0897, "step": 31910 }, { "epoch": 3.192, "grad_norm": 14.641666412353516, "learning_rate": 3.6196000000000005e-06, "loss": 1.2583, "step": 31920 }, { "epoch": 3.193, "grad_norm": 17.170948028564453, "learning_rate": 3.6176000000000007e-06, "loss": 1.468, "step": 31930 }, { "epoch": 3.194, "grad_norm": 11.503747940063477, "learning_rate": 3.6156e-06, "loss": 1.3809, "step": 31940 }, { "epoch": 3.195, "grad_norm": 13.143049240112305, "learning_rate": 3.6136000000000004e-06, "loss": 1.2005, "step": 31950 }, { "epoch": 3.196, "grad_norm": 13.152921676635742, "learning_rate": 3.6116000000000003e-06, "loss": 1.3261, "step": 31960 }, { "epoch": 3.197, "grad_norm": 9.248669624328613, "learning_rate": 3.6096e-06, "loss": 1.2812, "step": 31970 }, { "epoch": 3.198, "grad_norm": 8.154925346374512, "learning_rate": 3.6076000000000004e-06, "loss": 1.2347, "step": 31980 }, { "epoch": 3.199, "grad_norm": 13.0037202835083, "learning_rate": 3.6056000000000003e-06, "loss": 1.2011, "step": 31990 }, { "epoch": 3.2, "grad_norm": 19.723426818847656, "learning_rate": 3.6036000000000005e-06, "loss": 1.1806, "step": 32000 }, { "epoch": 3.201, "grad_norm": 10.72747802734375, "learning_rate": 3.6016e-06, "loss": 1.293, "step": 32010 }, { "epoch": 3.202, "grad_norm": 17.53891944885254, "learning_rate": 3.5996000000000002e-06, "loss": 1.2213, "step": 32020 }, { "epoch": 3.203, "grad_norm": 7.7619733810424805, "learning_rate": 3.5976000000000005e-06, "loss": 1.4296, "step": 32030 }, 
{ "epoch": 3.204, "grad_norm": 8.889789581298828, "learning_rate": 3.5956000000000004e-06, "loss": 1.1236, "step": 32040 }, { "epoch": 3.205, "grad_norm": 9.316286087036133, "learning_rate": 3.5936000000000006e-06, "loss": 1.119, "step": 32050 }, { "epoch": 3.206, "grad_norm": 14.856748580932617, "learning_rate": 3.5916e-06, "loss": 1.1871, "step": 32060 }, { "epoch": 3.207, "grad_norm": 8.214824676513672, "learning_rate": 3.5896000000000003e-06, "loss": 1.316, "step": 32070 }, { "epoch": 3.208, "grad_norm": 20.094886779785156, "learning_rate": 3.5876000000000006e-06, "loss": 1.2126, "step": 32080 }, { "epoch": 3.209, "grad_norm": 8.71680736541748, "learning_rate": 3.5856e-06, "loss": 1.797, "step": 32090 }, { "epoch": 3.21, "grad_norm": 14.392909049987793, "learning_rate": 3.5836000000000003e-06, "loss": 1.3637, "step": 32100 }, { "epoch": 3.211, "grad_norm": 8.757878303527832, "learning_rate": 3.5816e-06, "loss": 1.1388, "step": 32110 }, { "epoch": 3.212, "grad_norm": 14.083065032958984, "learning_rate": 3.5796000000000004e-06, "loss": 1.2794, "step": 32120 }, { "epoch": 3.213, "grad_norm": 8.543600082397461, "learning_rate": 3.5776000000000007e-06, "loss": 1.0787, "step": 32130 }, { "epoch": 3.214, "grad_norm": 8.613849639892578, "learning_rate": 3.5756e-06, "loss": 1.2959, "step": 32140 }, { "epoch": 3.215, "grad_norm": 7.297475337982178, "learning_rate": 3.5736000000000004e-06, "loss": 1.264, "step": 32150 }, { "epoch": 3.216, "grad_norm": 5.805179595947266, "learning_rate": 3.5716000000000002e-06, "loss": 1.0243, "step": 32160 }, { "epoch": 3.217, "grad_norm": 8.642257690429688, "learning_rate": 3.5696e-06, "loss": 1.3839, "step": 32170 }, { "epoch": 3.218, "grad_norm": 9.658088684082031, "learning_rate": 3.5676000000000004e-06, "loss": 1.4653, "step": 32180 }, { "epoch": 3.219, "grad_norm": 13.719171524047852, "learning_rate": 3.5656000000000002e-06, "loss": 1.2404, "step": 32190 }, { "epoch": 3.22, "grad_norm": 18.87218475341797, "learning_rate": 
3.5636000000000005e-06, "loss": 1.4298, "step": 32200 }, { "epoch": 3.221, "grad_norm": 4.560086250305176, "learning_rate": 3.5616e-06, "loss": 0.8693, "step": 32210 }, { "epoch": 3.222, "grad_norm": 9.677437782287598, "learning_rate": 3.5596e-06, "loss": 1.2829, "step": 32220 }, { "epoch": 3.223, "grad_norm": 8.892374038696289, "learning_rate": 3.5576000000000005e-06, "loss": 1.5183, "step": 32230 }, { "epoch": 3.224, "grad_norm": 13.133440017700195, "learning_rate": 3.5556000000000003e-06, "loss": 1.0629, "step": 32240 }, { "epoch": 3.225, "grad_norm": 10.630733489990234, "learning_rate": 3.5536000000000006e-06, "loss": 1.5007, "step": 32250 }, { "epoch": 3.226, "grad_norm": 7.457658767700195, "learning_rate": 3.5516e-06, "loss": 1.261, "step": 32260 }, { "epoch": 3.227, "grad_norm": 8.467300415039062, "learning_rate": 3.5496000000000003e-06, "loss": 1.0812, "step": 32270 }, { "epoch": 3.228, "grad_norm": 12.2103853225708, "learning_rate": 3.5476000000000006e-06, "loss": 1.3369, "step": 32280 }, { "epoch": 3.229, "grad_norm": 24.05048942565918, "learning_rate": 3.5456e-06, "loss": 1.2854, "step": 32290 }, { "epoch": 3.23, "grad_norm": 9.563419342041016, "learning_rate": 3.5436000000000003e-06, "loss": 1.3415, "step": 32300 }, { "epoch": 3.231, "grad_norm": 7.2136077880859375, "learning_rate": 3.5416e-06, "loss": 0.9525, "step": 32310 }, { "epoch": 3.232, "grad_norm": 13.376723289489746, "learning_rate": 3.5396000000000004e-06, "loss": 1.4683, "step": 32320 }, { "epoch": 3.233, "grad_norm": 15.275553703308105, "learning_rate": 3.5376000000000007e-06, "loss": 1.2159, "step": 32330 }, { "epoch": 3.234, "grad_norm": 12.236398696899414, "learning_rate": 3.5356e-06, "loss": 1.5497, "step": 32340 }, { "epoch": 3.235, "grad_norm": 14.109912872314453, "learning_rate": 3.5336000000000004e-06, "loss": 1.4545, "step": 32350 }, { "epoch": 3.2359999999999998, "grad_norm": 6.347513675689697, "learning_rate": 3.5316e-06, "loss": 1.1414, "step": 32360 }, { "epoch": 3.237, 
"grad_norm": 10.685503005981445, "learning_rate": 3.5296e-06, "loss": 1.1433, "step": 32370 }, { "epoch": 3.238, "grad_norm": 7.3020548820495605, "learning_rate": 3.5276000000000003e-06, "loss": 1.3105, "step": 32380 }, { "epoch": 3.239, "grad_norm": 17.469188690185547, "learning_rate": 3.5256e-06, "loss": 1.3292, "step": 32390 }, { "epoch": 3.24, "grad_norm": 14.983972549438477, "learning_rate": 3.5236000000000004e-06, "loss": 1.2838, "step": 32400 }, { "epoch": 3.241, "grad_norm": 5.904664993286133, "learning_rate": 3.5216e-06, "loss": 1.3207, "step": 32410 }, { "epoch": 3.242, "grad_norm": 9.73958683013916, "learning_rate": 3.5196e-06, "loss": 1.0813, "step": 32420 }, { "epoch": 3.243, "grad_norm": 7.651210784912109, "learning_rate": 3.5176000000000004e-06, "loss": 1.1683, "step": 32430 }, { "epoch": 3.2439999999999998, "grad_norm": 12.60692310333252, "learning_rate": 3.5156000000000003e-06, "loss": 1.5792, "step": 32440 }, { "epoch": 3.245, "grad_norm": 9.290870666503906, "learning_rate": 3.5136000000000005e-06, "loss": 1.0657, "step": 32450 }, { "epoch": 3.246, "grad_norm": 9.939592361450195, "learning_rate": 3.5116000000000004e-06, "loss": 1.1628, "step": 32460 }, { "epoch": 3.247, "grad_norm": 11.36020278930664, "learning_rate": 3.5096000000000002e-06, "loss": 1.5508, "step": 32470 }, { "epoch": 3.248, "grad_norm": 6.549915313720703, "learning_rate": 3.5076000000000005e-06, "loss": 1.2146, "step": 32480 }, { "epoch": 3.249, "grad_norm": 5.919882774353027, "learning_rate": 3.5056e-06, "loss": 1.0822, "step": 32490 }, { "epoch": 3.25, "grad_norm": 8.799071311950684, "learning_rate": 3.5036000000000002e-06, "loss": 1.2133, "step": 32500 }, { "epoch": 3.251, "grad_norm": 6.232070446014404, "learning_rate": 3.5016000000000005e-06, "loss": 1.0634, "step": 32510 }, { "epoch": 3.252, "grad_norm": 8.41631031036377, "learning_rate": 3.4996000000000003e-06, "loss": 1.0283, "step": 32520 }, { "epoch": 3.253, "grad_norm": 9.329627990722656, "learning_rate": 
3.4976000000000006e-06, "loss": 1.3866, "step": 32530 }, { "epoch": 3.254, "grad_norm": 6.06884241104126, "learning_rate": 3.4956e-06, "loss": 1.2663, "step": 32540 }, { "epoch": 3.255, "grad_norm": 12.039359092712402, "learning_rate": 3.4936000000000003e-06, "loss": 1.2413, "step": 32550 }, { "epoch": 3.2560000000000002, "grad_norm": 7.163179397583008, "learning_rate": 3.4916000000000006e-06, "loss": 1.3534, "step": 32560 }, { "epoch": 3.257, "grad_norm": 8.500076293945312, "learning_rate": 3.4896e-06, "loss": 1.1705, "step": 32570 }, { "epoch": 3.258, "grad_norm": 9.490743637084961, "learning_rate": 3.4876000000000003e-06, "loss": 1.1251, "step": 32580 }, { "epoch": 3.259, "grad_norm": 11.267419815063477, "learning_rate": 3.4856e-06, "loss": 1.2505, "step": 32590 }, { "epoch": 3.26, "grad_norm": 9.031342506408691, "learning_rate": 3.4836000000000004e-06, "loss": 1.046, "step": 32600 }, { "epoch": 3.261, "grad_norm": 12.972938537597656, "learning_rate": 3.4816000000000007e-06, "loss": 1.1546, "step": 32610 }, { "epoch": 3.262, "grad_norm": 10.140288352966309, "learning_rate": 3.4796e-06, "loss": 0.9507, "step": 32620 }, { "epoch": 3.263, "grad_norm": 4.204392433166504, "learning_rate": 3.4776000000000004e-06, "loss": 1.4854, "step": 32630 }, { "epoch": 3.2640000000000002, "grad_norm": 14.990270614624023, "learning_rate": 3.4756000000000002e-06, "loss": 1.5088, "step": 32640 }, { "epoch": 3.265, "grad_norm": 3.699033737182617, "learning_rate": 3.4736000000000005e-06, "loss": 1.3875, "step": 32650 }, { "epoch": 3.266, "grad_norm": 11.2253999710083, "learning_rate": 3.4716000000000004e-06, "loss": 1.1256, "step": 32660 }, { "epoch": 3.267, "grad_norm": 7.6248273849487305, "learning_rate": 3.4696e-06, "loss": 1.2642, "step": 32670 }, { "epoch": 3.268, "grad_norm": 11.952447891235352, "learning_rate": 3.4676000000000005e-06, "loss": 1.2381, "step": 32680 }, { "epoch": 3.269, "grad_norm": 5.650092601776123, "learning_rate": 3.4656e-06, "loss": 1.2303, "step": 32690 }, { 
"epoch": 3.27, "grad_norm": 8.982007026672363, "learning_rate": 3.4636e-06, "loss": 0.9703, "step": 32700 }, { "epoch": 3.271, "grad_norm": 15.105560302734375, "learning_rate": 3.4616000000000005e-06, "loss": 1.1687, "step": 32710 }, { "epoch": 3.2720000000000002, "grad_norm": 13.096453666687012, "learning_rate": 3.4596000000000003e-06, "loss": 1.2502, "step": 32720 }, { "epoch": 3.273, "grad_norm": 8.091682434082031, "learning_rate": 3.4576000000000006e-06, "loss": 1.2922, "step": 32730 }, { "epoch": 3.274, "grad_norm": 11.396024703979492, "learning_rate": 3.4556e-06, "loss": 0.7897, "step": 32740 }, { "epoch": 3.275, "grad_norm": 27.17865753173828, "learning_rate": 3.4536000000000003e-06, "loss": 1.3303, "step": 32750 }, { "epoch": 3.276, "grad_norm": 7.695706367492676, "learning_rate": 3.4516000000000005e-06, "loss": 1.3804, "step": 32760 }, { "epoch": 3.277, "grad_norm": 8.851439476013184, "learning_rate": 3.4496e-06, "loss": 1.3579, "step": 32770 }, { "epoch": 3.278, "grad_norm": 19.301010131835938, "learning_rate": 3.4476000000000002e-06, "loss": 1.3023, "step": 32780 }, { "epoch": 3.279, "grad_norm": 8.406732559204102, "learning_rate": 3.4456e-06, "loss": 1.0665, "step": 32790 }, { "epoch": 3.2800000000000002, "grad_norm": 5.838198661804199, "learning_rate": 3.4436000000000004e-06, "loss": 1.1368, "step": 32800 }, { "epoch": 3.281, "grad_norm": 12.55969524383545, "learning_rate": 3.4416000000000006e-06, "loss": 1.1306, "step": 32810 }, { "epoch": 3.282, "grad_norm": 3.7656383514404297, "learning_rate": 3.4396e-06, "loss": 1.0868, "step": 32820 }, { "epoch": 3.283, "grad_norm": 6.539681911468506, "learning_rate": 3.4376000000000003e-06, "loss": 0.8968, "step": 32830 }, { "epoch": 3.284, "grad_norm": 9.941622734069824, "learning_rate": 3.4356e-06, "loss": 0.8876, "step": 32840 }, { "epoch": 3.285, "grad_norm": 13.108545303344727, "learning_rate": 3.4336000000000005e-06, "loss": 1.3521, "step": 32850 }, { "epoch": 3.286, "grad_norm": 10.608994483947754, 
"learning_rate": 3.4316000000000003e-06, "loss": 1.486, "step": 32860 }, { "epoch": 3.287, "grad_norm": 12.911544799804688, "learning_rate": 3.4296e-06, "loss": 0.977, "step": 32870 }, { "epoch": 3.288, "grad_norm": 17.774314880371094, "learning_rate": 3.4276000000000004e-06, "loss": 1.0168, "step": 32880 }, { "epoch": 3.289, "grad_norm": 8.926736831665039, "learning_rate": 3.4256e-06, "loss": 1.1982, "step": 32890 }, { "epoch": 3.29, "grad_norm": 5.778880596160889, "learning_rate": 3.4236e-06, "loss": 0.8865, "step": 32900 }, { "epoch": 3.291, "grad_norm": 9.908224105834961, "learning_rate": 3.4216000000000004e-06, "loss": 1.119, "step": 32910 }, { "epoch": 3.292, "grad_norm": 14.492218017578125, "learning_rate": 3.4196000000000003e-06, "loss": 1.1737, "step": 32920 }, { "epoch": 3.293, "grad_norm": 25.405712127685547, "learning_rate": 3.4176000000000005e-06, "loss": 1.3227, "step": 32930 }, { "epoch": 3.294, "grad_norm": 15.04414176940918, "learning_rate": 3.4156e-06, "loss": 1.3968, "step": 32940 }, { "epoch": 3.295, "grad_norm": 17.074663162231445, "learning_rate": 3.4136000000000002e-06, "loss": 1.3745, "step": 32950 }, { "epoch": 3.296, "grad_norm": 9.181321144104004, "learning_rate": 3.4116000000000005e-06, "loss": 1.0224, "step": 32960 }, { "epoch": 3.297, "grad_norm": 6.065814971923828, "learning_rate": 3.4096e-06, "loss": 0.9816, "step": 32970 }, { "epoch": 3.298, "grad_norm": 19.590219497680664, "learning_rate": 3.4076e-06, "loss": 1.0568, "step": 32980 }, { "epoch": 3.299, "grad_norm": 13.940866470336914, "learning_rate": 3.4056e-06, "loss": 1.2674, "step": 32990 }, { "epoch": 3.3, "grad_norm": 16.113611221313477, "learning_rate": 3.4036000000000003e-06, "loss": 1.1921, "step": 33000 }, { "epoch": 3.301, "grad_norm": 8.01199722290039, "learning_rate": 3.4016000000000006e-06, "loss": 1.3221, "step": 33010 }, { "epoch": 3.302, "grad_norm": 12.130508422851562, "learning_rate": 3.3996e-06, "loss": 1.2079, "step": 33020 }, { "epoch": 3.303, "grad_norm": 
4.403210639953613, "learning_rate": 3.3976000000000003e-06, "loss": 1.1665, "step": 33030 }, { "epoch": 3.304, "grad_norm": 13.2339506149292, "learning_rate": 3.3956e-06, "loss": 1.2635, "step": 33040 }, { "epoch": 3.305, "grad_norm": 11.011444091796875, "learning_rate": 3.3936000000000004e-06, "loss": 1.3262, "step": 33050 }, { "epoch": 3.306, "grad_norm": 8.397808074951172, "learning_rate": 3.3916000000000003e-06, "loss": 1.4053, "step": 33060 }, { "epoch": 3.307, "grad_norm": 37.19631576538086, "learning_rate": 3.3896e-06, "loss": 1.2789, "step": 33070 }, { "epoch": 3.308, "grad_norm": 9.323402404785156, "learning_rate": 3.3876000000000004e-06, "loss": 1.2272, "step": 33080 }, { "epoch": 3.309, "grad_norm": 8.225543022155762, "learning_rate": 3.3856000000000007e-06, "loss": 0.987, "step": 33090 }, { "epoch": 3.31, "grad_norm": 14.92512035369873, "learning_rate": 3.3836e-06, "loss": 1.2526, "step": 33100 }, { "epoch": 3.311, "grad_norm": 11.277851104736328, "learning_rate": 3.3816000000000004e-06, "loss": 1.5352, "step": 33110 }, { "epoch": 3.312, "grad_norm": 19.680356979370117, "learning_rate": 3.3796000000000002e-06, "loss": 1.3631, "step": 33120 }, { "epoch": 3.313, "grad_norm": 15.132512092590332, "learning_rate": 3.3776000000000005e-06, "loss": 1.2785, "step": 33130 }, { "epoch": 3.314, "grad_norm": 9.5708589553833, "learning_rate": 3.3756000000000003e-06, "loss": 1.1834, "step": 33140 }, { "epoch": 3.315, "grad_norm": 12.436238288879395, "learning_rate": 3.3736e-06, "loss": 1.4233, "step": 33150 }, { "epoch": 3.316, "grad_norm": 7.335597515106201, "learning_rate": 3.3716000000000005e-06, "loss": 1.0372, "step": 33160 }, { "epoch": 3.317, "grad_norm": 15.58549690246582, "learning_rate": 3.3696e-06, "loss": 1.3561, "step": 33170 }, { "epoch": 3.318, "grad_norm": 7.0587239265441895, "learning_rate": 3.3676e-06, "loss": 1.4077, "step": 33180 }, { "epoch": 3.319, "grad_norm": 7.00608491897583, "learning_rate": 3.3656000000000004e-06, "loss": 1.1006, "step": 
33190 }, { "epoch": 3.32, "grad_norm": 9.472764015197754, "learning_rate": 3.3636000000000003e-06, "loss": 0.9282, "step": 33200 }, { "epoch": 3.321, "grad_norm": 15.248495101928711, "learning_rate": 3.3616000000000006e-06, "loss": 1.3763, "step": 33210 }, { "epoch": 3.322, "grad_norm": 12.883280754089355, "learning_rate": 3.3596e-06, "loss": 1.2735, "step": 33220 }, { "epoch": 3.323, "grad_norm": 16.638837814331055, "learning_rate": 3.3576000000000003e-06, "loss": 1.2208, "step": 33230 }, { "epoch": 3.324, "grad_norm": 12.072118759155273, "learning_rate": 3.3556000000000005e-06, "loss": 0.9692, "step": 33240 }, { "epoch": 3.325, "grad_norm": 19.319643020629883, "learning_rate": 3.3538000000000004e-06, "loss": 1.4519, "step": 33250 }, { "epoch": 3.326, "grad_norm": 9.106452941894531, "learning_rate": 3.3518000000000002e-06, "loss": 1.0134, "step": 33260 }, { "epoch": 3.327, "grad_norm": 10.190835952758789, "learning_rate": 3.3498e-06, "loss": 1.2243, "step": 33270 }, { "epoch": 3.328, "grad_norm": 84.5706787109375, "learning_rate": 3.3478000000000004e-06, "loss": 0.996, "step": 33280 }, { "epoch": 3.329, "grad_norm": 6.543524265289307, "learning_rate": 3.3458000000000002e-06, "loss": 0.917, "step": 33290 }, { "epoch": 3.33, "grad_norm": 20.832143783569336, "learning_rate": 3.3438000000000005e-06, "loss": 1.6999, "step": 33300 }, { "epoch": 3.331, "grad_norm": 10.799151420593262, "learning_rate": 3.3418e-06, "loss": 1.2738, "step": 33310 }, { "epoch": 3.332, "grad_norm": 9.876962661743164, "learning_rate": 3.3398e-06, "loss": 0.6454, "step": 33320 }, { "epoch": 3.333, "grad_norm": 16.863393783569336, "learning_rate": 3.3378000000000005e-06, "loss": 1.1647, "step": 33330 }, { "epoch": 3.334, "grad_norm": 10.639697074890137, "learning_rate": 3.3358000000000003e-06, "loss": 1.1701, "step": 33340 }, { "epoch": 3.335, "grad_norm": 9.584555625915527, "learning_rate": 3.3338e-06, "loss": 1.1659, "step": 33350 }, { "epoch": 3.336, "grad_norm": 15.341971397399902, 
"learning_rate": 3.3318e-06, "loss": 1.1988, "step": 33360 }, { "epoch": 3.337, "grad_norm": 9.649847984313965, "learning_rate": 3.3298000000000003e-06, "loss": 1.1138, "step": 33370 }, { "epoch": 3.338, "grad_norm": 6.513732433319092, "learning_rate": 3.3278000000000006e-06, "loss": 1.2033, "step": 33380 }, { "epoch": 3.339, "grad_norm": 12.632469177246094, "learning_rate": 3.3258e-06, "loss": 1.1843, "step": 33390 }, { "epoch": 3.34, "grad_norm": 19.51534080505371, "learning_rate": 3.3238000000000003e-06, "loss": 1.2779, "step": 33400 }, { "epoch": 3.341, "grad_norm": 22.03960609436035, "learning_rate": 3.3218e-06, "loss": 1.5278, "step": 33410 }, { "epoch": 3.342, "grad_norm": 4.911806106567383, "learning_rate": 3.3198000000000004e-06, "loss": 1.2932, "step": 33420 }, { "epoch": 3.343, "grad_norm": 12.205720901489258, "learning_rate": 3.3178000000000007e-06, "loss": 1.1227, "step": 33430 }, { "epoch": 3.344, "grad_norm": 8.928045272827148, "learning_rate": 3.3158e-06, "loss": 1.2943, "step": 33440 }, { "epoch": 3.3449999999999998, "grad_norm": 12.347901344299316, "learning_rate": 3.3138000000000004e-06, "loss": 1.3205, "step": 33450 }, { "epoch": 3.346, "grad_norm": 13.050529479980469, "learning_rate": 3.3118e-06, "loss": 1.292, "step": 33460 }, { "epoch": 3.347, "grad_norm": 14.409247398376465, "learning_rate": 3.3098e-06, "loss": 1.2239, "step": 33470 }, { "epoch": 3.348, "grad_norm": 19.74100685119629, "learning_rate": 3.3078000000000003e-06, "loss": 1.4628, "step": 33480 }, { "epoch": 3.349, "grad_norm": 11.511463165283203, "learning_rate": 3.3058e-06, "loss": 1.1971, "step": 33490 }, { "epoch": 3.35, "grad_norm": 3.650604486465454, "learning_rate": 3.3038000000000005e-06, "loss": 1.2318, "step": 33500 }, { "epoch": 3.351, "grad_norm": 8.586471557617188, "learning_rate": 3.3018e-06, "loss": 1.2146, "step": 33510 }, { "epoch": 3.352, "grad_norm": 11.183399200439453, "learning_rate": 3.2998e-06, "loss": 1.3388, "step": 33520 }, { "epoch": 3.3529999999999998, 
"grad_norm": 10.341146469116211, "learning_rate": 3.2978000000000004e-06, "loss": 1.3137, "step": 33530 }, { "epoch": 3.354, "grad_norm": 13.762995719909668, "learning_rate": 3.2958000000000003e-06, "loss": 1.2138, "step": 33540 }, { "epoch": 3.355, "grad_norm": 10.429519653320312, "learning_rate": 3.2938e-06, "loss": 1.4147, "step": 33550 }, { "epoch": 3.356, "grad_norm": 8.173771858215332, "learning_rate": 3.2918e-06, "loss": 1.3404, "step": 33560 }, { "epoch": 3.357, "grad_norm": 11.151724815368652, "learning_rate": 3.2898000000000002e-06, "loss": 1.2263, "step": 33570 }, { "epoch": 3.358, "grad_norm": 8.839871406555176, "learning_rate": 3.2878000000000005e-06, "loss": 1.2488, "step": 33580 }, { "epoch": 3.359, "grad_norm": 10.73769760131836, "learning_rate": 3.2858e-06, "loss": 1.0362, "step": 33590 }, { "epoch": 3.36, "grad_norm": 11.241143226623535, "learning_rate": 3.2838000000000002e-06, "loss": 1.4276, "step": 33600 }, { "epoch": 3.3609999999999998, "grad_norm": 17.27197265625, "learning_rate": 3.2818000000000005e-06, "loss": 1.1917, "step": 33610 }, { "epoch": 3.362, "grad_norm": 6.699103832244873, "learning_rate": 3.2798000000000003e-06, "loss": 1.6466, "step": 33620 }, { "epoch": 3.363, "grad_norm": 12.583761215209961, "learning_rate": 3.2778000000000006e-06, "loss": 1.361, "step": 33630 }, { "epoch": 3.364, "grad_norm": 7.58347749710083, "learning_rate": 3.2758e-06, "loss": 1.6005, "step": 33640 }, { "epoch": 3.365, "grad_norm": 6.883507251739502, "learning_rate": 3.2738000000000003e-06, "loss": 1.0624, "step": 33650 }, { "epoch": 3.366, "grad_norm": 5.605075836181641, "learning_rate": 3.2718000000000006e-06, "loss": 1.3698, "step": 33660 }, { "epoch": 3.367, "grad_norm": 15.588250160217285, "learning_rate": 3.2698e-06, "loss": 1.3087, "step": 33670 }, { "epoch": 3.368, "grad_norm": 10.50893497467041, "learning_rate": 3.2678000000000003e-06, "loss": 1.4174, "step": 33680 }, { "epoch": 3.3689999999999998, "grad_norm": 5.206080913543701, "learning_rate": 
3.2658e-06, "loss": 0.9736, "step": 33690 }, { "epoch": 3.37, "grad_norm": 11.979902267456055, "learning_rate": 3.2638000000000004e-06, "loss": 1.1269, "step": 33700 }, { "epoch": 3.371, "grad_norm": 8.385255813598633, "learning_rate": 3.2618000000000007e-06, "loss": 1.5978, "step": 33710 }, { "epoch": 3.372, "grad_norm": 19.772235870361328, "learning_rate": 3.2598e-06, "loss": 1.2343, "step": 33720 }, { "epoch": 3.373, "grad_norm": 5.867338180541992, "learning_rate": 3.2578000000000004e-06, "loss": 1.4097, "step": 33730 }, { "epoch": 3.374, "grad_norm": 8.423144340515137, "learning_rate": 3.2558000000000002e-06, "loss": 1.3668, "step": 33740 }, { "epoch": 3.375, "grad_norm": 5.6387128829956055, "learning_rate": 3.2538e-06, "loss": 0.8141, "step": 33750 }, { "epoch": 3.376, "grad_norm": 8.880389213562012, "learning_rate": 3.2518000000000004e-06, "loss": 1.2723, "step": 33760 }, { "epoch": 3.377, "grad_norm": 14.121014595031738, "learning_rate": 3.2498e-06, "loss": 1.3464, "step": 33770 }, { "epoch": 3.378, "grad_norm": 9.599756240844727, "learning_rate": 3.2478000000000005e-06, "loss": 1.0841, "step": 33780 }, { "epoch": 3.379, "grad_norm": 9.268075942993164, "learning_rate": 3.2458e-06, "loss": 1.3382, "step": 33790 }, { "epoch": 3.38, "grad_norm": 11.690591812133789, "learning_rate": 3.2438e-06, "loss": 1.6777, "step": 33800 }, { "epoch": 3.3810000000000002, "grad_norm": 9.098777770996094, "learning_rate": 3.2418000000000005e-06, "loss": 1.2003, "step": 33810 }, { "epoch": 3.382, "grad_norm": 9.938180923461914, "learning_rate": 3.2398000000000003e-06, "loss": 1.2026, "step": 33820 }, { "epoch": 3.383, "grad_norm": 9.417630195617676, "learning_rate": 3.2378000000000006e-06, "loss": 1.127, "step": 33830 }, { "epoch": 3.384, "grad_norm": 8.025232315063477, "learning_rate": 3.2358e-06, "loss": 1.1777, "step": 33840 }, { "epoch": 3.385, "grad_norm": 18.49056053161621, "learning_rate": 3.2338000000000003e-06, "loss": 1.6752, "step": 33850 }, { "epoch": 3.386, 
"grad_norm": 4.776751518249512, "learning_rate": 3.2318000000000006e-06, "loss": 1.452, "step": 33860 }, { "epoch": 3.387, "grad_norm": 24.444454193115234, "learning_rate": 3.2298e-06, "loss": 1.5033, "step": 33870 }, { "epoch": 3.388, "grad_norm": 6.775298595428467, "learning_rate": 3.2278000000000002e-06, "loss": 0.9021, "step": 33880 }, { "epoch": 3.3890000000000002, "grad_norm": 9.522842407226562, "learning_rate": 3.2258e-06, "loss": 1.2927, "step": 33890 }, { "epoch": 3.39, "grad_norm": 10.366374015808105, "learning_rate": 3.2238000000000004e-06, "loss": 0.9999, "step": 33900 }, { "epoch": 3.391, "grad_norm": 11.614657402038574, "learning_rate": 3.2218000000000006e-06, "loss": 1.1758, "step": 33910 }, { "epoch": 3.392, "grad_norm": 10.75184440612793, "learning_rate": 3.2198e-06, "loss": 1.2787, "step": 33920 }, { "epoch": 3.393, "grad_norm": 9.227688789367676, "learning_rate": 3.2178000000000003e-06, "loss": 1.1336, "step": 33930 }, { "epoch": 3.394, "grad_norm": 16.701358795166016, "learning_rate": 3.2158e-06, "loss": 1.2278, "step": 33940 }, { "epoch": 3.395, "grad_norm": 14.846860885620117, "learning_rate": 3.2138e-06, "loss": 1.0623, "step": 33950 }, { "epoch": 3.396, "grad_norm": 8.67315673828125, "learning_rate": 3.2118000000000003e-06, "loss": 1.2896, "step": 33960 }, { "epoch": 3.3970000000000002, "grad_norm": 13.453950881958008, "learning_rate": 3.2098e-06, "loss": 1.3317, "step": 33970 }, { "epoch": 3.398, "grad_norm": 7.869078636169434, "learning_rate": 3.2078000000000004e-06, "loss": 1.0102, "step": 33980 }, { "epoch": 3.399, "grad_norm": 11.090518951416016, "learning_rate": 3.2058e-06, "loss": 1.546, "step": 33990 }, { "epoch": 3.4, "grad_norm": 10.65959358215332, "learning_rate": 3.2038e-06, "loss": 1.4729, "step": 34000 }, { "epoch": 3.401, "grad_norm": 11.785367012023926, "learning_rate": 3.2018000000000004e-06, "loss": 1.3541, "step": 34010 }, { "epoch": 3.402, "grad_norm": 9.617659568786621, "learning_rate": 3.1998000000000003e-06, "loss": 
1.2666, "step": 34020 }, { "epoch": 3.403, "grad_norm": 2.974426031112671, "learning_rate": 3.1978000000000005e-06, "loss": 1.4494, "step": 34030 }, { "epoch": 3.404, "grad_norm": 9.40168571472168, "learning_rate": 3.1958e-06, "loss": 1.3822, "step": 34040 }, { "epoch": 3.4050000000000002, "grad_norm": 13.251846313476562, "learning_rate": 3.1938000000000002e-06, "loss": 1.219, "step": 34050 }, { "epoch": 3.406, "grad_norm": 8.29511547088623, "learning_rate": 3.1918000000000005e-06, "loss": 1.0676, "step": 34060 }, { "epoch": 3.407, "grad_norm": 10.543883323669434, "learning_rate": 3.1898e-06, "loss": 1.0714, "step": 34070 }, { "epoch": 3.408, "grad_norm": 14.195906639099121, "learning_rate": 3.1878e-06, "loss": 0.8721, "step": 34080 }, { "epoch": 3.409, "grad_norm": 8.348739624023438, "learning_rate": 3.1858e-06, "loss": 1.0349, "step": 34090 }, { "epoch": 3.41, "grad_norm": 7.491306304931641, "learning_rate": 3.1838000000000003e-06, "loss": 1.1205, "step": 34100 }, { "epoch": 3.411, "grad_norm": 18.908933639526367, "learning_rate": 3.1818000000000006e-06, "loss": 1.3497, "step": 34110 }, { "epoch": 3.412, "grad_norm": 11.109058380126953, "learning_rate": 3.1798e-06, "loss": 1.011, "step": 34120 }, { "epoch": 3.413, "grad_norm": 11.877083778381348, "learning_rate": 3.1778000000000003e-06, "loss": 1.1504, "step": 34130 }, { "epoch": 3.414, "grad_norm": 12.849214553833008, "learning_rate": 3.1758e-06, "loss": 1.1642, "step": 34140 }, { "epoch": 3.415, "grad_norm": 20.05178451538086, "learning_rate": 3.1738e-06, "loss": 1.4801, "step": 34150 }, { "epoch": 3.416, "grad_norm": 8.191794395446777, "learning_rate": 3.1718000000000003e-06, "loss": 1.217, "step": 34160 }, { "epoch": 3.417, "grad_norm": 18.743289947509766, "learning_rate": 3.1698e-06, "loss": 1.2655, "step": 34170 }, { "epoch": 3.418, "grad_norm": 10.480864524841309, "learning_rate": 3.1678000000000004e-06, "loss": 1.049, "step": 34180 }, { "epoch": 3.419, "grad_norm": 5.857020854949951, "learning_rate": 
3.1658e-06, "loss": 1.2576, "step": 34190 }, { "epoch": 3.42, "grad_norm": 9.69017219543457, "learning_rate": 3.1638e-06, "loss": 1.1391, "step": 34200 }, { "epoch": 3.421, "grad_norm": 20.963146209716797, "learning_rate": 3.1618000000000004e-06, "loss": 1.3186, "step": 34210 }, { "epoch": 3.422, "grad_norm": 14.046696662902832, "learning_rate": 3.1598000000000002e-06, "loss": 0.846, "step": 34220 }, { "epoch": 3.423, "grad_norm": 13.370206832885742, "learning_rate": 3.1578000000000005e-06, "loss": 1.567, "step": 34230 }, { "epoch": 3.424, "grad_norm": 14.044351577758789, "learning_rate": 3.1558000000000003e-06, "loss": 1.232, "step": 34240 }, { "epoch": 3.425, "grad_norm": 14.906122207641602, "learning_rate": 3.1538e-06, "loss": 1.509, "step": 34250 }, { "epoch": 3.426, "grad_norm": 13.107522964477539, "learning_rate": 3.1518000000000005e-06, "loss": 1.166, "step": 34260 }, { "epoch": 3.427, "grad_norm": 9.866403579711914, "learning_rate": 3.1498e-06, "loss": 0.8877, "step": 34270 }, { "epoch": 3.428, "grad_norm": 10.417130470275879, "learning_rate": 3.1478e-06, "loss": 1.459, "step": 34280 }, { "epoch": 3.429, "grad_norm": 10.57152271270752, "learning_rate": 3.1458000000000004e-06, "loss": 1.4674, "step": 34290 }, { "epoch": 3.43, "grad_norm": 13.937708854675293, "learning_rate": 3.1438000000000003e-06, "loss": 1.2823, "step": 34300 }, { "epoch": 3.431, "grad_norm": 10.89199161529541, "learning_rate": 3.1418000000000006e-06, "loss": 1.4248, "step": 34310 }, { "epoch": 3.432, "grad_norm": 10.097068786621094, "learning_rate": 3.1398e-06, "loss": 0.9265, "step": 34320 }, { "epoch": 3.433, "grad_norm": 14.117484092712402, "learning_rate": 3.1378000000000003e-06, "loss": 1.2412, "step": 34330 }, { "epoch": 3.434, "grad_norm": 7.420694351196289, "learning_rate": 3.1358000000000005e-06, "loss": 0.9103, "step": 34340 }, { "epoch": 3.435, "grad_norm": 11.679058074951172, "learning_rate": 3.1338e-06, "loss": 1.0792, "step": 34350 }, { "epoch": 3.436, "grad_norm": 
12.93230152130127, "learning_rate": 3.1318000000000002e-06, "loss": 1.1223, "step": 34360 }, { "epoch": 3.437, "grad_norm": 14.877182960510254, "learning_rate": 3.1298e-06, "loss": 1.0565, "step": 34370 }, { "epoch": 3.438, "grad_norm": 11.642122268676758, "learning_rate": 3.1278000000000004e-06, "loss": 1.1499, "step": 34380 }, { "epoch": 3.439, "grad_norm": 29.165790557861328, "learning_rate": 3.1258000000000006e-06, "loss": 1.1739, "step": 34390 }, { "epoch": 3.44, "grad_norm": 16.550203323364258, "learning_rate": 3.1238e-06, "loss": 1.4083, "step": 34400 }, { "epoch": 3.441, "grad_norm": 94.38886260986328, "learning_rate": 3.1218000000000003e-06, "loss": 1.1679, "step": 34410 }, { "epoch": 3.442, "grad_norm": 8.254895210266113, "learning_rate": 3.1198e-06, "loss": 1.3657, "step": 34420 }, { "epoch": 3.443, "grad_norm": 14.071529388427734, "learning_rate": 3.1178000000000005e-06, "loss": 1.2738, "step": 34430 }, { "epoch": 3.444, "grad_norm": 7.064838886260986, "learning_rate": 3.1158000000000003e-06, "loss": 1.342, "step": 34440 }, { "epoch": 3.445, "grad_norm": 11.968881607055664, "learning_rate": 3.1138e-06, "loss": 1.2235, "step": 34450 }, { "epoch": 3.446, "grad_norm": 6.783051490783691, "learning_rate": 3.1118000000000004e-06, "loss": 1.1615, "step": 34460 }, { "epoch": 3.447, "grad_norm": 41.84458541870117, "learning_rate": 3.1098e-06, "loss": 1.2197, "step": 34470 }, { "epoch": 3.448, "grad_norm": 13.810715675354004, "learning_rate": 3.1078e-06, "loss": 1.3945, "step": 34480 }, { "epoch": 3.449, "grad_norm": 20.119709014892578, "learning_rate": 3.1058000000000004e-06, "loss": 0.936, "step": 34490 }, { "epoch": 3.45, "grad_norm": 10.514229774475098, "learning_rate": 3.1038000000000003e-06, "loss": 1.5519, "step": 34500 }, { "epoch": 3.451, "grad_norm": 10.80628490447998, "learning_rate": 3.1018000000000005e-06, "loss": 1.4053, "step": 34510 }, { "epoch": 3.452, "grad_norm": 10.871781349182129, "learning_rate": 3.0998e-06, "loss": 1.4818, "step": 34520 }, 
{ "epoch": 3.453, "grad_norm": 9.422157287597656, "learning_rate": 3.0978000000000002e-06, "loss": 1.3059, "step": 34530 }, { "epoch": 3.454, "grad_norm": 10.570176124572754, "learning_rate": 3.0958000000000005e-06, "loss": 1.3697, "step": 34540 }, { "epoch": 3.455, "grad_norm": 8.905158042907715, "learning_rate": 3.0938000000000003e-06, "loss": 1.2643, "step": 34550 }, { "epoch": 3.456, "grad_norm": 9.268630981445312, "learning_rate": 3.0918e-06, "loss": 1.4701, "step": 34560 }, { "epoch": 3.457, "grad_norm": 9.121696472167969, "learning_rate": 3.0898e-06, "loss": 1.0755, "step": 34570 }, { "epoch": 3.458, "grad_norm": 10.415026664733887, "learning_rate": 3.0878000000000003e-06, "loss": 1.3057, "step": 34580 }, { "epoch": 3.459, "grad_norm": 11.028660774230957, "learning_rate": 3.0858000000000006e-06, "loss": 1.369, "step": 34590 }, { "epoch": 3.46, "grad_norm": 20.174774169921875, "learning_rate": 3.0838e-06, "loss": 1.3251, "step": 34600 }, { "epoch": 3.461, "grad_norm": 8.983832359313965, "learning_rate": 3.0818000000000003e-06, "loss": 1.1352, "step": 34610 }, { "epoch": 3.462, "grad_norm": 13.275557518005371, "learning_rate": 3.0798e-06, "loss": 1.264, "step": 34620 }, { "epoch": 3.463, "grad_norm": 12.011662483215332, "learning_rate": 3.0778000000000004e-06, "loss": 1.0753, "step": 34630 }, { "epoch": 3.464, "grad_norm": 11.438468933105469, "learning_rate": 3.0758000000000003e-06, "loss": 1.2774, "step": 34640 }, { "epoch": 3.465, "grad_norm": 6.723337173461914, "learning_rate": 3.0738e-06, "loss": 0.9397, "step": 34650 }, { "epoch": 3.466, "grad_norm": 12.843230247497559, "learning_rate": 3.0718000000000004e-06, "loss": 1.1794, "step": 34660 }, { "epoch": 3.467, "grad_norm": 15.513681411743164, "learning_rate": 3.0698e-06, "loss": 1.0516, "step": 34670 }, { "epoch": 3.468, "grad_norm": 8.693320274353027, "learning_rate": 3.0678e-06, "loss": 1.253, "step": 34680 }, { "epoch": 3.469, "grad_norm": 11.100879669189453, "learning_rate": 3.0658000000000004e-06, 
"loss": 1.2344, "step": 34690 }, { "epoch": 3.4699999999999998, "grad_norm": 17.663164138793945, "learning_rate": 3.0638e-06, "loss": 1.0974, "step": 34700 }, { "epoch": 3.471, "grad_norm": 11.607297897338867, "learning_rate": 3.0618000000000005e-06, "loss": 1.1865, "step": 34710 }, { "epoch": 3.472, "grad_norm": 7.533469200134277, "learning_rate": 3.0598e-06, "loss": 1.2858, "step": 34720 }, { "epoch": 3.473, "grad_norm": 15.175600051879883, "learning_rate": 3.0578e-06, "loss": 0.918, "step": 34730 }, { "epoch": 3.474, "grad_norm": 14.2586030960083, "learning_rate": 3.0558000000000005e-06, "loss": 1.2796, "step": 34740 }, { "epoch": 3.475, "grad_norm": 22.066314697265625, "learning_rate": 3.0538000000000003e-06, "loss": 1.4447, "step": 34750 }, { "epoch": 3.476, "grad_norm": 9.574776649475098, "learning_rate": 3.0518e-06, "loss": 1.2219, "step": 34760 }, { "epoch": 3.477, "grad_norm": 11.769184112548828, "learning_rate": 3.0498e-06, "loss": 1.0649, "step": 34770 }, { "epoch": 3.4779999999999998, "grad_norm": 16.585189819335938, "learning_rate": 3.0478000000000003e-06, "loss": 0.9256, "step": 34780 }, { "epoch": 3.479, "grad_norm": 9.61883544921875, "learning_rate": 3.0458000000000006e-06, "loss": 1.1651, "step": 34790 }, { "epoch": 3.48, "grad_norm": 16.011905670166016, "learning_rate": 3.0438e-06, "loss": 1.076, "step": 34800 }, { "epoch": 3.481, "grad_norm": 14.46161937713623, "learning_rate": 3.0418000000000003e-06, "loss": 1.0956, "step": 34810 }, { "epoch": 3.482, "grad_norm": 10.926321983337402, "learning_rate": 3.0398e-06, "loss": 1.4018, "step": 34820 }, { "epoch": 3.483, "grad_norm": 10.246420860290527, "learning_rate": 3.0378000000000004e-06, "loss": 1.3949, "step": 34830 }, { "epoch": 3.484, "grad_norm": 12.439456939697266, "learning_rate": 3.0358000000000002e-06, "loss": 1.3904, "step": 34840 }, { "epoch": 3.485, "grad_norm": 13.932425498962402, "learning_rate": 3.0338e-06, "loss": 1.3499, "step": 34850 }, { "epoch": 3.4859999999999998, "grad_norm": 
29.808679580688477, "learning_rate": 3.0318000000000003e-06, "loss": 1.4419, "step": 34860 }, { "epoch": 3.487, "grad_norm": 8.483957290649414, "learning_rate": 3.0298000000000006e-06, "loss": 1.3342, "step": 34870 }, { "epoch": 3.488, "grad_norm": 25.728961944580078, "learning_rate": 3.0278e-06, "loss": 1.5919, "step": 34880 }, { "epoch": 3.489, "grad_norm": 1.7447080612182617, "learning_rate": 3.0258000000000003e-06, "loss": 0.9932, "step": 34890 }, { "epoch": 3.49, "grad_norm": 15.595843315124512, "learning_rate": 3.0238e-06, "loss": 1.1095, "step": 34900 }, { "epoch": 3.491, "grad_norm": 20.301332473754883, "learning_rate": 3.0218000000000004e-06, "loss": 1.1366, "step": 34910 }, { "epoch": 3.492, "grad_norm": 10.591264724731445, "learning_rate": 3.0198000000000007e-06, "loss": 1.2141, "step": 34920 }, { "epoch": 3.493, "grad_norm": 16.570968627929688, "learning_rate": 3.0178e-06, "loss": 1.3903, "step": 34930 }, { "epoch": 3.4939999999999998, "grad_norm": 11.962677001953125, "learning_rate": 3.0158000000000004e-06, "loss": 1.3349, "step": 34940 }, { "epoch": 3.495, "grad_norm": 9.16555404663086, "learning_rate": 3.0138000000000003e-06, "loss": 1.2034, "step": 34950 }, { "epoch": 3.496, "grad_norm": 12.446146965026855, "learning_rate": 3.0118e-06, "loss": 1.2602, "step": 34960 }, { "epoch": 3.497, "grad_norm": 13.839415550231934, "learning_rate": 3.0098000000000004e-06, "loss": 1.324, "step": 34970 }, { "epoch": 3.498, "grad_norm": 9.544456481933594, "learning_rate": 3.0078000000000002e-06, "loss": 1.1419, "step": 34980 }, { "epoch": 3.499, "grad_norm": 8.468356132507324, "learning_rate": 3.0058000000000005e-06, "loss": 1.1111, "step": 34990 }, { "epoch": 3.5, "grad_norm": 11.848767280578613, "learning_rate": 3.0038e-06, "loss": 1.4124, "step": 35000 }, { "epoch": 3.501, "grad_norm": 19.835660934448242, "learning_rate": 3.0018e-06, "loss": 1.1178, "step": 35010 }, { "epoch": 3.502, "grad_norm": 8.538936614990234, "learning_rate": 2.9998000000000005e-06, "loss": 
1.1947, "step": 35020 }, { "epoch": 3.503, "grad_norm": 12.672850608825684, "learning_rate": 2.9978000000000003e-06, "loss": 1.3617, "step": 35030 }, { "epoch": 3.504, "grad_norm": 11.742630958557129, "learning_rate": 2.9958e-06, "loss": 0.9827, "step": 35040 }, { "epoch": 3.505, "grad_norm": 8.299677848815918, "learning_rate": 2.9938e-06, "loss": 1.2273, "step": 35050 }, { "epoch": 3.5060000000000002, "grad_norm": 11.297226905822754, "learning_rate": 2.9918000000000003e-06, "loss": 1.0741, "step": 35060 }, { "epoch": 3.507, "grad_norm": 17.20513916015625, "learning_rate": 2.9898000000000006e-06, "loss": 1.3963, "step": 35070 }, { "epoch": 3.508, "grad_norm": 13.799625396728516, "learning_rate": 2.9878e-06, "loss": 1.5551, "step": 35080 }, { "epoch": 3.509, "grad_norm": 16.844680786132812, "learning_rate": 2.9858000000000003e-06, "loss": 1.1859, "step": 35090 }, { "epoch": 3.51, "grad_norm": 9.619006156921387, "learning_rate": 2.9838e-06, "loss": 1.2588, "step": 35100 }, { "epoch": 3.511, "grad_norm": 15.689319610595703, "learning_rate": 2.9818000000000004e-06, "loss": 1.0763, "step": 35110 }, { "epoch": 3.512, "grad_norm": 8.318473815917969, "learning_rate": 2.9798000000000007e-06, "loss": 1.4848, "step": 35120 }, { "epoch": 3.513, "grad_norm": 12.929315567016602, "learning_rate": 2.9778e-06, "loss": 0.9772, "step": 35130 }, { "epoch": 3.5140000000000002, "grad_norm": 13.920153617858887, "learning_rate": 2.9758000000000004e-06, "loss": 1.3043, "step": 35140 }, { "epoch": 3.515, "grad_norm": 7.5626540184021, "learning_rate": 2.9738000000000002e-06, "loss": 1.0893, "step": 35150 }, { "epoch": 3.516, "grad_norm": 12.712926864624023, "learning_rate": 2.9718e-06, "loss": 1.2683, "step": 35160 }, { "epoch": 3.517, "grad_norm": 8.401205062866211, "learning_rate": 2.9698000000000003e-06, "loss": 1.0076, "step": 35170 }, { "epoch": 3.518, "grad_norm": 10.700116157531738, "learning_rate": 2.9678e-06, "loss": 1.1142, "step": 35180 }, { "epoch": 3.519, "grad_norm": 
11.96967887878418, "learning_rate": 2.9658000000000005e-06, "loss": 1.049, "step": 35190 }, { "epoch": 3.52, "grad_norm": 10.4402437210083, "learning_rate": 2.9638e-06, "loss": 1.2213, "step": 35200 }, { "epoch": 3.521, "grad_norm": 10.360675811767578, "learning_rate": 2.9618e-06, "loss": 1.5244, "step": 35210 }, { "epoch": 3.5220000000000002, "grad_norm": 12.723966598510742, "learning_rate": 2.9598000000000004e-06, "loss": 1.0189, "step": 35220 }, { "epoch": 3.523, "grad_norm": 7.882230281829834, "learning_rate": 2.9578000000000003e-06, "loss": 1.5046, "step": 35230 }, { "epoch": 3.524, "grad_norm": 12.393601417541504, "learning_rate": 2.9558e-06, "loss": 1.2952, "step": 35240 }, { "epoch": 3.525, "grad_norm": 6.784119606018066, "learning_rate": 2.954e-06, "loss": 1.1812, "step": 35250 }, { "epoch": 3.526, "grad_norm": 11.994717597961426, "learning_rate": 2.9520000000000003e-06, "loss": 1.208, "step": 35260 }, { "epoch": 3.527, "grad_norm": 12.410710334777832, "learning_rate": 2.95e-06, "loss": 1.1474, "step": 35270 }, { "epoch": 3.528, "grad_norm": 7.638210296630859, "learning_rate": 2.9480000000000004e-06, "loss": 1.1552, "step": 35280 }, { "epoch": 3.529, "grad_norm": 7.182655334472656, "learning_rate": 2.946e-06, "loss": 0.9325, "step": 35290 }, { "epoch": 3.5300000000000002, "grad_norm": 16.988740921020508, "learning_rate": 2.944e-06, "loss": 1.3915, "step": 35300 }, { "epoch": 3.531, "grad_norm": 20.228641510009766, "learning_rate": 2.9420000000000004e-06, "loss": 1.093, "step": 35310 }, { "epoch": 3.532, "grad_norm": 9.96055793762207, "learning_rate": 2.9400000000000002e-06, "loss": 1.2415, "step": 35320 }, { "epoch": 3.533, "grad_norm": 12.77842903137207, "learning_rate": 2.9380000000000005e-06, "loss": 1.4007, "step": 35330 }, { "epoch": 3.534, "grad_norm": 11.983407020568848, "learning_rate": 2.9360000000000003e-06, "loss": 1.3673, "step": 35340 }, { "epoch": 3.535, "grad_norm": 9.29656982421875, "learning_rate": 2.934e-06, "loss": 1.0645, "step": 35350 
}, { "epoch": 3.536, "grad_norm": 7.043116569519043, "learning_rate": 2.9320000000000005e-06, "loss": 1.3412, "step": 35360 }, { "epoch": 3.537, "grad_norm": 9.555559158325195, "learning_rate": 2.93e-06, "loss": 0.954, "step": 35370 }, { "epoch": 3.5380000000000003, "grad_norm": 10.05732250213623, "learning_rate": 2.928e-06, "loss": 1.0945, "step": 35380 }, { "epoch": 3.539, "grad_norm": 16.445629119873047, "learning_rate": 2.9260000000000004e-06, "loss": 1.1894, "step": 35390 }, { "epoch": 3.54, "grad_norm": 9.908014297485352, "learning_rate": 2.9240000000000003e-06, "loss": 1.2683, "step": 35400 }, { "epoch": 3.541, "grad_norm": 4.7955780029296875, "learning_rate": 2.9220000000000006e-06, "loss": 1.0892, "step": 35410 }, { "epoch": 3.542, "grad_norm": 10.743565559387207, "learning_rate": 2.92e-06, "loss": 1.4223, "step": 35420 }, { "epoch": 3.543, "grad_norm": 7.127283573150635, "learning_rate": 2.9180000000000003e-06, "loss": 1.3005, "step": 35430 }, { "epoch": 3.544, "grad_norm": 16.754690170288086, "learning_rate": 2.9160000000000005e-06, "loss": 1.1942, "step": 35440 }, { "epoch": 3.545, "grad_norm": 14.4373140335083, "learning_rate": 2.914e-06, "loss": 1.1751, "step": 35450 }, { "epoch": 3.5460000000000003, "grad_norm": 9.68008041381836, "learning_rate": 2.9120000000000002e-06, "loss": 1.1119, "step": 35460 }, { "epoch": 3.547, "grad_norm": 9.140665054321289, "learning_rate": 2.91e-06, "loss": 1.0802, "step": 35470 }, { "epoch": 3.548, "grad_norm": 14.038318634033203, "learning_rate": 2.9080000000000004e-06, "loss": 1.0655, "step": 35480 }, { "epoch": 3.549, "grad_norm": 9.689499855041504, "learning_rate": 2.9060000000000006e-06, "loss": 1.5814, "step": 35490 }, { "epoch": 3.55, "grad_norm": 15.209005355834961, "learning_rate": 2.904e-06, "loss": 1.2907, "step": 35500 }, { "epoch": 3.551, "grad_norm": 7.268345832824707, "learning_rate": 2.9020000000000003e-06, "loss": 1.1682, "step": 35510 }, { "epoch": 3.552, "grad_norm": 8.618778228759766, "learning_rate": 
2.9e-06, "loss": 0.9265, "step": 35520 }, { "epoch": 3.553, "grad_norm": 6.0367231369018555, "learning_rate": 2.8980000000000005e-06, "loss": 1.0123, "step": 35530 }, { "epoch": 3.5540000000000003, "grad_norm": 17.42850112915039, "learning_rate": 2.8960000000000003e-06, "loss": 1.0663, "step": 35540 }, { "epoch": 3.555, "grad_norm": 13.996506690979004, "learning_rate": 2.894e-06, "loss": 1.6472, "step": 35550 }, { "epoch": 3.556, "grad_norm": 5.860506534576416, "learning_rate": 2.8920000000000004e-06, "loss": 1.3576, "step": 35560 }, { "epoch": 3.557, "grad_norm": 13.581502914428711, "learning_rate": 2.89e-06, "loss": 1.4309, "step": 35570 }, { "epoch": 3.558, "grad_norm": 14.517158508300781, "learning_rate": 2.888e-06, "loss": 1.3796, "step": 35580 }, { "epoch": 3.559, "grad_norm": 14.613443374633789, "learning_rate": 2.8860000000000004e-06, "loss": 1.2851, "step": 35590 }, { "epoch": 3.56, "grad_norm": 11.06806468963623, "learning_rate": 2.8840000000000003e-06, "loss": 1.267, "step": 35600 }, { "epoch": 3.561, "grad_norm": 12.168789863586426, "learning_rate": 2.8820000000000005e-06, "loss": 1.204, "step": 35610 }, { "epoch": 3.5620000000000003, "grad_norm": 6.610494136810303, "learning_rate": 2.88e-06, "loss": 1.308, "step": 35620 }, { "epoch": 3.5629999999999997, "grad_norm": 8.08450698852539, "learning_rate": 2.8780000000000002e-06, "loss": 1.2764, "step": 35630 }, { "epoch": 3.564, "grad_norm": 14.113688468933105, "learning_rate": 2.8760000000000005e-06, "loss": 1.1534, "step": 35640 }, { "epoch": 3.565, "grad_norm": 21.434650421142578, "learning_rate": 2.874e-06, "loss": 1.134, "step": 35650 }, { "epoch": 3.566, "grad_norm": 9.465171813964844, "learning_rate": 2.872e-06, "loss": 1.1471, "step": 35660 }, { "epoch": 3.567, "grad_norm": 12.621737480163574, "learning_rate": 2.87e-06, "loss": 1.3958, "step": 35670 }, { "epoch": 3.568, "grad_norm": 15.05048656463623, "learning_rate": 2.8680000000000003e-06, "loss": 1.3684, "step": 35680 }, { "epoch": 3.569, 
"grad_norm": 11.365574836730957, "learning_rate": 2.8660000000000006e-06, "loss": 1.2435, "step": 35690 }, { "epoch": 3.57, "grad_norm": 7.216854095458984, "learning_rate": 2.864e-06, "loss": 1.2825, "step": 35700 }, { "epoch": 3.5709999999999997, "grad_norm": 10.923060417175293, "learning_rate": 2.8620000000000003e-06, "loss": 1.1284, "step": 35710 }, { "epoch": 3.572, "grad_norm": 7.507437229156494, "learning_rate": 2.86e-06, "loss": 1.2998, "step": 35720 }, { "epoch": 3.573, "grad_norm": 10.394550323486328, "learning_rate": 2.8580000000000004e-06, "loss": 1.1587, "step": 35730 }, { "epoch": 3.574, "grad_norm": 12.85277271270752, "learning_rate": 2.8560000000000003e-06, "loss": 1.3359, "step": 35740 }, { "epoch": 3.575, "grad_norm": 14.029452323913574, "learning_rate": 2.854e-06, "loss": 1.335, "step": 35750 }, { "epoch": 3.576, "grad_norm": 11.347783088684082, "learning_rate": 2.8520000000000004e-06, "loss": 0.9668, "step": 35760 }, { "epoch": 3.577, "grad_norm": 8.690044403076172, "learning_rate": 2.85e-06, "loss": 1.0456, "step": 35770 }, { "epoch": 3.578, "grad_norm": 8.568771362304688, "learning_rate": 2.848e-06, "loss": 1.246, "step": 35780 }, { "epoch": 3.5789999999999997, "grad_norm": 8.575702667236328, "learning_rate": 2.8460000000000004e-06, "loss": 0.8592, "step": 35790 }, { "epoch": 3.58, "grad_norm": 13.4548978805542, "learning_rate": 2.8440000000000002e-06, "loss": 1.1766, "step": 35800 }, { "epoch": 3.581, "grad_norm": 19.149660110473633, "learning_rate": 2.8420000000000005e-06, "loss": 1.4006, "step": 35810 }, { "epoch": 3.582, "grad_norm": 14.697803497314453, "learning_rate": 2.84e-06, "loss": 1.3519, "step": 35820 }, { "epoch": 3.583, "grad_norm": 14.334990501403809, "learning_rate": 2.838e-06, "loss": 1.1612, "step": 35830 }, { "epoch": 3.584, "grad_norm": 9.595459938049316, "learning_rate": 2.8360000000000005e-06, "loss": 1.6002, "step": 35840 }, { "epoch": 3.585, "grad_norm": 17.179277420043945, "learning_rate": 2.834e-06, "loss": 1.0407, 
"step": 35850 }, { "epoch": 3.586, "grad_norm": 6.249801158905029, "learning_rate": 2.832e-06, "loss": 1.0096, "step": 35860 }, { "epoch": 3.5869999999999997, "grad_norm": 16.804039001464844, "learning_rate": 2.83e-06, "loss": 0.6875, "step": 35870 }, { "epoch": 3.588, "grad_norm": 18.084596633911133, "learning_rate": 2.8280000000000003e-06, "loss": 1.266, "step": 35880 }, { "epoch": 3.589, "grad_norm": 13.36996078491211, "learning_rate": 2.8260000000000006e-06, "loss": 1.1478, "step": 35890 }, { "epoch": 3.59, "grad_norm": 17.47124481201172, "learning_rate": 2.824e-06, "loss": 1.337, "step": 35900 }, { "epoch": 3.591, "grad_norm": 6.509373664855957, "learning_rate": 2.8220000000000003e-06, "loss": 1.1851, "step": 35910 }, { "epoch": 3.592, "grad_norm": 16.13741111755371, "learning_rate": 2.82e-06, "loss": 1.2728, "step": 35920 }, { "epoch": 3.593, "grad_norm": 17.523176193237305, "learning_rate": 2.8180000000000004e-06, "loss": 1.1389, "step": 35930 }, { "epoch": 3.594, "grad_norm": 9.977924346923828, "learning_rate": 2.8160000000000002e-06, "loss": 0.9627, "step": 35940 }, { "epoch": 3.5949999999999998, "grad_norm": 10.849647521972656, "learning_rate": 2.814e-06, "loss": 1.0174, "step": 35950 }, { "epoch": 3.596, "grad_norm": 9.244802474975586, "learning_rate": 2.8120000000000004e-06, "loss": 0.979, "step": 35960 }, { "epoch": 3.597, "grad_norm": 6.819057941436768, "learning_rate": 2.8100000000000006e-06, "loss": 0.9126, "step": 35970 }, { "epoch": 3.598, "grad_norm": 5.826292514801025, "learning_rate": 2.808e-06, "loss": 1.3841, "step": 35980 }, { "epoch": 3.599, "grad_norm": 7.9612250328063965, "learning_rate": 2.8060000000000003e-06, "loss": 1.1496, "step": 35990 }, { "epoch": 3.6, "grad_norm": 9.257613182067871, "learning_rate": 2.804e-06, "loss": 1.1545, "step": 36000 }, { "epoch": 3.601, "grad_norm": 12.834121704101562, "learning_rate": 2.8020000000000004e-06, "loss": 1.2654, "step": 36010 }, { "epoch": 3.602, "grad_norm": 14.461570739746094, 
"learning_rate": 2.8000000000000003e-06, "loss": 0.9972, "step": 36020 }, { "epoch": 3.6029999999999998, "grad_norm": 18.78476333618164, "learning_rate": 2.798e-06, "loss": 1.4824, "step": 36030 }, { "epoch": 3.604, "grad_norm": 17.954561233520508, "learning_rate": 2.7960000000000004e-06, "loss": 1.2936, "step": 36040 }, { "epoch": 3.605, "grad_norm": 6.002723693847656, "learning_rate": 2.794e-06, "loss": 1.1285, "step": 36050 }, { "epoch": 3.606, "grad_norm": 8.60559368133545, "learning_rate": 2.792e-06, "loss": 1.1198, "step": 36060 }, { "epoch": 3.607, "grad_norm": 9.828865051269531, "learning_rate": 2.7900000000000004e-06, "loss": 1.2593, "step": 36070 }, { "epoch": 3.608, "grad_norm": 22.926645278930664, "learning_rate": 2.7880000000000002e-06, "loss": 1.0456, "step": 36080 }, { "epoch": 3.609, "grad_norm": 12.654271125793457, "learning_rate": 2.7860000000000005e-06, "loss": 1.1922, "step": 36090 }, { "epoch": 3.61, "grad_norm": 18.484392166137695, "learning_rate": 2.784e-06, "loss": 1.4182, "step": 36100 }, { "epoch": 3.6109999999999998, "grad_norm": 20.95228385925293, "learning_rate": 2.7820000000000002e-06, "loss": 1.2811, "step": 36110 }, { "epoch": 3.612, "grad_norm": 11.550802230834961, "learning_rate": 2.7800000000000005e-06, "loss": 1.2786, "step": 36120 }, { "epoch": 3.613, "grad_norm": 14.506121635437012, "learning_rate": 2.7780000000000003e-06, "loss": 1.1509, "step": 36130 }, { "epoch": 3.614, "grad_norm": 12.350043296813965, "learning_rate": 2.776e-06, "loss": 1.2058, "step": 36140 }, { "epoch": 3.615, "grad_norm": 6.468221664428711, "learning_rate": 2.774e-06, "loss": 1.0568, "step": 36150 }, { "epoch": 3.616, "grad_norm": 12.824684143066406, "learning_rate": 2.7720000000000003e-06, "loss": 1.0714, "step": 36160 }, { "epoch": 3.617, "grad_norm": 20.16745376586914, "learning_rate": 2.7700000000000006e-06, "loss": 1.4449, "step": 36170 }, { "epoch": 3.618, "grad_norm": 19.492359161376953, "learning_rate": 2.768e-06, "loss": 1.2426, "step": 36180 }, 
{ "epoch": 3.6189999999999998, "grad_norm": 2.524810552597046, "learning_rate": 2.7660000000000003e-06, "loss": 0.9952, "step": 36190 }, { "epoch": 3.62, "grad_norm": 10.060896873474121, "learning_rate": 2.764e-06, "loss": 1.0088, "step": 36200 }, { "epoch": 3.621, "grad_norm": 14.020805358886719, "learning_rate": 2.7620000000000004e-06, "loss": 1.1903, "step": 36210 }, { "epoch": 3.622, "grad_norm": 17.525585174560547, "learning_rate": 2.7600000000000003e-06, "loss": 1.1517, "step": 36220 }, { "epoch": 3.623, "grad_norm": 21.797607421875, "learning_rate": 2.758e-06, "loss": 0.9501, "step": 36230 }, { "epoch": 3.624, "grad_norm": 25.789915084838867, "learning_rate": 2.7560000000000004e-06, "loss": 1.6765, "step": 36240 }, { "epoch": 3.625, "grad_norm": 12.799304962158203, "learning_rate": 2.754e-06, "loss": 1.2307, "step": 36250 }, { "epoch": 3.626, "grad_norm": 6.159667491912842, "learning_rate": 2.752e-06, "loss": 0.7611, "step": 36260 }, { "epoch": 3.627, "grad_norm": 4.418728828430176, "learning_rate": 2.7500000000000004e-06, "loss": 1.0875, "step": 36270 }, { "epoch": 3.628, "grad_norm": 16.129789352416992, "learning_rate": 2.748e-06, "loss": 1.4589, "step": 36280 }, { "epoch": 3.629, "grad_norm": 20.727731704711914, "learning_rate": 2.7460000000000005e-06, "loss": 0.9646, "step": 36290 }, { "epoch": 3.63, "grad_norm": 26.39175033569336, "learning_rate": 2.744e-06, "loss": 1.1634, "step": 36300 }, { "epoch": 3.6310000000000002, "grad_norm": 10.212064743041992, "learning_rate": 2.742e-06, "loss": 1.1934, "step": 36310 }, { "epoch": 3.632, "grad_norm": 10.348026275634766, "learning_rate": 2.7400000000000004e-06, "loss": 1.5886, "step": 36320 }, { "epoch": 3.633, "grad_norm": 17.07811164855957, "learning_rate": 2.7380000000000003e-06, "loss": 1.45, "step": 36330 }, { "epoch": 3.634, "grad_norm": 1.956390142440796, "learning_rate": 2.736e-06, "loss": 0.9826, "step": 36340 }, { "epoch": 3.635, "grad_norm": 8.518217086791992, "learning_rate": 2.734e-06, "loss": 
1.129, "step": 36350 }, { "epoch": 3.636, "grad_norm": 21.391393661499023, "learning_rate": 2.7320000000000003e-06, "loss": 1.0946, "step": 36360 }, { "epoch": 3.637, "grad_norm": 18.108491897583008, "learning_rate": 2.7300000000000005e-06, "loss": 1.1492, "step": 36370 }, { "epoch": 3.638, "grad_norm": 18.267120361328125, "learning_rate": 2.728e-06, "loss": 1.3237, "step": 36380 }, { "epoch": 3.6390000000000002, "grad_norm": 20.8299617767334, "learning_rate": 2.7260000000000002e-06, "loss": 1.3876, "step": 36390 }, { "epoch": 3.64, "grad_norm": 8.044137001037598, "learning_rate": 2.724e-06, "loss": 1.2582, "step": 36400 }, { "epoch": 3.641, "grad_norm": 14.209968566894531, "learning_rate": 2.7220000000000004e-06, "loss": 1.5211, "step": 36410 }, { "epoch": 3.642, "grad_norm": 11.755558967590332, "learning_rate": 2.7200000000000002e-06, "loss": 1.3589, "step": 36420 }, { "epoch": 3.643, "grad_norm": 12.838407516479492, "learning_rate": 2.718e-06, "loss": 1.17, "step": 36430 }, { "epoch": 3.644, "grad_norm": 18.50345802307129, "learning_rate": 2.7160000000000003e-06, "loss": 1.5076, "step": 36440 }, { "epoch": 3.645, "grad_norm": 17.201900482177734, "learning_rate": 2.7139999999999998e-06, "loss": 1.0815, "step": 36450 }, { "epoch": 3.646, "grad_norm": 8.453629493713379, "learning_rate": 2.712e-06, "loss": 1.1051, "step": 36460 }, { "epoch": 3.6470000000000002, "grad_norm": 11.801692962646484, "learning_rate": 2.7100000000000003e-06, "loss": 1.463, "step": 36470 }, { "epoch": 3.648, "grad_norm": 8.9876127243042, "learning_rate": 2.708e-06, "loss": 1.1528, "step": 36480 }, { "epoch": 3.649, "grad_norm": 22.524517059326172, "learning_rate": 2.7060000000000004e-06, "loss": 1.3248, "step": 36490 }, { "epoch": 3.65, "grad_norm": 7.726346015930176, "learning_rate": 2.704e-06, "loss": 1.2373, "step": 36500 }, { "epoch": 3.651, "grad_norm": 5.736200332641602, "learning_rate": 2.702e-06, "loss": 1.0588, "step": 36510 }, { "epoch": 3.652, "grad_norm": 15.864219665527344, 
"learning_rate": 2.7000000000000004e-06, "loss": 1.456, "step": 36520 }, { "epoch": 3.653, "grad_norm": 12.415483474731445, "learning_rate": 2.6980000000000003e-06, "loss": 1.0343, "step": 36530 }, { "epoch": 3.654, "grad_norm": 17.885202407836914, "learning_rate": 2.696e-06, "loss": 1.1643, "step": 36540 }, { "epoch": 3.6550000000000002, "grad_norm": 18.117143630981445, "learning_rate": 2.694e-06, "loss": 1.1562, "step": 36550 }, { "epoch": 3.656, "grad_norm": 14.147415161132812, "learning_rate": 2.6920000000000002e-06, "loss": 1.3567, "step": 36560 }, { "epoch": 3.657, "grad_norm": 12.150044441223145, "learning_rate": 2.6900000000000005e-06, "loss": 1.0997, "step": 36570 }, { "epoch": 3.658, "grad_norm": 7.887204647064209, "learning_rate": 2.688e-06, "loss": 1.0242, "step": 36580 }, { "epoch": 3.659, "grad_norm": 16.363229751586914, "learning_rate": 2.686e-06, "loss": 1.1773, "step": 36590 }, { "epoch": 3.66, "grad_norm": 12.716050148010254, "learning_rate": 2.6840000000000005e-06, "loss": 1.308, "step": 36600 }, { "epoch": 3.661, "grad_norm": 9.179887771606445, "learning_rate": 2.6820000000000003e-06, "loss": 0.9988, "step": 36610 }, { "epoch": 3.662, "grad_norm": 11.666603088378906, "learning_rate": 2.68e-06, "loss": 1.2548, "step": 36620 }, { "epoch": 3.6630000000000003, "grad_norm": 13.475577354431152, "learning_rate": 2.678e-06, "loss": 1.5505, "step": 36630 }, { "epoch": 3.664, "grad_norm": 9.771602630615234, "learning_rate": 2.6760000000000003e-06, "loss": 1.3689, "step": 36640 }, { "epoch": 3.665, "grad_norm": 11.08886432647705, "learning_rate": 2.6740000000000006e-06, "loss": 1.06, "step": 36650 }, { "epoch": 3.666, "grad_norm": 12.574010848999023, "learning_rate": 2.672e-06, "loss": 1.209, "step": 36660 }, { "epoch": 3.667, "grad_norm": 5.424672603607178, "learning_rate": 2.6700000000000003e-06, "loss": 1.0502, "step": 36670 }, { "epoch": 3.668, "grad_norm": 16.4224910736084, "learning_rate": 2.668e-06, "loss": 1.3515, "step": 36680 }, { "epoch": 3.669, 
"grad_norm": 12.17082691192627, "learning_rate": 2.6660000000000004e-06, "loss": 1.4268, "step": 36690 }, { "epoch": 3.67, "grad_norm": 7.899961471557617, "learning_rate": 2.6640000000000007e-06, "loss": 1.0039, "step": 36700 }, { "epoch": 3.6710000000000003, "grad_norm": 12.674928665161133, "learning_rate": 2.662e-06, "loss": 1.2846, "step": 36710 }, { "epoch": 3.672, "grad_norm": 6.95627498626709, "learning_rate": 2.6600000000000004e-06, "loss": 1.148, "step": 36720 }, { "epoch": 3.673, "grad_norm": 11.182089805603027, "learning_rate": 2.6580000000000002e-06, "loss": 1.0662, "step": 36730 }, { "epoch": 3.674, "grad_norm": 1.2621252536773682, "learning_rate": 2.656e-06, "loss": 1.03, "step": 36740 }, { "epoch": 3.675, "grad_norm": 13.605354309082031, "learning_rate": 2.6540000000000003e-06, "loss": 1.0758, "step": 36750 }, { "epoch": 3.676, "grad_norm": 8.791426658630371, "learning_rate": 2.652e-06, "loss": 1.1042, "step": 36760 }, { "epoch": 3.677, "grad_norm": 19.263572692871094, "learning_rate": 2.6500000000000005e-06, "loss": 1.1953, "step": 36770 }, { "epoch": 3.678, "grad_norm": 23.476062774658203, "learning_rate": 2.648e-06, "loss": 1.2859, "step": 36780 }, { "epoch": 3.6790000000000003, "grad_norm": 11.831419944763184, "learning_rate": 2.646e-06, "loss": 1.2824, "step": 36790 }, { "epoch": 3.68, "grad_norm": 13.832085609436035, "learning_rate": 2.6440000000000004e-06, "loss": 1.5123, "step": 36800 }, { "epoch": 3.681, "grad_norm": 7.087924480438232, "learning_rate": 2.6420000000000003e-06, "loss": 1.4134, "step": 36810 }, { "epoch": 3.682, "grad_norm": 25.93120765686035, "learning_rate": 2.64e-06, "loss": 1.6509, "step": 36820 }, { "epoch": 3.683, "grad_norm": 11.15920639038086, "learning_rate": 2.638e-06, "loss": 1.579, "step": 36830 }, { "epoch": 3.684, "grad_norm": 8.34695816040039, "learning_rate": 2.6360000000000003e-06, "loss": 1.0334, "step": 36840 }, { "epoch": 3.685, "grad_norm": 10.159542083740234, "learning_rate": 2.6340000000000005e-06, "loss": 
1.4075, "step": 36850 }, { "epoch": 3.686, "grad_norm": 22.964771270751953, "learning_rate": 2.632e-06, "loss": 1.319, "step": 36860 }, { "epoch": 3.6870000000000003, "grad_norm": 14.692290306091309, "learning_rate": 2.6300000000000002e-06, "loss": 1.0455, "step": 36870 }, { "epoch": 3.6879999999999997, "grad_norm": 13.554398536682129, "learning_rate": 2.628e-06, "loss": 1.6862, "step": 36880 }, { "epoch": 3.689, "grad_norm": 11.248958587646484, "learning_rate": 2.6260000000000004e-06, "loss": 1.2886, "step": 36890 }, { "epoch": 3.69, "grad_norm": 5.3590898513793945, "learning_rate": 2.6240000000000006e-06, "loss": 0.9043, "step": 36900 }, { "epoch": 3.691, "grad_norm": 14.180007934570312, "learning_rate": 2.622e-06, "loss": 1.0671, "step": 36910 }, { "epoch": 3.692, "grad_norm": 10.051776885986328, "learning_rate": 2.6200000000000003e-06, "loss": 1.2247, "step": 36920 }, { "epoch": 3.693, "grad_norm": 15.782591819763184, "learning_rate": 2.618e-06, "loss": 1.1562, "step": 36930 }, { "epoch": 3.694, "grad_norm": 17.544021606445312, "learning_rate": 2.616e-06, "loss": 1.5871, "step": 36940 }, { "epoch": 3.695, "grad_norm": 11.220580101013184, "learning_rate": 2.6140000000000003e-06, "loss": 1.3203, "step": 36950 }, { "epoch": 3.6959999999999997, "grad_norm": 8.220802307128906, "learning_rate": 2.612e-06, "loss": 0.9136, "step": 36960 }, { "epoch": 3.697, "grad_norm": 13.638473510742188, "learning_rate": 2.6100000000000004e-06, "loss": 1.1787, "step": 36970 }, { "epoch": 3.698, "grad_norm": 13.971834182739258, "learning_rate": 2.608e-06, "loss": 1.1751, "step": 36980 }, { "epoch": 3.699, "grad_norm": 9.516674041748047, "learning_rate": 2.606e-06, "loss": 1.0989, "step": 36990 }, { "epoch": 3.7, "grad_norm": 4.6494951248168945, "learning_rate": 2.6040000000000004e-06, "loss": 1.089, "step": 37000 }, { "epoch": 3.701, "grad_norm": 14.530884742736816, "learning_rate": 2.6020000000000002e-06, "loss": 1.1673, "step": 37010 }, { "epoch": 3.702, "grad_norm": 
6.113986492156982, "learning_rate": 2.6e-06, "loss": 1.3669, "step": 37020 }, { "epoch": 3.703, "grad_norm": 6.492940902709961, "learning_rate": 2.598e-06, "loss": 1.4126, "step": 37030 }, { "epoch": 3.7039999999999997, "grad_norm": 10.635706901550293, "learning_rate": 2.5960000000000002e-06, "loss": 1.1158, "step": 37040 }, { "epoch": 3.705, "grad_norm": 6.2552313804626465, "learning_rate": 2.5940000000000005e-06, "loss": 0.8061, "step": 37050 }, { "epoch": 3.706, "grad_norm": 13.647176742553711, "learning_rate": 2.592e-06, "loss": 1.3224, "step": 37060 }, { "epoch": 3.707, "grad_norm": 15.083850860595703, "learning_rate": 2.59e-06, "loss": 1.1179, "step": 37070 }, { "epoch": 3.708, "grad_norm": 11.59984302520752, "learning_rate": 2.588e-06, "loss": 1.1568, "step": 37080 }, { "epoch": 3.709, "grad_norm": 9.519498825073242, "learning_rate": 2.5860000000000003e-06, "loss": 1.1348, "step": 37090 }, { "epoch": 3.71, "grad_norm": 14.054869651794434, "learning_rate": 2.5840000000000006e-06, "loss": 1.1043, "step": 37100 }, { "epoch": 3.711, "grad_norm": 11.87894058227539, "learning_rate": 2.582e-06, "loss": 1.4287, "step": 37110 }, { "epoch": 3.7119999999999997, "grad_norm": 11.50086498260498, "learning_rate": 2.5800000000000003e-06, "loss": 1.0157, "step": 37120 }, { "epoch": 3.713, "grad_norm": 11.393383026123047, "learning_rate": 2.578e-06, "loss": 1.1654, "step": 37130 }, { "epoch": 3.714, "grad_norm": 14.00998592376709, "learning_rate": 2.576e-06, "loss": 1.1639, "step": 37140 }, { "epoch": 3.715, "grad_norm": 5.437056541442871, "learning_rate": 2.5740000000000003e-06, "loss": 1.4599, "step": 37150 }, { "epoch": 3.716, "grad_norm": 19.12710952758789, "learning_rate": 2.572e-06, "loss": 1.5201, "step": 37160 }, { "epoch": 3.717, "grad_norm": 3.114049196243286, "learning_rate": 2.5700000000000004e-06, "loss": 1.2267, "step": 37170 }, { "epoch": 3.718, "grad_norm": 9.474925994873047, "learning_rate": 2.568e-06, "loss": 1.0521, "step": 37180 }, { "epoch": 3.719, 
"grad_norm": 8.14013385772705, "learning_rate": 2.566e-06, "loss": 0.9744, "step": 37190 }, { "epoch": 3.7199999999999998, "grad_norm": 6.267669677734375, "learning_rate": 2.5640000000000004e-06, "loss": 1.2222, "step": 37200 }, { "epoch": 3.721, "grad_norm": 6.764854907989502, "learning_rate": 2.562e-06, "loss": 1.0106, "step": 37210 }, { "epoch": 3.722, "grad_norm": 12.530685424804688, "learning_rate": 2.56e-06, "loss": 1.0711, "step": 37220 }, { "epoch": 3.723, "grad_norm": 22.11410903930664, "learning_rate": 2.5580000000000003e-06, "loss": 0.9691, "step": 37230 }, { "epoch": 3.724, "grad_norm": 25.86504364013672, "learning_rate": 2.556e-06, "loss": 1.251, "step": 37240 }, { "epoch": 3.725, "grad_norm": 18.099578857421875, "learning_rate": 2.5542e-06, "loss": 1.4867, "step": 37250 }, { "epoch": 3.726, "grad_norm": 15.700010299682617, "learning_rate": 2.5522000000000003e-06, "loss": 1.6044, "step": 37260 }, { "epoch": 3.727, "grad_norm": 14.464618682861328, "learning_rate": 2.5502000000000006e-06, "loss": 1.327, "step": 37270 }, { "epoch": 3.7279999999999998, "grad_norm": 15.875319480895996, "learning_rate": 2.5482e-06, "loss": 1.0415, "step": 37280 }, { "epoch": 3.729, "grad_norm": 16.791244506835938, "learning_rate": 2.5462000000000003e-06, "loss": 1.0948, "step": 37290 }, { "epoch": 3.73, "grad_norm": 6.164147853851318, "learning_rate": 2.5442e-06, "loss": 1.7704, "step": 37300 }, { "epoch": 3.731, "grad_norm": 6.356234550476074, "learning_rate": 2.5422000000000004e-06, "loss": 1.2755, "step": 37310 }, { "epoch": 3.732, "grad_norm": 9.98618221282959, "learning_rate": 2.5402000000000003e-06, "loss": 1.1305, "step": 37320 }, { "epoch": 3.733, "grad_norm": 7.874368190765381, "learning_rate": 2.5382e-06, "loss": 1.0434, "step": 37330 }, { "epoch": 3.734, "grad_norm": 4.246832370758057, "learning_rate": 2.5362000000000004e-06, "loss": 1.0562, "step": 37340 }, { "epoch": 3.735, "grad_norm": 7.599032402038574, "learning_rate": 2.5342e-06, "loss": 1.4693, "step": 
37350 }, { "epoch": 3.7359999999999998, "grad_norm": 9.574150085449219, "learning_rate": 2.5322e-06, "loss": 1.1877, "step": 37360 }, { "epoch": 3.737, "grad_norm": 16.0639591217041, "learning_rate": 2.5302000000000004e-06, "loss": 1.0039, "step": 37370 }, { "epoch": 3.738, "grad_norm": 17.091434478759766, "learning_rate": 2.5282e-06, "loss": 1.5169, "step": 37380 }, { "epoch": 3.739, "grad_norm": 11.7332124710083, "learning_rate": 2.5262000000000005e-06, "loss": 1.1284, "step": 37390 }, { "epoch": 3.74, "grad_norm": 6.14620304107666, "learning_rate": 2.5242e-06, "loss": 1.0751, "step": 37400 }, { "epoch": 3.741, "grad_norm": 12.942353248596191, "learning_rate": 2.5222e-06, "loss": 1.4284, "step": 37410 }, { "epoch": 3.742, "grad_norm": 12.845550537109375, "learning_rate": 2.5202000000000005e-06, "loss": 1.1306, "step": 37420 }, { "epoch": 3.743, "grad_norm": 7.463331699371338, "learning_rate": 2.5182e-06, "loss": 1.3782, "step": 37430 }, { "epoch": 3.7439999999999998, "grad_norm": 17.15587043762207, "learning_rate": 2.5162e-06, "loss": 1.5442, "step": 37440 }, { "epoch": 3.745, "grad_norm": 10.905468940734863, "learning_rate": 2.5142e-06, "loss": 1.1357, "step": 37450 }, { "epoch": 3.746, "grad_norm": 11.443551063537598, "learning_rate": 2.5122000000000003e-06, "loss": 0.964, "step": 37460 }, { "epoch": 3.747, "grad_norm": 12.683226585388184, "learning_rate": 2.5102000000000005e-06, "loss": 1.0837, "step": 37470 }, { "epoch": 3.748, "grad_norm": 18.750415802001953, "learning_rate": 2.5082e-06, "loss": 1.3452, "step": 37480 }, { "epoch": 3.749, "grad_norm": 8.066972732543945, "learning_rate": 2.5062000000000002e-06, "loss": 1.1663, "step": 37490 }, { "epoch": 3.75, "grad_norm": 16.481922149658203, "learning_rate": 2.5042e-06, "loss": 1.2579, "step": 37500 }, { "epoch": 3.751, "grad_norm": 11.642102241516113, "learning_rate": 2.5022000000000004e-06, "loss": 1.014, "step": 37510 }, { "epoch": 3.752, "grad_norm": 10.27799129486084, "learning_rate": 
2.5002000000000002e-06, "loss": 1.2442, "step": 37520 }, { "epoch": 3.753, "grad_norm": 3.742845296859741, "learning_rate": 2.4982e-06, "loss": 1.0168, "step": 37530 }, { "epoch": 3.754, "grad_norm": 13.51158332824707, "learning_rate": 2.4962000000000003e-06, "loss": 1.1422, "step": 37540 }, { "epoch": 3.755, "grad_norm": 16.03113555908203, "learning_rate": 2.4942e-06, "loss": 1.0991, "step": 37550 }, { "epoch": 3.7560000000000002, "grad_norm": 9.47546672821045, "learning_rate": 2.4922e-06, "loss": 1.0483, "step": 37560 }, { "epoch": 3.757, "grad_norm": 17.5817928314209, "learning_rate": 2.4902000000000003e-06, "loss": 1.0064, "step": 37570 }, { "epoch": 3.758, "grad_norm": 13.51327896118164, "learning_rate": 2.4882e-06, "loss": 1.5105, "step": 37580 }, { "epoch": 3.759, "grad_norm": 11.975251197814941, "learning_rate": 2.4862000000000004e-06, "loss": 1.0095, "step": 37590 }, { "epoch": 3.76, "grad_norm": 14.530927658081055, "learning_rate": 2.4842000000000003e-06, "loss": 1.156, "step": 37600 }, { "epoch": 3.761, "grad_norm": 24.944501876831055, "learning_rate": 2.4822e-06, "loss": 0.9513, "step": 37610 }, { "epoch": 3.762, "grad_norm": 5.367509365081787, "learning_rate": 2.4802e-06, "loss": 0.9004, "step": 37620 }, { "epoch": 3.763, "grad_norm": 5.479240417480469, "learning_rate": 2.4782000000000003e-06, "loss": 1.0432, "step": 37630 }, { "epoch": 3.7640000000000002, "grad_norm": 10.34628677368164, "learning_rate": 2.4762e-06, "loss": 1.1895, "step": 37640 }, { "epoch": 3.765, "grad_norm": 11.60633659362793, "learning_rate": 2.4742000000000004e-06, "loss": 1.2478, "step": 37650 }, { "epoch": 3.766, "grad_norm": 13.319875717163086, "learning_rate": 2.4722000000000002e-06, "loss": 1.2157, "step": 37660 }, { "epoch": 3.767, "grad_norm": 15.343708992004395, "learning_rate": 2.4702e-06, "loss": 1.3538, "step": 37670 }, { "epoch": 3.768, "grad_norm": 15.542409896850586, "learning_rate": 2.4682000000000004e-06, "loss": 1.3609, "step": 37680 }, { "epoch": 3.769, 
"grad_norm": 16.866321563720703, "learning_rate": 2.4662e-06, "loss": 1.2886, "step": 37690 }, { "epoch": 3.77, "grad_norm": 18.840110778808594, "learning_rate": 2.4642e-06, "loss": 0.9752, "step": 37700 }, { "epoch": 3.771, "grad_norm": 8.74290943145752, "learning_rate": 2.4622000000000003e-06, "loss": 1.3683, "step": 37710 }, { "epoch": 3.7720000000000002, "grad_norm": 4.357846736907959, "learning_rate": 2.4602e-06, "loss": 0.9678, "step": 37720 }, { "epoch": 3.773, "grad_norm": 13.368971824645996, "learning_rate": 2.4582000000000005e-06, "loss": 1.397, "step": 37730 }, { "epoch": 3.774, "grad_norm": 9.007780075073242, "learning_rate": 2.4562000000000003e-06, "loss": 0.956, "step": 37740 }, { "epoch": 3.775, "grad_norm": 6.478409767150879, "learning_rate": 2.4542e-06, "loss": 1.2695, "step": 37750 }, { "epoch": 3.776, "grad_norm": 9.990360260009766, "learning_rate": 2.4522e-06, "loss": 1.1857, "step": 37760 }, { "epoch": 3.777, "grad_norm": 10.997722625732422, "learning_rate": 2.4502000000000003e-06, "loss": 1.4881, "step": 37770 }, { "epoch": 3.778, "grad_norm": 12.49606704711914, "learning_rate": 2.4482e-06, "loss": 1.1437, "step": 37780 }, { "epoch": 3.779, "grad_norm": 14.01514720916748, "learning_rate": 2.4462000000000004e-06, "loss": 1.1204, "step": 37790 }, { "epoch": 3.7800000000000002, "grad_norm": 12.415432929992676, "learning_rate": 2.4442000000000002e-06, "loss": 1.4102, "step": 37800 }, { "epoch": 3.781, "grad_norm": 24.736719131469727, "learning_rate": 2.4422e-06, "loss": 1.2113, "step": 37810 }, { "epoch": 3.782, "grad_norm": 7.65277099609375, "learning_rate": 2.4402e-06, "loss": 1.1207, "step": 37820 }, { "epoch": 3.783, "grad_norm": 14.657126426696777, "learning_rate": 2.4382000000000002e-06, "loss": 1.073, "step": 37830 }, { "epoch": 3.784, "grad_norm": 13.241527557373047, "learning_rate": 2.4362e-06, "loss": 0.8693, "step": 37840 }, { "epoch": 3.785, "grad_norm": 17.25339698791504, "learning_rate": 2.4342000000000003e-06, "loss": 1.2488, 
"step": 37850 }, { "epoch": 3.786, "grad_norm": 14.380743980407715, "learning_rate": 2.4322e-06, "loss": 1.1669, "step": 37860 }, { "epoch": 3.787, "grad_norm": 11.47537899017334, "learning_rate": 2.4302000000000005e-06, "loss": 1.4015, "step": 37870 }, { "epoch": 3.7880000000000003, "grad_norm": 14.436346054077148, "learning_rate": 2.4282000000000003e-06, "loss": 0.993, "step": 37880 }, { "epoch": 3.789, "grad_norm": 13.6403169631958, "learning_rate": 2.4262e-06, "loss": 1.3097, "step": 37890 }, { "epoch": 3.79, "grad_norm": 9.384072303771973, "learning_rate": 2.4242e-06, "loss": 1.3464, "step": 37900 }, { "epoch": 3.791, "grad_norm": 7.495100498199463, "learning_rate": 2.4222000000000003e-06, "loss": 1.2927, "step": 37910 }, { "epoch": 3.792, "grad_norm": 14.62960147857666, "learning_rate": 2.4202e-06, "loss": 1.0146, "step": 37920 }, { "epoch": 3.793, "grad_norm": 9.809941291809082, "learning_rate": 2.4182000000000004e-06, "loss": 1.2766, "step": 37930 }, { "epoch": 3.794, "grad_norm": 9.882699012756348, "learning_rate": 2.4162000000000003e-06, "loss": 0.9037, "step": 37940 }, { "epoch": 3.795, "grad_norm": 10.173505783081055, "learning_rate": 2.4142e-06, "loss": 1.0216, "step": 37950 }, { "epoch": 3.7960000000000003, "grad_norm": 13.312867164611816, "learning_rate": 2.4122e-06, "loss": 1.4801, "step": 37960 }, { "epoch": 3.797, "grad_norm": 14.587601661682129, "learning_rate": 2.4102000000000002e-06, "loss": 0.9533, "step": 37970 }, { "epoch": 3.798, "grad_norm": 10.390947341918945, "learning_rate": 2.4082e-06, "loss": 1.1268, "step": 37980 }, { "epoch": 3.799, "grad_norm": 15.766487121582031, "learning_rate": 2.4062000000000004e-06, "loss": 1.3979, "step": 37990 }, { "epoch": 3.8, "grad_norm": 13.392780303955078, "learning_rate": 2.4042e-06, "loss": 0.9694, "step": 38000 }, { "epoch": 3.801, "grad_norm": 12.483599662780762, "learning_rate": 2.4022e-06, "loss": 1.1086, "step": 38010 }, { "epoch": 3.802, "grad_norm": 10.133040428161621, "learning_rate": 
2.4002000000000003e-06, "loss": 0.8126, "step": 38020 }, { "epoch": 3.803, "grad_norm": 17.023761749267578, "learning_rate": 2.3982e-06, "loss": 1.2898, "step": 38030 }, { "epoch": 3.8040000000000003, "grad_norm": 14.7162446975708, "learning_rate": 2.3962e-06, "loss": 1.3709, "step": 38040 }, { "epoch": 3.805, "grad_norm": 13.780320167541504, "learning_rate": 2.3942000000000003e-06, "loss": 1.2211, "step": 38050 }, { "epoch": 3.806, "grad_norm": 17.116544723510742, "learning_rate": 2.3922e-06, "loss": 1.2862, "step": 38060 }, { "epoch": 3.807, "grad_norm": 15.063960075378418, "learning_rate": 2.3902000000000004e-06, "loss": 0.9882, "step": 38070 }, { "epoch": 3.808, "grad_norm": 5.567142963409424, "learning_rate": 2.3882000000000003e-06, "loss": 1.1569, "step": 38080 }, { "epoch": 3.809, "grad_norm": 13.843855857849121, "learning_rate": 2.3862e-06, "loss": 1.7105, "step": 38090 }, { "epoch": 3.81, "grad_norm": 8.842657089233398, "learning_rate": 2.3842e-06, "loss": 0.9502, "step": 38100 }, { "epoch": 3.811, "grad_norm": 6.113519191741943, "learning_rate": 2.3822000000000002e-06, "loss": 1.1137, "step": 38110 }, { "epoch": 3.8120000000000003, "grad_norm": 15.566743850708008, "learning_rate": 2.3802e-06, "loss": 1.2962, "step": 38120 }, { "epoch": 3.8129999999999997, "grad_norm": 16.298362731933594, "learning_rate": 2.3782000000000004e-06, "loss": 1.381, "step": 38130 }, { "epoch": 3.814, "grad_norm": 11.878606796264648, "learning_rate": 2.3762000000000002e-06, "loss": 1.1471, "step": 38140 }, { "epoch": 3.815, "grad_norm": 18.62247657775879, "learning_rate": 2.3742e-06, "loss": 1.4622, "step": 38150 }, { "epoch": 3.816, "grad_norm": 16.04578399658203, "learning_rate": 2.3722e-06, "loss": 1.5767, "step": 38160 }, { "epoch": 3.817, "grad_norm": 10.105911254882812, "learning_rate": 2.3702e-06, "loss": 1.4737, "step": 38170 }, { "epoch": 3.818, "grad_norm": 12.15550708770752, "learning_rate": 2.3682e-06, "loss": 1.127, "step": 38180 }, { "epoch": 3.819, "grad_norm": 
14.195103645324707, "learning_rate": 2.3662000000000003e-06, "loss": 1.2135, "step": 38190 }, { "epoch": 3.82, "grad_norm": 8.003168106079102, "learning_rate": 2.3642e-06, "loss": 1.019, "step": 38200 }, { "epoch": 3.8209999999999997, "grad_norm": 14.367325782775879, "learning_rate": 2.3622000000000004e-06, "loss": 1.216, "step": 38210 }, { "epoch": 3.822, "grad_norm": 16.803428649902344, "learning_rate": 2.3602000000000003e-06, "loss": 1.3464, "step": 38220 }, { "epoch": 3.823, "grad_norm": 9.78681755065918, "learning_rate": 2.3582e-06, "loss": 1.4767, "step": 38230 }, { "epoch": 3.824, "grad_norm": 11.72006893157959, "learning_rate": 2.3562e-06, "loss": 1.3687, "step": 38240 }, { "epoch": 3.825, "grad_norm": 11.226656913757324, "learning_rate": 2.3542000000000003e-06, "loss": 1.3891, "step": 38250 }, { "epoch": 3.826, "grad_norm": 15.027206420898438, "learning_rate": 2.3522e-06, "loss": 1.1606, "step": 38260 }, { "epoch": 3.827, "grad_norm": 7.722898006439209, "learning_rate": 2.3502000000000004e-06, "loss": 1.0091, "step": 38270 }, { "epoch": 3.828, "grad_norm": 9.572613716125488, "learning_rate": 2.3482000000000002e-06, "loss": 1.167, "step": 38280 }, { "epoch": 3.8289999999999997, "grad_norm": 8.165882110595703, "learning_rate": 2.3462e-06, "loss": 1.3308, "step": 38290 }, { "epoch": 3.83, "grad_norm": 6.780120372772217, "learning_rate": 2.3442e-06, "loss": 1.3709, "step": 38300 }, { "epoch": 3.831, "grad_norm": 8.761841773986816, "learning_rate": 2.3422e-06, "loss": 1.1885, "step": 38310 }, { "epoch": 3.832, "grad_norm": 9.897774696350098, "learning_rate": 2.3402e-06, "loss": 1.3131, "step": 38320 }, { "epoch": 3.833, "grad_norm": 8.843579292297363, "learning_rate": 2.3382000000000003e-06, "loss": 1.0232, "step": 38330 }, { "epoch": 3.834, "grad_norm": 8.747920036315918, "learning_rate": 2.3362e-06, "loss": 0.9621, "step": 38340 }, { "epoch": 3.835, "grad_norm": 13.01405143737793, "learning_rate": 2.3342e-06, "loss": 1.1395, "step": 38350 }, { "epoch": 3.836, 
"grad_norm": 16.146595001220703, "learning_rate": 2.3322000000000003e-06, "loss": 1.2677, "step": 38360 }, { "epoch": 3.8369999999999997, "grad_norm": 13.549741744995117, "learning_rate": 2.3302e-06, "loss": 1.2888, "step": 38370 }, { "epoch": 3.838, "grad_norm": 8.407752990722656, "learning_rate": 2.3282e-06, "loss": 1.4747, "step": 38380 }, { "epoch": 3.839, "grad_norm": 9.734667778015137, "learning_rate": 2.3262000000000003e-06, "loss": 1.2384, "step": 38390 }, { "epoch": 3.84, "grad_norm": 11.550728797912598, "learning_rate": 2.3242e-06, "loss": 1.3916, "step": 38400 }, { "epoch": 3.841, "grad_norm": 12.675631523132324, "learning_rate": 2.3222000000000004e-06, "loss": 1.0983, "step": 38410 }, { "epoch": 3.842, "grad_norm": 5.010979175567627, "learning_rate": 2.3202000000000002e-06, "loss": 1.2378, "step": 38420 }, { "epoch": 3.843, "grad_norm": 12.073360443115234, "learning_rate": 2.3182e-06, "loss": 1.1127, "step": 38430 }, { "epoch": 3.844, "grad_norm": 10.695977210998535, "learning_rate": 2.3162e-06, "loss": 1.2293, "step": 38440 }, { "epoch": 3.8449999999999998, "grad_norm": 7.519216060638428, "learning_rate": 2.3142000000000002e-06, "loss": 0.9377, "step": 38450 }, { "epoch": 3.846, "grad_norm": 9.301962852478027, "learning_rate": 2.3122e-06, "loss": 0.9873, "step": 38460 }, { "epoch": 3.847, "grad_norm": 12.23387336730957, "learning_rate": 2.3102000000000003e-06, "loss": 1.2708, "step": 38470 }, { "epoch": 3.848, "grad_norm": 11.03333568572998, "learning_rate": 2.3082e-06, "loss": 1.2395, "step": 38480 }, { "epoch": 3.849, "grad_norm": 7.471413612365723, "learning_rate": 2.3062e-06, "loss": 1.3549, "step": 38490 }, { "epoch": 3.85, "grad_norm": 13.695836067199707, "learning_rate": 2.3042000000000003e-06, "loss": 1.4828, "step": 38500 }, { "epoch": 3.851, "grad_norm": 13.552950859069824, "learning_rate": 2.3022e-06, "loss": 1.2599, "step": 38510 }, { "epoch": 3.852, "grad_norm": 8.290284156799316, "learning_rate": 2.3002e-06, "loss": 1.3018, "step": 38520 
}, { "epoch": 3.8529999999999998, "grad_norm": 8.726314544677734, "learning_rate": 2.2982000000000003e-06, "loss": 1.3201, "step": 38530 }, { "epoch": 3.854, "grad_norm": 9.286538124084473, "learning_rate": 2.2962e-06, "loss": 0.816, "step": 38540 }, { "epoch": 3.855, "grad_norm": 9.999709129333496, "learning_rate": 2.2942000000000004e-06, "loss": 1.2965, "step": 38550 }, { "epoch": 3.856, "grad_norm": 10.900120735168457, "learning_rate": 2.2922000000000003e-06, "loss": 1.1841, "step": 38560 }, { "epoch": 3.857, "grad_norm": 10.506426811218262, "learning_rate": 2.2902e-06, "loss": 1.1653, "step": 38570 }, { "epoch": 3.858, "grad_norm": 6.279806137084961, "learning_rate": 2.2882e-06, "loss": 1.09, "step": 38580 }, { "epoch": 3.859, "grad_norm": 3.8418819904327393, "learning_rate": 2.2862000000000002e-06, "loss": 1.0775, "step": 38590 }, { "epoch": 3.86, "grad_norm": 8.201671600341797, "learning_rate": 2.2842000000000005e-06, "loss": 1.0939, "step": 38600 }, { "epoch": 3.8609999999999998, "grad_norm": 10.66903305053711, "learning_rate": 2.2822000000000004e-06, "loss": 1.0349, "step": 38610 }, { "epoch": 3.862, "grad_norm": 16.716264724731445, "learning_rate": 2.2802e-06, "loss": 1.1746, "step": 38620 }, { "epoch": 3.863, "grad_norm": 8.715425491333008, "learning_rate": 2.2782e-06, "loss": 1.1843, "step": 38630 }, { "epoch": 3.864, "grad_norm": 6.37714147567749, "learning_rate": 2.2762e-06, "loss": 1.4304, "step": 38640 }, { "epoch": 3.865, "grad_norm": 13.692492485046387, "learning_rate": 2.2742e-06, "loss": 1.3738, "step": 38650 }, { "epoch": 3.866, "grad_norm": 8.078996658325195, "learning_rate": 2.2722e-06, "loss": 0.9851, "step": 38660 }, { "epoch": 3.867, "grad_norm": 12.841557502746582, "learning_rate": 2.2702000000000003e-06, "loss": 1.4776, "step": 38670 }, { "epoch": 3.868, "grad_norm": 29.28874397277832, "learning_rate": 2.2682e-06, "loss": 1.4468, "step": 38680 }, { "epoch": 3.8689999999999998, "grad_norm": 21.274463653564453, "learning_rate": 2.2662e-06, 
"loss": 1.4106, "step": 38690 }, { "epoch": 3.87, "grad_norm": 5.536740303039551, "learning_rate": 2.2642000000000003e-06, "loss": 1.6215, "step": 38700 }, { "epoch": 3.871, "grad_norm": 10.827073097229004, "learning_rate": 2.2622e-06, "loss": 1.3824, "step": 38710 }, { "epoch": 3.872, "grad_norm": 6.931528091430664, "learning_rate": 2.2602e-06, "loss": 1.3013, "step": 38720 }, { "epoch": 3.873, "grad_norm": 19.29330062866211, "learning_rate": 2.2582000000000002e-06, "loss": 1.3871, "step": 38730 }, { "epoch": 3.874, "grad_norm": 12.133950233459473, "learning_rate": 2.2562e-06, "loss": 1.0613, "step": 38740 }, { "epoch": 3.875, "grad_norm": 12.192902565002441, "learning_rate": 2.2542000000000004e-06, "loss": 1.1907, "step": 38750 }, { "epoch": 3.876, "grad_norm": 11.40755844116211, "learning_rate": 2.2522000000000002e-06, "loss": 1.1024, "step": 38760 }, { "epoch": 3.877, "grad_norm": 6.671988010406494, "learning_rate": 2.2502e-06, "loss": 1.2264, "step": 38770 }, { "epoch": 3.878, "grad_norm": 9.963266372680664, "learning_rate": 2.2482e-06, "loss": 1.253, "step": 38780 }, { "epoch": 3.879, "grad_norm": 11.280170440673828, "learning_rate": 2.2462e-06, "loss": 1.171, "step": 38790 }, { "epoch": 3.88, "grad_norm": 10.309623718261719, "learning_rate": 2.2442000000000005e-06, "loss": 0.893, "step": 38800 }, { "epoch": 3.8810000000000002, "grad_norm": 16.783235549926758, "learning_rate": 2.2422000000000003e-06, "loss": 1.1308, "step": 38810 }, { "epoch": 3.882, "grad_norm": 10.35633659362793, "learning_rate": 2.2402e-06, "loss": 1.3019, "step": 38820 }, { "epoch": 3.883, "grad_norm": 18.56487274169922, "learning_rate": 2.2382e-06, "loss": 1.1619, "step": 38830 }, { "epoch": 3.884, "grad_norm": 10.23858642578125, "learning_rate": 2.2362000000000003e-06, "loss": 1.1176, "step": 38840 }, { "epoch": 3.885, "grad_norm": 9.214202880859375, "learning_rate": 2.2342e-06, "loss": 1.3365, "step": 38850 }, { "epoch": 3.886, "grad_norm": 6.946726322174072, "learning_rate": 
2.2322e-06, "loss": 1.0709, "step": 38860 }, { "epoch": 3.887, "grad_norm": 10.780777931213379, "learning_rate": 2.2302000000000003e-06, "loss": 1.2471, "step": 38870 }, { "epoch": 3.888, "grad_norm": 10.035628318786621, "learning_rate": 2.2282e-06, "loss": 1.3503, "step": 38880 }, { "epoch": 3.8890000000000002, "grad_norm": 13.412074089050293, "learning_rate": 2.2262000000000004e-06, "loss": 1.178, "step": 38890 }, { "epoch": 3.89, "grad_norm": 11.035019874572754, "learning_rate": 2.2242000000000002e-06, "loss": 1.1522, "step": 38900 }, { "epoch": 3.891, "grad_norm": 13.609627723693848, "learning_rate": 2.2222e-06, "loss": 1.0773, "step": 38910 }, { "epoch": 3.892, "grad_norm": 14.160517692565918, "learning_rate": 2.2202e-06, "loss": 1.5197, "step": 38920 }, { "epoch": 3.893, "grad_norm": 9.225414276123047, "learning_rate": 2.2182e-06, "loss": 1.3432, "step": 38930 }, { "epoch": 3.894, "grad_norm": 14.875001907348633, "learning_rate": 2.2162000000000005e-06, "loss": 1.3332, "step": 38940 }, { "epoch": 3.895, "grad_norm": 14.877756118774414, "learning_rate": 2.2142000000000003e-06, "loss": 1.4285, "step": 38950 }, { "epoch": 3.896, "grad_norm": 12.281317710876465, "learning_rate": 2.2122e-06, "loss": 1.1517, "step": 38960 }, { "epoch": 3.8970000000000002, "grad_norm": 8.571146011352539, "learning_rate": 2.2102e-06, "loss": 1.4144, "step": 38970 }, { "epoch": 3.898, "grad_norm": 4.709758758544922, "learning_rate": 2.2082e-06, "loss": 1.1561, "step": 38980 }, { "epoch": 3.899, "grad_norm": 15.320708274841309, "learning_rate": 2.2062e-06, "loss": 1.0347, "step": 38990 }, { "epoch": 3.9, "grad_norm": 10.320055961608887, "learning_rate": 2.2042000000000004e-06, "loss": 1.0229, "step": 39000 }, { "epoch": 3.901, "grad_norm": 8.622745513916016, "learning_rate": 2.2022000000000003e-06, "loss": 1.468, "step": 39010 }, { "epoch": 3.902, "grad_norm": 8.651406288146973, "learning_rate": 2.2002e-06, "loss": 1.1901, "step": 39020 }, { "epoch": 3.903, "grad_norm": 
9.010873794555664, "learning_rate": 2.1982e-06, "loss": 1.4265, "step": 39030 }, { "epoch": 3.904, "grad_norm": 15.541300773620605, "learning_rate": 2.1962000000000003e-06, "loss": 1.3429, "step": 39040 }, { "epoch": 3.9050000000000002, "grad_norm": 15.52620792388916, "learning_rate": 2.1942e-06, "loss": 1.3845, "step": 39050 }, { "epoch": 3.906, "grad_norm": 7.8189239501953125, "learning_rate": 2.1922e-06, "loss": 1.1793, "step": 39060 }, { "epoch": 3.907, "grad_norm": 7.524705410003662, "learning_rate": 2.1902000000000002e-06, "loss": 0.9377, "step": 39070 }, { "epoch": 3.908, "grad_norm": 11.998468399047852, "learning_rate": 2.1882e-06, "loss": 1.2431, "step": 39080 }, { "epoch": 3.909, "grad_norm": 18.20262336730957, "learning_rate": 2.1862000000000003e-06, "loss": 1.3673, "step": 39090 }, { "epoch": 3.91, "grad_norm": 4.264401435852051, "learning_rate": 2.1842e-06, "loss": 1.2569, "step": 39100 }, { "epoch": 3.911, "grad_norm": 10.863852500915527, "learning_rate": 2.1822e-06, "loss": 1.216, "step": 39110 }, { "epoch": 3.912, "grad_norm": 9.973640441894531, "learning_rate": 2.1802e-06, "loss": 1.0703, "step": 39120 }, { "epoch": 3.9130000000000003, "grad_norm": 6.345876693725586, "learning_rate": 2.1782e-06, "loss": 1.1872, "step": 39130 }, { "epoch": 3.914, "grad_norm": 12.918730735778809, "learning_rate": 2.1762000000000004e-06, "loss": 1.1203, "step": 39140 }, { "epoch": 3.915, "grad_norm": 9.743945121765137, "learning_rate": 2.1742000000000003e-06, "loss": 1.1562, "step": 39150 }, { "epoch": 3.916, "grad_norm": 10.633359909057617, "learning_rate": 2.1722e-06, "loss": 1.2903, "step": 39160 }, { "epoch": 3.917, "grad_norm": 10.342625617980957, "learning_rate": 2.1702e-06, "loss": 1.2968, "step": 39170 }, { "epoch": 3.918, "grad_norm": 9.322047233581543, "learning_rate": 2.1682000000000003e-06, "loss": 1.0395, "step": 39180 }, { "epoch": 3.919, "grad_norm": 9.705948829650879, "learning_rate": 2.1662e-06, "loss": 1.3505, "step": 39190 }, { "epoch": 3.92, 
"grad_norm": 6.615529537200928, "learning_rate": 2.1642000000000004e-06, "loss": 1.2587, "step": 39200 }, { "epoch": 3.9210000000000003, "grad_norm": 9.800826072692871, "learning_rate": 2.1622000000000002e-06, "loss": 1.2829, "step": 39210 }, { "epoch": 3.922, "grad_norm": 9.191574096679688, "learning_rate": 2.1602e-06, "loss": 1.0858, "step": 39220 }, { "epoch": 3.923, "grad_norm": 8.503543853759766, "learning_rate": 2.1582000000000004e-06, "loss": 0.9438, "step": 39230 }, { "epoch": 3.924, "grad_norm": 9.25988483428955, "learning_rate": 2.1562e-06, "loss": 1.1584, "step": 39240 }, { "epoch": 3.925, "grad_norm": 8.112602233886719, "learning_rate": 2.1544e-06, "loss": 1.1373, "step": 39250 }, { "epoch": 3.926, "grad_norm": 18.134437561035156, "learning_rate": 2.1524e-06, "loss": 1.455, "step": 39260 }, { "epoch": 3.927, "grad_norm": 11.678825378417969, "learning_rate": 2.1504e-06, "loss": 0.9958, "step": 39270 }, { "epoch": 3.928, "grad_norm": 12.235458374023438, "learning_rate": 2.1484e-06, "loss": 1.3775, "step": 39280 }, { "epoch": 3.9290000000000003, "grad_norm": 12.416475296020508, "learning_rate": 2.1464000000000003e-06, "loss": 1.3357, "step": 39290 }, { "epoch": 3.93, "grad_norm": 11.198461532592773, "learning_rate": 2.1444e-06, "loss": 1.1272, "step": 39300 }, { "epoch": 3.931, "grad_norm": 12.460107803344727, "learning_rate": 2.1424000000000004e-06, "loss": 1.5054, "step": 39310 }, { "epoch": 3.932, "grad_norm": 6.768657684326172, "learning_rate": 2.1404000000000003e-06, "loss": 1.3466, "step": 39320 }, { "epoch": 3.933, "grad_norm": 12.902031898498535, "learning_rate": 2.1384e-06, "loss": 0.9435, "step": 39330 }, { "epoch": 3.934, "grad_norm": 10.827200889587402, "learning_rate": 2.1364e-06, "loss": 1.3099, "step": 39340 }, { "epoch": 3.935, "grad_norm": 8.690793991088867, "learning_rate": 2.1344000000000003e-06, "loss": 0.9469, "step": 39350 }, { "epoch": 3.936, "grad_norm": 12.577960968017578, "learning_rate": 2.1324e-06, "loss": 1.1955, "step": 39360 
}, { "epoch": 3.9370000000000003, "grad_norm": 11.960624694824219, "learning_rate": 2.1304000000000004e-06, "loss": 1.3436, "step": 39370 }, { "epoch": 3.9379999999999997, "grad_norm": 7.513112545013428, "learning_rate": 2.1284000000000002e-06, "loss": 1.2821, "step": 39380 }, { "epoch": 3.939, "grad_norm": 13.949124336242676, "learning_rate": 2.1264e-06, "loss": 1.5437, "step": 39390 }, { "epoch": 3.94, "grad_norm": 9.117955207824707, "learning_rate": 2.1244e-06, "loss": 1.2273, "step": 39400 }, { "epoch": 3.941, "grad_norm": 9.620978355407715, "learning_rate": 2.1224e-06, "loss": 1.3495, "step": 39410 }, { "epoch": 3.942, "grad_norm": 12.69318962097168, "learning_rate": 2.1204e-06, "loss": 1.3289, "step": 39420 }, { "epoch": 3.943, "grad_norm": 14.887171745300293, "learning_rate": 2.1184000000000003e-06, "loss": 1.1939, "step": 39430 }, { "epoch": 3.944, "grad_norm": 6.5539870262146, "learning_rate": 2.1164e-06, "loss": 1.2338, "step": 39440 }, { "epoch": 3.945, "grad_norm": 11.535837173461914, "learning_rate": 2.1144e-06, "loss": 1.2445, "step": 39450 }, { "epoch": 3.9459999999999997, "grad_norm": 8.392821311950684, "learning_rate": 2.1124000000000003e-06, "loss": 0.9979, "step": 39460 }, { "epoch": 3.947, "grad_norm": 15.28943920135498, "learning_rate": 2.1104e-06, "loss": 1.0111, "step": 39470 }, { "epoch": 3.948, "grad_norm": 13.631561279296875, "learning_rate": 2.1084e-06, "loss": 1.1839, "step": 39480 }, { "epoch": 3.949, "grad_norm": 13.736732482910156, "learning_rate": 2.1064000000000003e-06, "loss": 1.1102, "step": 39490 }, { "epoch": 3.95, "grad_norm": 8.457724571228027, "learning_rate": 2.1044e-06, "loss": 1.1509, "step": 39500 }, { "epoch": 3.951, "grad_norm": 15.011367797851562, "learning_rate": 2.1024000000000004e-06, "loss": 1.3375, "step": 39510 }, { "epoch": 3.952, "grad_norm": 6.542825698852539, "learning_rate": 2.1004000000000003e-06, "loss": 1.0697, "step": 39520 }, { "epoch": 3.953, "grad_norm": 10.32967758178711, "learning_rate": 2.0984e-06, 
"loss": 1.2154, "step": 39530 }, { "epoch": 3.9539999999999997, "grad_norm": 2.30588698387146, "learning_rate": 2.0964e-06, "loss": 0.9015, "step": 39540 }, { "epoch": 3.955, "grad_norm": 10.52668571472168, "learning_rate": 2.0944000000000002e-06, "loss": 1.1191, "step": 39550 }, { "epoch": 3.956, "grad_norm": 14.20761489868164, "learning_rate": 2.0924e-06, "loss": 1.3412, "step": 39560 }, { "epoch": 3.957, "grad_norm": 12.69825553894043, "learning_rate": 2.0904000000000003e-06, "loss": 1.125, "step": 39570 }, { "epoch": 3.958, "grad_norm": 11.690139770507812, "learning_rate": 2.0884e-06, "loss": 1.411, "step": 39580 }, { "epoch": 3.959, "grad_norm": 23.669801712036133, "learning_rate": 2.0864e-06, "loss": 1.0525, "step": 39590 }, { "epoch": 3.96, "grad_norm": 8.103926658630371, "learning_rate": 2.0844e-06, "loss": 1.3755, "step": 39600 }, { "epoch": 3.961, "grad_norm": 11.425315856933594, "learning_rate": 2.0824e-06, "loss": 1.0759, "step": 39610 }, { "epoch": 3.9619999999999997, "grad_norm": 2.0040135383605957, "learning_rate": 2.0804e-06, "loss": 1.0839, "step": 39620 }, { "epoch": 3.963, "grad_norm": 10.157102584838867, "learning_rate": 2.0784000000000003e-06, "loss": 1.3164, "step": 39630 }, { "epoch": 3.964, "grad_norm": 7.7615461349487305, "learning_rate": 2.0764e-06, "loss": 1.2555, "step": 39640 }, { "epoch": 3.965, "grad_norm": 15.621055603027344, "learning_rate": 2.0744000000000004e-06, "loss": 1.2752, "step": 39650 }, { "epoch": 3.966, "grad_norm": 14.723012924194336, "learning_rate": 2.0724000000000003e-06, "loss": 0.9618, "step": 39660 }, { "epoch": 3.967, "grad_norm": 22.73052215576172, "learning_rate": 2.0704e-06, "loss": 1.3223, "step": 39670 }, { "epoch": 3.968, "grad_norm": 19.596620559692383, "learning_rate": 2.0684e-06, "loss": 1.3325, "step": 39680 }, { "epoch": 3.969, "grad_norm": 8.103555679321289, "learning_rate": 2.0664000000000002e-06, "loss": 1.072, "step": 39690 }, { "epoch": 3.9699999999999998, "grad_norm": 12.16108512878418, 
"learning_rate": 2.0644e-06, "loss": 0.9697, "step": 39700 }, { "epoch": 3.971, "grad_norm": 12.558130264282227, "learning_rate": 2.0624000000000004e-06, "loss": 1.269, "step": 39710 }, { "epoch": 3.972, "grad_norm": 5.639257431030273, "learning_rate": 2.0604000000000002e-06, "loss": 1.2229, "step": 39720 }, { "epoch": 3.973, "grad_norm": 35.27702331542969, "learning_rate": 2.0584e-06, "loss": 1.5389, "step": 39730 }, { "epoch": 3.974, "grad_norm": 7.00636100769043, "learning_rate": 2.0564e-06, "loss": 1.4366, "step": 39740 }, { "epoch": 3.975, "grad_norm": 5.253599166870117, "learning_rate": 2.0544e-06, "loss": 1.5813, "step": 39750 }, { "epoch": 3.976, "grad_norm": 7.881631851196289, "learning_rate": 2.0524e-06, "loss": 0.9579, "step": 39760 }, { "epoch": 3.977, "grad_norm": 17.991270065307617, "learning_rate": 2.0504000000000003e-06, "loss": 1.1928, "step": 39770 }, { "epoch": 3.9779999999999998, "grad_norm": 16.202117919921875, "learning_rate": 2.0484e-06, "loss": 1.2862, "step": 39780 }, { "epoch": 3.979, "grad_norm": 12.839705467224121, "learning_rate": 2.0464e-06, "loss": 1.5963, "step": 39790 }, { "epoch": 3.98, "grad_norm": 9.252676963806152, "learning_rate": 2.0444000000000003e-06, "loss": 1.0667, "step": 39800 }, { "epoch": 3.981, "grad_norm": 11.684073448181152, "learning_rate": 2.0424e-06, "loss": 1.2189, "step": 39810 }, { "epoch": 3.982, "grad_norm": 14.890811920166016, "learning_rate": 2.0404e-06, "loss": 1.1794, "step": 39820 }, { "epoch": 3.983, "grad_norm": 13.780019760131836, "learning_rate": 2.0384000000000003e-06, "loss": 1.3959, "step": 39830 }, { "epoch": 3.984, "grad_norm": 13.055339813232422, "learning_rate": 2.0364e-06, "loss": 1.1673, "step": 39840 }, { "epoch": 3.985, "grad_norm": 22.301334381103516, "learning_rate": 2.0344000000000004e-06, "loss": 1.1952, "step": 39850 }, { "epoch": 3.9859999999999998, "grad_norm": 9.01404857635498, "learning_rate": 2.0324000000000002e-06, "loss": 1.1618, "step": 39860 }, { "epoch": 3.987, "grad_norm": 
7.801680088043213, "learning_rate": 2.0304e-06, "loss": 1.0837, "step": 39870 }, { "epoch": 3.988, "grad_norm": 9.99185562133789, "learning_rate": 2.0284e-06, "loss": 0.9401, "step": 39880 }, { "epoch": 3.989, "grad_norm": 7.114344596862793, "learning_rate": 2.0264e-06, "loss": 0.8261, "step": 39890 }, { "epoch": 3.99, "grad_norm": 12.52552604675293, "learning_rate": 2.0244e-06, "loss": 1.2295, "step": 39900 }, { "epoch": 3.991, "grad_norm": 16.246763229370117, "learning_rate": 2.0224000000000003e-06, "loss": 1.2274, "step": 39910 }, { "epoch": 3.992, "grad_norm": 20.329694747924805, "learning_rate": 2.0204e-06, "loss": 1.3935, "step": 39920 }, { "epoch": 3.993, "grad_norm": 15.775959968566895, "learning_rate": 2.0184e-06, "loss": 1.5667, "step": 39930 }, { "epoch": 3.9939999999999998, "grad_norm": 3.9927680492401123, "learning_rate": 2.0164000000000003e-06, "loss": 1.1985, "step": 39940 }, { "epoch": 3.995, "grad_norm": 18.908706665039062, "learning_rate": 2.0144e-06, "loss": 1.4964, "step": 39950 }, { "epoch": 3.996, "grad_norm": 6.706070423126221, "learning_rate": 2.0124e-06, "loss": 1.4124, "step": 39960 }, { "epoch": 3.997, "grad_norm": 6.100954055786133, "learning_rate": 2.0104000000000003e-06, "loss": 1.2583, "step": 39970 }, { "epoch": 3.998, "grad_norm": 12.561248779296875, "learning_rate": 2.0084e-06, "loss": 1.0361, "step": 39980 }, { "epoch": 3.999, "grad_norm": 16.806325912475586, "learning_rate": 2.0064000000000004e-06, "loss": 1.3935, "step": 39990 }, { "epoch": 4.0, "grad_norm": 8.588282585144043, "learning_rate": 2.0044000000000002e-06, "loss": 1.2572, "step": 40000 }, { "epoch": 4.001, "grad_norm": 11.128682136535645, "learning_rate": 2.0024e-06, "loss": 1.2347, "step": 40010 }, { "epoch": 4.002, "grad_norm": 16.264772415161133, "learning_rate": 2.0004e-06, "loss": 1.551, "step": 40020 }, { "epoch": 4.003, "grad_norm": 13.367792129516602, "learning_rate": 1.9984000000000002e-06, "loss": 1.0935, "step": 40030 }, { "epoch": 4.004, "grad_norm": 
11.161703109741211, "learning_rate": 1.9964000000000005e-06, "loss": 0.9004, "step": 40040 }, { "epoch": 4.005, "grad_norm": 4.508510589599609, "learning_rate": 1.9944000000000003e-06, "loss": 1.1791, "step": 40050 }, { "epoch": 4.006, "grad_norm": 14.080422401428223, "learning_rate": 1.9924e-06, "loss": 1.2697, "step": 40060 }, { "epoch": 4.007, "grad_norm": 13.283492088317871, "learning_rate": 1.9904e-06, "loss": 1.3962, "step": 40070 }, { "epoch": 4.008, "grad_norm": 9.062101364135742, "learning_rate": 1.9884e-06, "loss": 1.0267, "step": 40080 }, { "epoch": 4.009, "grad_norm": 12.410930633544922, "learning_rate": 1.9864e-06, "loss": 1.2767, "step": 40090 }, { "epoch": 4.01, "grad_norm": 12.03365707397461, "learning_rate": 1.9844e-06, "loss": 1.1209, "step": 40100 }, { "epoch": 4.011, "grad_norm": 14.829510688781738, "learning_rate": 1.9824000000000003e-06, "loss": 1.0653, "step": 40110 }, { "epoch": 4.012, "grad_norm": 2.148223638534546, "learning_rate": 1.9804e-06, "loss": 0.9023, "step": 40120 }, { "epoch": 4.013, "grad_norm": 16.32964324951172, "learning_rate": 1.9784e-06, "loss": 1.4369, "step": 40130 }, { "epoch": 4.014, "grad_norm": 7.899197101593018, "learning_rate": 1.9764000000000003e-06, "loss": 1.3912, "step": 40140 }, { "epoch": 4.015, "grad_norm": 145.9697723388672, "learning_rate": 1.9744e-06, "loss": 1.1991, "step": 40150 }, { "epoch": 4.016, "grad_norm": 10.792060852050781, "learning_rate": 1.9724e-06, "loss": 1.0464, "step": 40160 }, { "epoch": 4.017, "grad_norm": 14.326906204223633, "learning_rate": 1.9704000000000002e-06, "loss": 1.3006, "step": 40170 }, { "epoch": 4.018, "grad_norm": 12.141740798950195, "learning_rate": 1.9684e-06, "loss": 1.2158, "step": 40180 }, { "epoch": 4.019, "grad_norm": 12.262004852294922, "learning_rate": 1.9664000000000003e-06, "loss": 1.4417, "step": 40190 }, { "epoch": 4.02, "grad_norm": 2.0249717235565186, "learning_rate": 1.9644e-06, "loss": 0.8404, "step": 40200 }, { "epoch": 4.021, "grad_norm": 
7.84832239151001, "learning_rate": 1.9624e-06, "loss": 1.1789, "step": 40210 }, { "epoch": 4.022, "grad_norm": 16.320781707763672, "learning_rate": 1.9604e-06, "loss": 1.4101, "step": 40220 }, { "epoch": 4.023, "grad_norm": 14.928508758544922, "learning_rate": 1.9584e-06, "loss": 1.3553, "step": 40230 }, { "epoch": 4.024, "grad_norm": 8.78088092803955, "learning_rate": 1.9564000000000004e-06, "loss": 1.2127, "step": 40240 }, { "epoch": 4.025, "grad_norm": 4.696277141571045, "learning_rate": 1.9544000000000003e-06, "loss": 1.0219, "step": 40250 }, { "epoch": 4.026, "grad_norm": 13.06330680847168, "learning_rate": 1.9524e-06, "loss": 1.0031, "step": 40260 }, { "epoch": 4.027, "grad_norm": 17.397062301635742, "learning_rate": 1.9504e-06, "loss": 1.5734, "step": 40270 }, { "epoch": 4.028, "grad_norm": 3.442828893661499, "learning_rate": 1.9484000000000003e-06, "loss": 1.1175, "step": 40280 }, { "epoch": 4.029, "grad_norm": 14.766777992248535, "learning_rate": 1.9464e-06, "loss": 1.2666, "step": 40290 }, { "epoch": 4.03, "grad_norm": 12.792179107666016, "learning_rate": 1.9444e-06, "loss": 1.3148, "step": 40300 }, { "epoch": 4.031, "grad_norm": 14.319787979125977, "learning_rate": 1.9424000000000002e-06, "loss": 1.5315, "step": 40310 }, { "epoch": 4.032, "grad_norm": 7.522754192352295, "learning_rate": 1.9404e-06, "loss": 1.2031, "step": 40320 }, { "epoch": 4.033, "grad_norm": 9.742520332336426, "learning_rate": 1.9384000000000004e-06, "loss": 1.3779, "step": 40330 }, { "epoch": 4.034, "grad_norm": 5.746943950653076, "learning_rate": 1.9364000000000002e-06, "loss": 0.8605, "step": 40340 }, { "epoch": 4.035, "grad_norm": 7.665735244750977, "learning_rate": 1.9344e-06, "loss": 0.8738, "step": 40350 }, { "epoch": 4.036, "grad_norm": 7.8452324867248535, "learning_rate": 1.9324e-06, "loss": 1.1671, "step": 40360 }, { "epoch": 4.037, "grad_norm": 8.378031730651855, "learning_rate": 1.9304e-06, "loss": 1.3305, "step": 40370 }, { "epoch": 4.038, "grad_norm": 13.991755485534668, 
"learning_rate": 1.9284000000000005e-06, "loss": 1.4696, "step": 40380 }, { "epoch": 4.039, "grad_norm": 11.42869758605957, "learning_rate": 1.9264000000000003e-06, "loss": 1.1719, "step": 40390 }, { "epoch": 4.04, "grad_norm": 10.945043563842773, "learning_rate": 1.9244e-06, "loss": 1.189, "step": 40400 }, { "epoch": 4.041, "grad_norm": 11.763151168823242, "learning_rate": 1.9224e-06, "loss": 1.2759, "step": 40410 }, { "epoch": 4.042, "grad_norm": 11.601271629333496, "learning_rate": 1.9204e-06, "loss": 1.3376, "step": 40420 }, { "epoch": 4.043, "grad_norm": 11.217597961425781, "learning_rate": 1.9184e-06, "loss": 1.3389, "step": 40430 }, { "epoch": 4.044, "grad_norm": 17.997289657592773, "learning_rate": 1.9164000000000004e-06, "loss": 1.4455, "step": 40440 }, { "epoch": 4.045, "grad_norm": 12.068998336791992, "learning_rate": 1.9144000000000003e-06, "loss": 1.2748, "step": 40450 }, { "epoch": 4.046, "grad_norm": 10.9359712600708, "learning_rate": 1.9124e-06, "loss": 1.1655, "step": 40460 }, { "epoch": 4.047, "grad_norm": 18.37262535095215, "learning_rate": 1.9104e-06, "loss": 1.277, "step": 40470 }, { "epoch": 4.048, "grad_norm": 11.638949394226074, "learning_rate": 1.9084000000000002e-06, "loss": 1.2216, "step": 40480 }, { "epoch": 4.049, "grad_norm": 17.218978881835938, "learning_rate": 1.9064000000000003e-06, "loss": 1.0217, "step": 40490 }, { "epoch": 4.05, "grad_norm": 15.362520217895508, "learning_rate": 1.9044000000000001e-06, "loss": 1.1352, "step": 40500 }, { "epoch": 4.051, "grad_norm": 41.6008186340332, "learning_rate": 1.9024e-06, "loss": 1.3486, "step": 40510 }, { "epoch": 4.052, "grad_norm": 16.02692222595215, "learning_rate": 1.9004e-06, "loss": 1.255, "step": 40520 }, { "epoch": 4.053, "grad_norm": 15.08223819732666, "learning_rate": 1.8984000000000003e-06, "loss": 1.196, "step": 40530 }, { "epoch": 4.054, "grad_norm": 18.837661743164062, "learning_rate": 1.8964000000000002e-06, "loss": 1.3321, "step": 40540 }, { "epoch": 4.055, "grad_norm": 
14.87779426574707, "learning_rate": 1.8944e-06, "loss": 1.3748, "step": 40550 }, { "epoch": 4.056, "grad_norm": 14.315704345703125, "learning_rate": 1.8924e-06, "loss": 0.9526, "step": 40560 }, { "epoch": 4.057, "grad_norm": 21.465242385864258, "learning_rate": 1.8904000000000004e-06, "loss": 1.5365, "step": 40570 }, { "epoch": 4.058, "grad_norm": 14.292797088623047, "learning_rate": 1.8884000000000002e-06, "loss": 1.3124, "step": 40580 }, { "epoch": 4.059, "grad_norm": 13.437397956848145, "learning_rate": 1.8864000000000003e-06, "loss": 1.2113, "step": 40590 }, { "epoch": 4.06, "grad_norm": 11.626646995544434, "learning_rate": 1.8844000000000001e-06, "loss": 1.2839, "step": 40600 }, { "epoch": 4.061, "grad_norm": 12.128142356872559, "learning_rate": 1.8824e-06, "loss": 1.2649, "step": 40610 }, { "epoch": 4.062, "grad_norm": 9.753108978271484, "learning_rate": 1.8804000000000002e-06, "loss": 1.0922, "step": 40620 }, { "epoch": 4.063, "grad_norm": 12.226005554199219, "learning_rate": 1.8784000000000003e-06, "loss": 1.0324, "step": 40630 }, { "epoch": 4.064, "grad_norm": 10.765656471252441, "learning_rate": 1.8764000000000002e-06, "loss": 0.864, "step": 40640 }, { "epoch": 4.065, "grad_norm": 5.601574420928955, "learning_rate": 1.8744000000000002e-06, "loss": 0.8616, "step": 40650 }, { "epoch": 4.066, "grad_norm": 6.348523139953613, "learning_rate": 1.8724e-06, "loss": 1.1013, "step": 40660 }, { "epoch": 4.067, "grad_norm": 14.095992088317871, "learning_rate": 1.8704000000000003e-06, "loss": 1.0465, "step": 40670 }, { "epoch": 4.068, "grad_norm": 13.380661964416504, "learning_rate": 1.8684000000000002e-06, "loss": 1.2694, "step": 40680 }, { "epoch": 4.069, "grad_norm": 14.413578033447266, "learning_rate": 1.8664000000000002e-06, "loss": 1.6316, "step": 40690 }, { "epoch": 4.07, "grad_norm": 14.797370910644531, "learning_rate": 1.8644e-06, "loss": 1.4015, "step": 40700 }, { "epoch": 4.071, "grad_norm": 13.191323280334473, "learning_rate": 1.8624e-06, "loss": 1.2672, 
"step": 40710 }, { "epoch": 4.072, "grad_norm": 10.441399574279785, "learning_rate": 1.8604000000000002e-06, "loss": 1.1072, "step": 40720 }, { "epoch": 4.073, "grad_norm": 11.217610359191895, "learning_rate": 1.8584000000000003e-06, "loss": 1.2855, "step": 40730 }, { "epoch": 4.074, "grad_norm": 8.068812370300293, "learning_rate": 1.8564000000000001e-06, "loss": 1.118, "step": 40740 }, { "epoch": 4.075, "grad_norm": 11.481019973754883, "learning_rate": 1.8544000000000002e-06, "loss": 1.2519, "step": 40750 }, { "epoch": 4.076, "grad_norm": 7.041345596313477, "learning_rate": 1.8524e-06, "loss": 0.9162, "step": 40760 }, { "epoch": 4.077, "grad_norm": 14.712966918945312, "learning_rate": 1.8504000000000003e-06, "loss": 1.1733, "step": 40770 }, { "epoch": 4.078, "grad_norm": 12.23774528503418, "learning_rate": 1.8484000000000002e-06, "loss": 1.2879, "step": 40780 }, { "epoch": 4.079, "grad_norm": 17.56555938720703, "learning_rate": 1.8464000000000002e-06, "loss": 1.3726, "step": 40790 }, { "epoch": 4.08, "grad_norm": 9.929778099060059, "learning_rate": 1.8444e-06, "loss": 1.4375, "step": 40800 }, { "epoch": 4.081, "grad_norm": 10.517845153808594, "learning_rate": 1.8424e-06, "loss": 1.1667, "step": 40810 }, { "epoch": 4.082, "grad_norm": 15.464540481567383, "learning_rate": 1.8404000000000002e-06, "loss": 1.1746, "step": 40820 }, { "epoch": 4.083, "grad_norm": 11.998087882995605, "learning_rate": 1.8384000000000003e-06, "loss": 1.3664, "step": 40830 }, { "epoch": 4.084, "grad_norm": 12.494379043579102, "learning_rate": 1.8364000000000001e-06, "loss": 1.0481, "step": 40840 }, { "epoch": 4.085, "grad_norm": 9.480566024780273, "learning_rate": 1.8344000000000002e-06, "loss": 0.8092, "step": 40850 }, { "epoch": 4.086, "grad_norm": 3.6071290969848633, "learning_rate": 1.8324e-06, "loss": 0.7558, "step": 40860 }, { "epoch": 4.087, "grad_norm": 16.53622817993164, "learning_rate": 1.8304000000000003e-06, "loss": 1.0528, "step": 40870 }, { "epoch": 4.088, "grad_norm": 
7.138472080230713, "learning_rate": 1.8284000000000001e-06, "loss": 1.1895, "step": 40880 }, { "epoch": 4.089, "grad_norm": 7.524162292480469, "learning_rate": 1.8264000000000002e-06, "loss": 1.2304, "step": 40890 }, { "epoch": 4.09, "grad_norm": 13.9469633102417, "learning_rate": 1.8244e-06, "loss": 0.9797, "step": 40900 }, { "epoch": 4.091, "grad_norm": 15.148113250732422, "learning_rate": 1.8224000000000003e-06, "loss": 1.4159, "step": 40910 }, { "epoch": 4.092, "grad_norm": 15.00952434539795, "learning_rate": 1.8204000000000002e-06, "loss": 1.0077, "step": 40920 }, { "epoch": 4.093, "grad_norm": 20.535730361938477, "learning_rate": 1.8184000000000002e-06, "loss": 1.6336, "step": 40930 }, { "epoch": 4.094, "grad_norm": 11.729060173034668, "learning_rate": 1.8164e-06, "loss": 1.043, "step": 40940 }, { "epoch": 4.095, "grad_norm": 14.314053535461426, "learning_rate": 1.8144000000000002e-06, "loss": 1.7448, "step": 40950 }, { "epoch": 4.096, "grad_norm": 18.2724666595459, "learning_rate": 1.8124000000000002e-06, "loss": 1.2472, "step": 40960 }, { "epoch": 4.097, "grad_norm": 13.902151107788086, "learning_rate": 1.8104000000000003e-06, "loss": 1.194, "step": 40970 }, { "epoch": 4.098, "grad_norm": 10.761075019836426, "learning_rate": 1.8084000000000001e-06, "loss": 0.9249, "step": 40980 }, { "epoch": 4.099, "grad_norm": 16.681001663208008, "learning_rate": 1.8064000000000002e-06, "loss": 1.2968, "step": 40990 }, { "epoch": 4.1, "grad_norm": 16.36736488342285, "learning_rate": 1.8044e-06, "loss": 1.1447, "step": 41000 }, { "epoch": 4.101, "grad_norm": 16.537643432617188, "learning_rate": 1.8024000000000003e-06, "loss": 1.3504, "step": 41010 }, { "epoch": 4.102, "grad_norm": 14.55415153503418, "learning_rate": 1.8004000000000002e-06, "loss": 1.4286, "step": 41020 }, { "epoch": 4.103, "grad_norm": 18.604991912841797, "learning_rate": 1.7984000000000002e-06, "loss": 1.3002, "step": 41030 }, { "epoch": 4.104, "grad_norm": 12.910828590393066, "learning_rate": 1.7964e-06, 
"loss": 1.0211, "step": 41040 }, { "epoch": 4.105, "grad_norm": 17.426374435424805, "learning_rate": 1.7944000000000001e-06, "loss": 1.1109, "step": 41050 }, { "epoch": 4.106, "grad_norm": 11.728358268737793, "learning_rate": 1.7924000000000002e-06, "loss": 1.3181, "step": 41060 }, { "epoch": 4.107, "grad_norm": 10.3817777633667, "learning_rate": 1.7904000000000003e-06, "loss": 1.0637, "step": 41070 }, { "epoch": 4.108, "grad_norm": 20.212614059448242, "learning_rate": 1.7884e-06, "loss": 1.0596, "step": 41080 }, { "epoch": 4.109, "grad_norm": 7.579782009124756, "learning_rate": 1.7864000000000002e-06, "loss": 0.9701, "step": 41090 }, { "epoch": 4.11, "grad_norm": 126.69867706298828, "learning_rate": 1.7844e-06, "loss": 1.5729, "step": 41100 }, { "epoch": 4.111, "grad_norm": 15.561029434204102, "learning_rate": 1.7824000000000003e-06, "loss": 1.1575, "step": 41110 }, { "epoch": 4.112, "grad_norm": 11.264272689819336, "learning_rate": 1.7804000000000001e-06, "loss": 1.2608, "step": 41120 }, { "epoch": 4.113, "grad_norm": 10.368001937866211, "learning_rate": 1.7784000000000002e-06, "loss": 1.2963, "step": 41130 }, { "epoch": 4.114, "grad_norm": 14.39828109741211, "learning_rate": 1.7764e-06, "loss": 1.156, "step": 41140 }, { "epoch": 4.115, "grad_norm": 10.103524208068848, "learning_rate": 1.7744000000000001e-06, "loss": 0.9833, "step": 41150 }, { "epoch": 4.116, "grad_norm": 18.904821395874023, "learning_rate": 1.7724000000000002e-06, "loss": 1.569, "step": 41160 }, { "epoch": 4.117, "grad_norm": 16.029382705688477, "learning_rate": 1.7704000000000002e-06, "loss": 1.0345, "step": 41170 }, { "epoch": 4.118, "grad_norm": 12.089861869812012, "learning_rate": 1.7684e-06, "loss": 1.1734, "step": 41180 }, { "epoch": 4.119, "grad_norm": 13.378978729248047, "learning_rate": 1.7664000000000001e-06, "loss": 1.5198, "step": 41190 }, { "epoch": 4.12, "grad_norm": 14.37255859375, "learning_rate": 1.7644000000000002e-06, "loss": 1.4014, "step": 41200 }, { "epoch": 4.121, 
"grad_norm": 3.9864590167999268, "learning_rate": 1.7624000000000003e-06, "loss": 1.0823, "step": 41210 }, { "epoch": 4.122, "grad_norm": 15.125597953796387, "learning_rate": 1.7604000000000001e-06, "loss": 1.4773, "step": 41220 }, { "epoch": 4.123, "grad_norm": 10.017621040344238, "learning_rate": 1.7584000000000002e-06, "loss": 0.7778, "step": 41230 }, { "epoch": 4.124, "grad_norm": 18.641681671142578, "learning_rate": 1.7564e-06, "loss": 1.4662, "step": 41240 }, { "epoch": 4.125, "grad_norm": 1.5794744491577148, "learning_rate": 1.7546000000000001e-06, "loss": 1.0485, "step": 41250 }, { "epoch": 4.126, "grad_norm": 11.299543380737305, "learning_rate": 1.7526000000000002e-06, "loss": 1.1708, "step": 41260 }, { "epoch": 4.127, "grad_norm": 20.040584564208984, "learning_rate": 1.7506e-06, "loss": 1.3783, "step": 41270 }, { "epoch": 4.128, "grad_norm": 13.518338203430176, "learning_rate": 1.7486e-06, "loss": 1.4544, "step": 41280 }, { "epoch": 4.129, "grad_norm": 44.384334564208984, "learning_rate": 1.7466000000000001e-06, "loss": 1.2924, "step": 41290 }, { "epoch": 4.13, "grad_norm": 8.805014610290527, "learning_rate": 1.7446000000000002e-06, "loss": 1.4653, "step": 41300 }, { "epoch": 4.131, "grad_norm": 12.5553617477417, "learning_rate": 1.7426e-06, "loss": 1.4091, "step": 41310 }, { "epoch": 4.132, "grad_norm": 12.487467765808105, "learning_rate": 1.7406000000000001e-06, "loss": 1.1777, "step": 41320 }, { "epoch": 4.133, "grad_norm": 9.486154556274414, "learning_rate": 1.7386e-06, "loss": 1.1109, "step": 41330 }, { "epoch": 4.134, "grad_norm": 7.552662372589111, "learning_rate": 1.7366000000000002e-06, "loss": 1.0116, "step": 41340 }, { "epoch": 4.135, "grad_norm": 15.218793869018555, "learning_rate": 1.7346e-06, "loss": 1.5087, "step": 41350 }, { "epoch": 4.136, "grad_norm": 16.26384735107422, "learning_rate": 1.7326000000000001e-06, "loss": 1.3426, "step": 41360 }, { "epoch": 4.1370000000000005, "grad_norm": 3.6620965003967285, "learning_rate": 1.7306e-06, 
"loss": 1.2542, "step": 41370 }, { "epoch": 4.138, "grad_norm": 12.656413078308105, "learning_rate": 1.7286000000000003e-06, "loss": 1.362, "step": 41380 }, { "epoch": 4.139, "grad_norm": 11.7527437210083, "learning_rate": 1.7266000000000001e-06, "loss": 1.323, "step": 41390 }, { "epoch": 4.14, "grad_norm": 11.43874454498291, "learning_rate": 1.7246000000000002e-06, "loss": 1.303, "step": 41400 }, { "epoch": 4.141, "grad_norm": 12.211397171020508, "learning_rate": 1.7226e-06, "loss": 1.3113, "step": 41410 }, { "epoch": 4.142, "grad_norm": 9.56558895111084, "learning_rate": 1.7206e-06, "loss": 1.1929, "step": 41420 }, { "epoch": 4.143, "grad_norm": 7.146576404571533, "learning_rate": 1.7186000000000004e-06, "loss": 1.0616, "step": 41430 }, { "epoch": 4.144, "grad_norm": 12.202047348022461, "learning_rate": 1.7166000000000002e-06, "loss": 1.401, "step": 41440 }, { "epoch": 4.145, "grad_norm": 5.149089336395264, "learning_rate": 1.7146e-06, "loss": 1.3211, "step": 41450 }, { "epoch": 4.146, "grad_norm": 6.976104736328125, "learning_rate": 1.7126000000000001e-06, "loss": 1.1609, "step": 41460 }, { "epoch": 4.147, "grad_norm": 11.347419738769531, "learning_rate": 1.7106e-06, "loss": 1.0482, "step": 41470 }, { "epoch": 4.148, "grad_norm": 9.633705139160156, "learning_rate": 1.7086000000000003e-06, "loss": 1.4378, "step": 41480 }, { "epoch": 4.149, "grad_norm": 16.018856048583984, "learning_rate": 1.7066e-06, "loss": 1.2125, "step": 41490 }, { "epoch": 4.15, "grad_norm": 11.410557746887207, "learning_rate": 1.7046000000000002e-06, "loss": 1.0709, "step": 41500 }, { "epoch": 4.151, "grad_norm": 9.486397743225098, "learning_rate": 1.7026e-06, "loss": 0.9924, "step": 41510 }, { "epoch": 4.152, "grad_norm": 13.585552215576172, "learning_rate": 1.7006e-06, "loss": 1.1015, "step": 41520 }, { "epoch": 4.153, "grad_norm": 12.157424926757812, "learning_rate": 1.6986000000000003e-06, "loss": 1.141, "step": 41530 }, { "epoch": 4.154, "grad_norm": 12.895865440368652, "learning_rate": 
1.6966000000000002e-06, "loss": 1.1158, "step": 41540 }, { "epoch": 4.155, "grad_norm": 15.088886260986328, "learning_rate": 1.6946e-06, "loss": 1.2589, "step": 41550 }, { "epoch": 4.156, "grad_norm": 13.889354705810547, "learning_rate": 1.6926000000000001e-06, "loss": 1.1019, "step": 41560 }, { "epoch": 4.157, "grad_norm": 11.485007286071777, "learning_rate": 1.6906e-06, "loss": 1.029, "step": 41570 }, { "epoch": 4.158, "grad_norm": 10.110180854797363, "learning_rate": 1.6886000000000002e-06, "loss": 1.3946, "step": 41580 }, { "epoch": 4.159, "grad_norm": 7.414301872253418, "learning_rate": 1.6866e-06, "loss": 1.2109, "step": 41590 }, { "epoch": 4.16, "grad_norm": 16.165037155151367, "learning_rate": 1.6846000000000001e-06, "loss": 1.6414, "step": 41600 }, { "epoch": 4.161, "grad_norm": 12.594076156616211, "learning_rate": 1.6826e-06, "loss": 1.142, "step": 41610 }, { "epoch": 4.162, "grad_norm": 13.732057571411133, "learning_rate": 1.6806e-06, "loss": 1.0289, "step": 41620 }, { "epoch": 4.163, "grad_norm": 14.548304557800293, "learning_rate": 1.6786000000000003e-06, "loss": 1.0753, "step": 41630 }, { "epoch": 4.164, "grad_norm": 13.17808723449707, "learning_rate": 1.6766000000000002e-06, "loss": 1.1996, "step": 41640 }, { "epoch": 4.165, "grad_norm": 17.996610641479492, "learning_rate": 1.6746e-06, "loss": 1.2579, "step": 41650 }, { "epoch": 4.166, "grad_norm": 15.47800350189209, "learning_rate": 1.6726e-06, "loss": 1.0198, "step": 41660 }, { "epoch": 4.167, "grad_norm": 14.476968765258789, "learning_rate": 1.6706e-06, "loss": 1.0746, "step": 41670 }, { "epoch": 4.168, "grad_norm": 11.437701225280762, "learning_rate": 1.6686000000000002e-06, "loss": 1.0406, "step": 41680 }, { "epoch": 4.169, "grad_norm": 11.52318286895752, "learning_rate": 1.6666e-06, "loss": 1.2243, "step": 41690 }, { "epoch": 4.17, "grad_norm": 10.823590278625488, "learning_rate": 1.6646000000000001e-06, "loss": 1.4102, "step": 41700 }, { "epoch": 4.171, "grad_norm": 12.043392181396484, 
"learning_rate": 1.6626e-06, "loss": 1.1891, "step": 41710 }, { "epoch": 4.172, "grad_norm": 10.625138282775879, "learning_rate": 1.6606000000000002e-06, "loss": 1.226, "step": 41720 }, { "epoch": 4.173, "grad_norm": 13.608776092529297, "learning_rate": 1.6586000000000003e-06, "loss": 1.3988, "step": 41730 }, { "epoch": 4.174, "grad_norm": 9.5189790725708, "learning_rate": 1.6566000000000002e-06, "loss": 1.3347, "step": 41740 }, { "epoch": 4.175, "grad_norm": 12.359060287475586, "learning_rate": 1.6546e-06, "loss": 1.231, "step": 41750 }, { "epoch": 4.176, "grad_norm": 7.947897911071777, "learning_rate": 1.6526e-06, "loss": 1.0198, "step": 41760 }, { "epoch": 4.177, "grad_norm": 14.372332572937012, "learning_rate": 1.6506000000000003e-06, "loss": 1.1752, "step": 41770 }, { "epoch": 4.178, "grad_norm": 12.468786239624023, "learning_rate": 1.6486000000000002e-06, "loss": 1.239, "step": 41780 }, { "epoch": 4.179, "grad_norm": 11.222219467163086, "learning_rate": 1.6466e-06, "loss": 1.0053, "step": 41790 }, { "epoch": 4.18, "grad_norm": 7.42545747756958, "learning_rate": 1.6446e-06, "loss": 1.2222, "step": 41800 }, { "epoch": 4.181, "grad_norm": 21.787723541259766, "learning_rate": 1.6426e-06, "loss": 1.2944, "step": 41810 }, { "epoch": 4.182, "grad_norm": 9.543527603149414, "learning_rate": 1.6406000000000002e-06, "loss": 1.178, "step": 41820 }, { "epoch": 4.183, "grad_norm": 12.278138160705566, "learning_rate": 1.6386000000000003e-06, "loss": 1.1099, "step": 41830 }, { "epoch": 4.184, "grad_norm": 11.992142677307129, "learning_rate": 1.6366000000000001e-06, "loss": 1.1003, "step": 41840 }, { "epoch": 4.185, "grad_norm": 11.722626686096191, "learning_rate": 1.6346e-06, "loss": 1.2748, "step": 41850 }, { "epoch": 4.186, "grad_norm": 12.313939094543457, "learning_rate": 1.6326e-06, "loss": 0.986, "step": 41860 }, { "epoch": 4.187, "grad_norm": 15.035469055175781, "learning_rate": 1.6306000000000003e-06, "loss": 1.3171, "step": 41870 }, { "epoch": 4.188, "grad_norm": 
15.173311233520508, "learning_rate": 1.6286000000000002e-06, "loss": 1.2455, "step": 41880 }, { "epoch": 4.189, "grad_norm": 33.420284271240234, "learning_rate": 1.6266e-06, "loss": 1.2802, "step": 41890 }, { "epoch": 4.19, "grad_norm": 11.91150951385498, "learning_rate": 1.6246e-06, "loss": 1.4153, "step": 41900 }, { "epoch": 4.191, "grad_norm": 11.003836631774902, "learning_rate": 1.6226e-06, "loss": 1.2033, "step": 41910 }, { "epoch": 4.192, "grad_norm": 11.56380844116211, "learning_rate": 1.6206000000000002e-06, "loss": 1.1708, "step": 41920 }, { "epoch": 4.193, "grad_norm": 17.97216796875, "learning_rate": 1.6186000000000003e-06, "loss": 1.1704, "step": 41930 }, { "epoch": 4.194, "grad_norm": 8.197113990783691, "learning_rate": 1.6166000000000001e-06, "loss": 1.3304, "step": 41940 }, { "epoch": 4.195, "grad_norm": 10.474298477172852, "learning_rate": 1.6146e-06, "loss": 1.4146, "step": 41950 }, { "epoch": 4.196, "grad_norm": 11.269421577453613, "learning_rate": 1.6126e-06, "loss": 1.3539, "step": 41960 }, { "epoch": 4.197, "grad_norm": 11.419291496276855, "learning_rate": 1.6106000000000003e-06, "loss": 1.1506, "step": 41970 }, { "epoch": 4.198, "grad_norm": 6.776576042175293, "learning_rate": 1.6086000000000002e-06, "loss": 1.0142, "step": 41980 }, { "epoch": 4.199, "grad_norm": 20.196964263916016, "learning_rate": 1.6066e-06, "loss": 1.6023, "step": 41990 }, { "epoch": 4.2, "grad_norm": 12.517870903015137, "learning_rate": 1.6046e-06, "loss": 1.3481, "step": 42000 }, { "epoch": 4.201, "grad_norm": 12.976771354675293, "learning_rate": 1.6026000000000003e-06, "loss": 1.1621, "step": 42010 }, { "epoch": 4.202, "grad_norm": 8.139640808105469, "learning_rate": 1.6006000000000002e-06, "loss": 1.4556, "step": 42020 }, { "epoch": 4.203, "grad_norm": 7.747459411621094, "learning_rate": 1.5986000000000002e-06, "loss": 1.1425, "step": 42030 }, { "epoch": 4.204, "grad_norm": 5.707986831665039, "learning_rate": 1.5966e-06, "loss": 1.0703, "step": 42040 }, { "epoch": 
4.205, "grad_norm": 16.931550979614258, "learning_rate": 1.5946e-06, "loss": 1.3725, "step": 42050 }, { "epoch": 4.206, "grad_norm": 8.46220588684082, "learning_rate": 1.5926000000000002e-06, "loss": 0.9888, "step": 42060 }, { "epoch": 4.207, "grad_norm": 6.342188358306885, "learning_rate": 1.5906000000000003e-06, "loss": 1.0109, "step": 42070 }, { "epoch": 4.208, "grad_norm": 13.291387557983398, "learning_rate": 1.5886000000000001e-06, "loss": 1.3269, "step": 42080 }, { "epoch": 4.209, "grad_norm": 13.284997940063477, "learning_rate": 1.5866e-06, "loss": 0.9398, "step": 42090 }, { "epoch": 4.21, "grad_norm": 8.189059257507324, "learning_rate": 1.5846e-06, "loss": 1.3285, "step": 42100 }, { "epoch": 4.211, "grad_norm": 14.197911262512207, "learning_rate": 1.5826000000000003e-06, "loss": 1.3225, "step": 42110 }, { "epoch": 4.212, "grad_norm": 9.91851806640625, "learning_rate": 1.5806000000000002e-06, "loss": 1.1409, "step": 42120 }, { "epoch": 4.213, "grad_norm": 10.834633827209473, "learning_rate": 1.5786000000000002e-06, "loss": 1.1824, "step": 42130 }, { "epoch": 4.214, "grad_norm": 13.863362312316895, "learning_rate": 1.5766e-06, "loss": 1.3068, "step": 42140 }, { "epoch": 4.215, "grad_norm": 7.597848892211914, "learning_rate": 1.5746e-06, "loss": 1.4018, "step": 42150 }, { "epoch": 4.216, "grad_norm": 14.048933029174805, "learning_rate": 1.5726000000000002e-06, "loss": 1.394, "step": 42160 }, { "epoch": 4.217, "grad_norm": 4.949995994567871, "learning_rate": 1.5706000000000003e-06, "loss": 1.0513, "step": 42170 }, { "epoch": 4.218, "grad_norm": 7.5058417320251465, "learning_rate": 1.5686000000000001e-06, "loss": 1.1949, "step": 42180 }, { "epoch": 4.219, "grad_norm": 13.691729545593262, "learning_rate": 1.5666e-06, "loss": 1.0162, "step": 42190 }, { "epoch": 4.22, "grad_norm": 7.545902729034424, "learning_rate": 1.5646e-06, "loss": 1.2785, "step": 42200 }, { "epoch": 4.221, "grad_norm": 11.473980903625488, "learning_rate": 1.5626000000000003e-06, "loss": 
1.1402, "step": 42210 }, { "epoch": 4.222, "grad_norm": 10.146150588989258, "learning_rate": 1.5606000000000001e-06, "loss": 0.9795, "step": 42220 }, { "epoch": 4.223, "grad_norm": 12.926558494567871, "learning_rate": 1.5586000000000002e-06, "loss": 1.292, "step": 42230 }, { "epoch": 4.224, "grad_norm": 14.715789794921875, "learning_rate": 1.5566e-06, "loss": 1.0579, "step": 42240 }, { "epoch": 4.225, "grad_norm": 6.069051742553711, "learning_rate": 1.5546e-06, "loss": 1.0203, "step": 42250 }, { "epoch": 4.226, "grad_norm": 13.52805233001709, "learning_rate": 1.5526000000000002e-06, "loss": 1.0253, "step": 42260 }, { "epoch": 4.227, "grad_norm": 11.812847137451172, "learning_rate": 1.5506000000000002e-06, "loss": 1.0679, "step": 42270 }, { "epoch": 4.228, "grad_norm": 5.778861999511719, "learning_rate": 1.5486e-06, "loss": 1.2474, "step": 42280 }, { "epoch": 4.229, "grad_norm": 15.532513618469238, "learning_rate": 1.5466000000000002e-06, "loss": 1.3213, "step": 42290 }, { "epoch": 4.23, "grad_norm": 9.661678314208984, "learning_rate": 1.5446e-06, "loss": 0.9315, "step": 42300 }, { "epoch": 4.231, "grad_norm": 14.60292911529541, "learning_rate": 1.5426000000000003e-06, "loss": 1.2677, "step": 42310 }, { "epoch": 4.232, "grad_norm": 9.481269836425781, "learning_rate": 1.5406000000000001e-06, "loss": 1.3159, "step": 42320 }, { "epoch": 4.233, "grad_norm": 14.147126197814941, "learning_rate": 1.5386000000000002e-06, "loss": 1.3403, "step": 42330 }, { "epoch": 4.234, "grad_norm": 13.610280990600586, "learning_rate": 1.5366e-06, "loss": 1.2386, "step": 42340 }, { "epoch": 4.235, "grad_norm": 8.153485298156738, "learning_rate": 1.5346000000000003e-06, "loss": 1.3709, "step": 42350 }, { "epoch": 4.236, "grad_norm": 9.820429801940918, "learning_rate": 1.5326000000000002e-06, "loss": 1.3812, "step": 42360 }, { "epoch": 4.237, "grad_norm": 6.833520412445068, "learning_rate": 1.5306000000000002e-06, "loss": 1.374, "step": 42370 }, { "epoch": 4.2379999999999995, "grad_norm": 
17.202497482299805, "learning_rate": 1.5286e-06, "loss": 1.3166, "step": 42380 }, { "epoch": 4.239, "grad_norm": 7.4985671043396, "learning_rate": 1.5266000000000001e-06, "loss": 1.13, "step": 42390 }, { "epoch": 4.24, "grad_norm": 16.179689407348633, "learning_rate": 1.5246000000000002e-06, "loss": 1.1315, "step": 42400 }, { "epoch": 4.241, "grad_norm": 13.8903169631958, "learning_rate": 1.5226000000000003e-06, "loss": 1.7474, "step": 42410 }, { "epoch": 4.242, "grad_norm": 12.180191993713379, "learning_rate": 1.5206e-06, "loss": 1.3245, "step": 42420 }, { "epoch": 4.243, "grad_norm": 7.618919849395752, "learning_rate": 1.5186000000000002e-06, "loss": 1.1929, "step": 42430 }, { "epoch": 4.244, "grad_norm": 11.210671424865723, "learning_rate": 1.5166e-06, "loss": 1.0975, "step": 42440 }, { "epoch": 4.245, "grad_norm": 10.878150939941406, "learning_rate": 1.5146000000000003e-06, "loss": 1.0727, "step": 42450 }, { "epoch": 4.246, "grad_norm": 13.134357452392578, "learning_rate": 1.5126000000000001e-06, "loss": 0.9415, "step": 42460 }, { "epoch": 4.247, "grad_norm": 13.007227897644043, "learning_rate": 1.5106000000000002e-06, "loss": 1.394, "step": 42470 }, { "epoch": 4.248, "grad_norm": 19.258075714111328, "learning_rate": 1.5086e-06, "loss": 0.8896, "step": 42480 }, { "epoch": 4.249, "grad_norm": 9.763400077819824, "learning_rate": 1.5066000000000001e-06, "loss": 1.2964, "step": 42490 }, { "epoch": 4.25, "grad_norm": 11.92331600189209, "learning_rate": 1.5046000000000002e-06, "loss": 1.0579, "step": 42500 }, { "epoch": 4.251, "grad_norm": 8.701257705688477, "learning_rate": 1.5026000000000002e-06, "loss": 1.1746, "step": 42510 }, { "epoch": 4.252, "grad_norm": 12.065032005310059, "learning_rate": 1.5006e-06, "loss": 1.2111, "step": 42520 }, { "epoch": 4.253, "grad_norm": 4.863498210906982, "learning_rate": 1.4986000000000001e-06, "loss": 1.0392, "step": 42530 }, { "epoch": 4.254, "grad_norm": 10.97297191619873, "learning_rate": 1.4966e-06, "loss": 1.4045, "step": 
42540 }, { "epoch": 4.255, "grad_norm": 8.891632080078125, "learning_rate": 1.4946000000000003e-06, "loss": 1.1909, "step": 42550 }, { "epoch": 4.256, "grad_norm": 9.45926570892334, "learning_rate": 1.4926000000000001e-06, "loss": 1.1709, "step": 42560 }, { "epoch": 4.257, "grad_norm": 18.058170318603516, "learning_rate": 1.4906000000000002e-06, "loss": 1.0483, "step": 42570 }, { "epoch": 4.258, "grad_norm": 13.907476425170898, "learning_rate": 1.4886e-06, "loss": 1.3481, "step": 42580 }, { "epoch": 4.259, "grad_norm": 8.60117244720459, "learning_rate": 1.4866e-06, "loss": 1.0892, "step": 42590 }, { "epoch": 4.26, "grad_norm": 15.78613567352295, "learning_rate": 1.4846000000000002e-06, "loss": 1.1301, "step": 42600 }, { "epoch": 4.261, "grad_norm": 11.812270164489746, "learning_rate": 1.4826000000000002e-06, "loss": 1.0476, "step": 42610 }, { "epoch": 4.2620000000000005, "grad_norm": 8.921037673950195, "learning_rate": 1.4806e-06, "loss": 1.1405, "step": 42620 }, { "epoch": 4.263, "grad_norm": 15.478119850158691, "learning_rate": 1.4786000000000001e-06, "loss": 1.2588, "step": 42630 }, { "epoch": 4.264, "grad_norm": 5.920109272003174, "learning_rate": 1.4766000000000002e-06, "loss": 0.6452, "step": 42640 }, { "epoch": 4.265, "grad_norm": 11.602609634399414, "learning_rate": 1.4746000000000002e-06, "loss": 1.1806, "step": 42650 }, { "epoch": 4.266, "grad_norm": 6.678112030029297, "learning_rate": 1.4726e-06, "loss": 1.1743, "step": 42660 }, { "epoch": 4.267, "grad_norm": 14.070079803466797, "learning_rate": 1.4706000000000002e-06, "loss": 1.0412, "step": 42670 }, { "epoch": 4.268, "grad_norm": 11.089306831359863, "learning_rate": 1.4686e-06, "loss": 0.9969, "step": 42680 }, { "epoch": 4.269, "grad_norm": 14.99970817565918, "learning_rate": 1.4666000000000003e-06, "loss": 1.1971, "step": 42690 }, { "epoch": 4.27, "grad_norm": 28.522066116333008, "learning_rate": 1.4646000000000001e-06, "loss": 1.3571, "step": 42700 }, { "epoch": 4.271, "grad_norm": 8.218709945678711, 
"learning_rate": 1.4626000000000002e-06, "loss": 1.2694, "step": 42710 }, { "epoch": 4.272, "grad_norm": 16.78716468811035, "learning_rate": 1.4606e-06, "loss": 1.2732, "step": 42720 }, { "epoch": 4.273, "grad_norm": 14.564489364624023, "learning_rate": 1.4586e-06, "loss": 1.3585, "step": 42730 }, { "epoch": 4.274, "grad_norm": 14.192450523376465, "learning_rate": 1.4566000000000002e-06, "loss": 0.959, "step": 42740 }, { "epoch": 4.275, "grad_norm": 10.582295417785645, "learning_rate": 1.4546000000000002e-06, "loss": 1.1632, "step": 42750 }, { "epoch": 4.276, "grad_norm": 15.510900497436523, "learning_rate": 1.4526e-06, "loss": 1.2442, "step": 42760 }, { "epoch": 4.277, "grad_norm": 18.288005828857422, "learning_rate": 1.4506000000000001e-06, "loss": 1.5427, "step": 42770 }, { "epoch": 4.2780000000000005, "grad_norm": 16.209915161132812, "learning_rate": 1.4486e-06, "loss": 1.0817, "step": 42780 }, { "epoch": 4.279, "grad_norm": 6.620324611663818, "learning_rate": 1.4466000000000003e-06, "loss": 1.2953, "step": 42790 }, { "epoch": 4.28, "grad_norm": 8.770933151245117, "learning_rate": 1.4446000000000001e-06, "loss": 1.0811, "step": 42800 }, { "epoch": 4.281, "grad_norm": 13.173994064331055, "learning_rate": 1.4426000000000002e-06, "loss": 1.153, "step": 42810 }, { "epoch": 4.282, "grad_norm": 27.04987907409668, "learning_rate": 1.4406e-06, "loss": 1.2555, "step": 42820 }, { "epoch": 4.283, "grad_norm": 9.495498657226562, "learning_rate": 1.4386e-06, "loss": 0.8203, "step": 42830 }, { "epoch": 4.284, "grad_norm": 22.464887619018555, "learning_rate": 1.4366000000000001e-06, "loss": 1.673, "step": 42840 }, { "epoch": 4.285, "grad_norm": 5.643133163452148, "learning_rate": 1.4346000000000002e-06, "loss": 1.5075, "step": 42850 }, { "epoch": 4.286, "grad_norm": 20.533233642578125, "learning_rate": 1.4326e-06, "loss": 1.3315, "step": 42860 }, { "epoch": 4.287, "grad_norm": 22.95311164855957, "learning_rate": 1.4306000000000001e-06, "loss": 1.1111, "step": 42870 }, { 
"epoch": 4.288, "grad_norm": 13.716428756713867, "learning_rate": 1.4286e-06, "loss": 1.2944, "step": 42880 }, { "epoch": 4.289, "grad_norm": 10.717076301574707, "learning_rate": 1.4266000000000002e-06, "loss": 1.2748, "step": 42890 }, { "epoch": 4.29, "grad_norm": 19.787290573120117, "learning_rate": 1.4246e-06, "loss": 1.3453, "step": 42900 }, { "epoch": 4.291, "grad_norm": 11.54902458190918, "learning_rate": 1.4226000000000002e-06, "loss": 1.2583, "step": 42910 }, { "epoch": 4.292, "grad_norm": 10.28400707244873, "learning_rate": 1.4206e-06, "loss": 1.1863, "step": 42920 }, { "epoch": 4.293, "grad_norm": 6.025252342224121, "learning_rate": 1.4186000000000003e-06, "loss": 1.1802, "step": 42930 }, { "epoch": 4.294, "grad_norm": 8.44946002960205, "learning_rate": 1.4166000000000001e-06, "loss": 1.1142, "step": 42940 }, { "epoch": 4.295, "grad_norm": 10.501981735229492, "learning_rate": 1.4146000000000002e-06, "loss": 0.7323, "step": 42950 }, { "epoch": 4.296, "grad_norm": 11.423309326171875, "learning_rate": 1.4126e-06, "loss": 1.0169, "step": 42960 }, { "epoch": 4.297, "grad_norm": 19.452367782592773, "learning_rate": 1.4106e-06, "loss": 1.2269, "step": 42970 }, { "epoch": 4.298, "grad_norm": 2.3416810035705566, "learning_rate": 1.4086000000000002e-06, "loss": 0.8725, "step": 42980 }, { "epoch": 4.299, "grad_norm": 9.213696479797363, "learning_rate": 1.4066000000000002e-06, "loss": 1.1701, "step": 42990 }, { "epoch": 4.3, "grad_norm": 13.338786125183105, "learning_rate": 1.4046e-06, "loss": 1.3718, "step": 43000 }, { "epoch": 4.301, "grad_norm": 13.33263111114502, "learning_rate": 1.4026000000000001e-06, "loss": 1.3392, "step": 43010 }, { "epoch": 4.302, "grad_norm": 10.395807266235352, "learning_rate": 1.4006e-06, "loss": 1.2067, "step": 43020 }, { "epoch": 4.303, "grad_norm": 15.992231369018555, "learning_rate": 1.3986000000000003e-06, "loss": 1.0926, "step": 43030 }, { "epoch": 4.304, "grad_norm": 13.822946548461914, "learning_rate": 1.3966e-06, "loss": 0.9846, 
"step": 43040 }, { "epoch": 4.305, "grad_norm": 5.524209499359131, "learning_rate": 1.3946000000000002e-06, "loss": 1.1787, "step": 43050 }, { "epoch": 4.306, "grad_norm": 8.380218505859375, "learning_rate": 1.3926e-06, "loss": 0.8057, "step": 43060 }, { "epoch": 4.307, "grad_norm": 12.662495613098145, "learning_rate": 1.3906e-06, "loss": 1.3693, "step": 43070 }, { "epoch": 4.308, "grad_norm": 10.296587944030762, "learning_rate": 1.3886000000000001e-06, "loss": 1.1256, "step": 43080 }, { "epoch": 4.309, "grad_norm": 12.945707321166992, "learning_rate": 1.3866000000000002e-06, "loss": 1.406, "step": 43090 }, { "epoch": 4.31, "grad_norm": 12.78066349029541, "learning_rate": 1.3846e-06, "loss": 1.1685, "step": 43100 }, { "epoch": 4.311, "grad_norm": 20.51262092590332, "learning_rate": 1.3826000000000001e-06, "loss": 1.1261, "step": 43110 }, { "epoch": 4.312, "grad_norm": 16.806411743164062, "learning_rate": 1.3806e-06, "loss": 1.3507, "step": 43120 }, { "epoch": 4.313, "grad_norm": 12.26422119140625, "learning_rate": 1.3786000000000002e-06, "loss": 1.0511, "step": 43130 }, { "epoch": 4.314, "grad_norm": 15.59670639038086, "learning_rate": 1.3766e-06, "loss": 1.2806, "step": 43140 }, { "epoch": 4.315, "grad_norm": 5.737452507019043, "learning_rate": 1.3746000000000001e-06, "loss": 0.9962, "step": 43150 }, { "epoch": 4.316, "grad_norm": 10.49535846710205, "learning_rate": 1.3726e-06, "loss": 0.972, "step": 43160 }, { "epoch": 4.317, "grad_norm": 21.199840545654297, "learning_rate": 1.3706e-06, "loss": 1.428, "step": 43170 }, { "epoch": 4.318, "grad_norm": 16.872013092041016, "learning_rate": 1.3686000000000001e-06, "loss": 1.5275, "step": 43180 }, { "epoch": 4.319, "grad_norm": 31.777938842773438, "learning_rate": 1.3666000000000002e-06, "loss": 1.4393, "step": 43190 }, { "epoch": 4.32, "grad_norm": 18.654401779174805, "learning_rate": 1.3646e-06, "loss": 1.3364, "step": 43200 }, { "epoch": 4.321, "grad_norm": 38.17814254760742, "learning_rate": 1.3626e-06, "loss": 
1.3232, "step": 43210 }, { "epoch": 4.322, "grad_norm": 19.660913467407227, "learning_rate": 1.3606e-06, "loss": 0.9777, "step": 43220 }, { "epoch": 4.323, "grad_norm": 12.522295951843262, "learning_rate": 1.3586000000000002e-06, "loss": 1.1329, "step": 43230 }, { "epoch": 4.324, "grad_norm": 15.212803840637207, "learning_rate": 1.3566e-06, "loss": 1.3582, "step": 43240 }, { "epoch": 4.325, "grad_norm": null, "learning_rate": 1.3548e-06, "loss": 1.1773, "step": 43250 }, { "epoch": 4.326, "grad_norm": 11.102665901184082, "learning_rate": 1.3528000000000002e-06, "loss": 1.2735, "step": 43260 }, { "epoch": 4.327, "grad_norm": 14.183083534240723, "learning_rate": 1.3508000000000003e-06, "loss": 1.1452, "step": 43270 }, { "epoch": 4.328, "grad_norm": 22.030437469482422, "learning_rate": 1.3488000000000001e-06, "loss": 1.1008, "step": 43280 }, { "epoch": 4.329, "grad_norm": 13.09237003326416, "learning_rate": 1.3468e-06, "loss": 1.4895, "step": 43290 }, { "epoch": 4.33, "grad_norm": 11.282512664794922, "learning_rate": 1.3448e-06, "loss": 1.2394, "step": 43300 }, { "epoch": 4.331, "grad_norm": 1.3490010499954224, "learning_rate": 1.3428000000000003e-06, "loss": 0.9285, "step": 43310 }, { "epoch": 4.332, "grad_norm": 22.665935516357422, "learning_rate": 1.3408000000000001e-06, "loss": 1.1549, "step": 43320 }, { "epoch": 4.333, "grad_norm": 15.375754356384277, "learning_rate": 1.3388e-06, "loss": 1.4157, "step": 43330 }, { "epoch": 4.334, "grad_norm": 9.134504318237305, "learning_rate": 1.3368e-06, "loss": 1.1429, "step": 43340 }, { "epoch": 4.335, "grad_norm": 13.859049797058105, "learning_rate": 1.3348e-06, "loss": 1.2506, "step": 43350 }, { "epoch": 4.336, "grad_norm": 14.404821395874023, "learning_rate": 1.3328000000000002e-06, "loss": 1.2961, "step": 43360 }, { "epoch": 4.337, "grad_norm": 8.67965030670166, "learning_rate": 1.3308000000000002e-06, "loss": 1.3487, "step": 43370 }, { "epoch": 4.338, "grad_norm": 19.78135108947754, "learning_rate": 1.3288e-06, "loss": 
1.2629, "step": 43380 }, { "epoch": 4.339, "grad_norm": 14.800707817077637, "learning_rate": 1.3268e-06, "loss": 1.3937, "step": 43390 }, { "epoch": 4.34, "grad_norm": 20.397619247436523, "learning_rate": 1.3248e-06, "loss": 1.1691, "step": 43400 }, { "epoch": 4.341, "grad_norm": 13.792983055114746, "learning_rate": 1.3228000000000003e-06, "loss": 1.4167, "step": 43410 }, { "epoch": 4.342, "grad_norm": 23.5787353515625, "learning_rate": 1.3208000000000001e-06, "loss": 1.1171, "step": 43420 }, { "epoch": 4.343, "grad_norm": 12.105937957763672, "learning_rate": 1.3188e-06, "loss": 1.0271, "step": 43430 }, { "epoch": 4.344, "grad_norm": 14.096811294555664, "learning_rate": 1.3168e-06, "loss": 1.3655, "step": 43440 }, { "epoch": 4.345, "grad_norm": 12.389496803283691, "learning_rate": 1.3148000000000003e-06, "loss": 1.0132, "step": 43450 }, { "epoch": 4.346, "grad_norm": 15.320001602172852, "learning_rate": 1.3128000000000002e-06, "loss": 1.3265, "step": 43460 }, { "epoch": 4.3469999999999995, "grad_norm": 10.351358413696289, "learning_rate": 1.3108000000000002e-06, "loss": 1.4966, "step": 43470 }, { "epoch": 4.348, "grad_norm": 13.598313331604004, "learning_rate": 1.3088e-06, "loss": 1.0844, "step": 43480 }, { "epoch": 4.349, "grad_norm": 12.720455169677734, "learning_rate": 1.3068e-06, "loss": 1.1904, "step": 43490 }, { "epoch": 4.35, "grad_norm": 6.777400016784668, "learning_rate": 1.3048000000000002e-06, "loss": 0.9154, "step": 43500 }, { "epoch": 4.351, "grad_norm": 12.014396667480469, "learning_rate": 1.3028000000000003e-06, "loss": 1.3557, "step": 43510 }, { "epoch": 4.352, "grad_norm": 11.211977005004883, "learning_rate": 1.3008000000000001e-06, "loss": 1.3752, "step": 43520 }, { "epoch": 4.353, "grad_norm": 14.944233894348145, "learning_rate": 1.2988e-06, "loss": 1.3909, "step": 43530 }, { "epoch": 4.354, "grad_norm": 11.956809997558594, "learning_rate": 1.2968e-06, "loss": 1.0699, "step": 43540 }, { "epoch": 4.355, "grad_norm": 9.909040451049805, 
"learning_rate": 1.2948000000000003e-06, "loss": 1.4794, "step": 43550 }, { "epoch": 4.356, "grad_norm": 15.240025520324707, "learning_rate": 1.2928000000000001e-06, "loss": 1.4488, "step": 43560 }, { "epoch": 4.357, "grad_norm": 8.844791412353516, "learning_rate": 1.2908000000000002e-06, "loss": 1.0958, "step": 43570 }, { "epoch": 4.358, "grad_norm": 9.6756591796875, "learning_rate": 1.2888e-06, "loss": 0.8995, "step": 43580 }, { "epoch": 4.359, "grad_norm": 18.973949432373047, "learning_rate": 1.2868e-06, "loss": 0.9949, "step": 43590 }, { "epoch": 4.36, "grad_norm": 15.670254707336426, "learning_rate": 1.2848000000000002e-06, "loss": 0.9962, "step": 43600 }, { "epoch": 4.361, "grad_norm": 13.083096504211426, "learning_rate": 1.2828000000000002e-06, "loss": 0.9308, "step": 43610 }, { "epoch": 4.362, "grad_norm": 11.109720230102539, "learning_rate": 1.2808e-06, "loss": 1.591, "step": 43620 }, { "epoch": 4.3629999999999995, "grad_norm": 13.654829025268555, "learning_rate": 1.2788e-06, "loss": 1.4531, "step": 43630 }, { "epoch": 4.364, "grad_norm": 6.143967628479004, "learning_rate": 1.2768e-06, "loss": 1.1401, "step": 43640 }, { "epoch": 4.365, "grad_norm": 15.476044654846191, "learning_rate": 1.2748000000000003e-06, "loss": 1.3296, "step": 43650 }, { "epoch": 4.366, "grad_norm": 3.998551845550537, "learning_rate": 1.2728000000000001e-06, "loss": 0.9657, "step": 43660 }, { "epoch": 4.367, "grad_norm": 12.290226936340332, "learning_rate": 1.2708000000000002e-06, "loss": 1.2447, "step": 43670 }, { "epoch": 4.368, "grad_norm": 34.229671478271484, "learning_rate": 1.2688e-06, "loss": 1.5007, "step": 43680 }, { "epoch": 4.369, "grad_norm": 8.965970993041992, "learning_rate": 1.2667999999999999e-06, "loss": 1.2106, "step": 43690 }, { "epoch": 4.37, "grad_norm": 15.695554733276367, "learning_rate": 1.2648000000000002e-06, "loss": 1.465, "step": 43700 }, { "epoch": 4.371, "grad_norm": 13.82835865020752, "learning_rate": 1.2628000000000002e-06, "loss": 1.4113, "step": 43710 
}, { "epoch": 4.372, "grad_norm": 16.688308715820312, "learning_rate": 1.2608e-06, "loss": 1.5424, "step": 43720 }, { "epoch": 4.373, "grad_norm": 10.421135902404785, "learning_rate": 1.2588e-06, "loss": 0.959, "step": 43730 }, { "epoch": 4.374, "grad_norm": 15.913087844848633, "learning_rate": 1.2568e-06, "loss": 1.2969, "step": 43740 }, { "epoch": 4.375, "grad_norm": 12.842989921569824, "learning_rate": 1.2548000000000003e-06, "loss": 1.0343, "step": 43750 }, { "epoch": 4.376, "grad_norm": 10.219537734985352, "learning_rate": 1.2528e-06, "loss": 0.9154, "step": 43760 }, { "epoch": 4.377, "grad_norm": 16.006826400756836, "learning_rate": 1.2508000000000002e-06, "loss": 1.2348, "step": 43770 }, { "epoch": 4.378, "grad_norm": 16.137121200561523, "learning_rate": 1.2488000000000002e-06, "loss": 1.2526, "step": 43780 }, { "epoch": 4.379, "grad_norm": 10.725802421569824, "learning_rate": 1.2468e-06, "loss": 1.0305, "step": 43790 }, { "epoch": 4.38, "grad_norm": 5.87778902053833, "learning_rate": 1.2448000000000001e-06, "loss": 1.1073, "step": 43800 }, { "epoch": 4.381, "grad_norm": 13.906825065612793, "learning_rate": 1.2428000000000002e-06, "loss": 1.2134, "step": 43810 }, { "epoch": 4.382, "grad_norm": 16.410289764404297, "learning_rate": 1.2408e-06, "loss": 1.2322, "step": 43820 }, { "epoch": 4.383, "grad_norm": 7.902137279510498, "learning_rate": 1.2388000000000001e-06, "loss": 1.1249, "step": 43830 }, { "epoch": 4.384, "grad_norm": 14.29797649383545, "learning_rate": 1.2368000000000002e-06, "loss": 1.3817, "step": 43840 }, { "epoch": 4.385, "grad_norm": 9.195354461669922, "learning_rate": 1.2348000000000002e-06, "loss": 1.1532, "step": 43850 }, { "epoch": 4.386, "grad_norm": 17.04749298095703, "learning_rate": 1.2328e-06, "loss": 0.9869, "step": 43860 }, { "epoch": 4.3870000000000005, "grad_norm": 20.284353256225586, "learning_rate": 1.2308000000000001e-06, "loss": 1.1726, "step": 43870 }, { "epoch": 4.388, "grad_norm": 14.562019348144531, "learning_rate": 
1.2288000000000002e-06, "loss": 1.1699, "step": 43880 }, { "epoch": 4.389, "grad_norm": 8.904990196228027, "learning_rate": 1.2268e-06, "loss": 1.158, "step": 43890 }, { "epoch": 4.39, "grad_norm": 13.205660820007324, "learning_rate": 1.2248000000000001e-06, "loss": 1.2469, "step": 43900 }, { "epoch": 4.391, "grad_norm": 17.042478561401367, "learning_rate": 1.2228000000000002e-06, "loss": 0.9635, "step": 43910 }, { "epoch": 4.392, "grad_norm": 8.866844177246094, "learning_rate": 1.2208e-06, "loss": 1.2868, "step": 43920 }, { "epoch": 4.393, "grad_norm": 8.501789093017578, "learning_rate": 1.2188e-06, "loss": 1.0242, "step": 43930 }, { "epoch": 4.394, "grad_norm": 23.30021095275879, "learning_rate": 1.2168000000000001e-06, "loss": 1.4853, "step": 43940 }, { "epoch": 4.395, "grad_norm": 17.1911563873291, "learning_rate": 1.2148000000000002e-06, "loss": 1.0925, "step": 43950 }, { "epoch": 4.396, "grad_norm": 15.650598526000977, "learning_rate": 1.2128e-06, "loss": 1.3663, "step": 43960 }, { "epoch": 4.397, "grad_norm": 16.84084701538086, "learning_rate": 1.2108000000000001e-06, "loss": 1.0722, "step": 43970 }, { "epoch": 4.398, "grad_norm": 17.762165069580078, "learning_rate": 1.2088000000000002e-06, "loss": 1.3709, "step": 43980 }, { "epoch": 4.399, "grad_norm": 11.632526397705078, "learning_rate": 1.2068e-06, "loss": 1.2177, "step": 43990 }, { "epoch": 4.4, "grad_norm": 13.640563011169434, "learning_rate": 1.2048e-06, "loss": 0.8434, "step": 44000 }, { "epoch": 4.401, "grad_norm": 10.316097259521484, "learning_rate": 1.2028000000000002e-06, "loss": 1.3039, "step": 44010 }, { "epoch": 4.402, "grad_norm": 6.941383361816406, "learning_rate": 1.2008000000000002e-06, "loss": 1.2211, "step": 44020 }, { "epoch": 4.4030000000000005, "grad_norm": 13.917546272277832, "learning_rate": 1.1988e-06, "loss": 1.3521, "step": 44030 }, { "epoch": 4.404, "grad_norm": 14.317952156066895, "learning_rate": 1.1968000000000001e-06, "loss": 1.1778, "step": 44040 }, { "epoch": 4.405, 
"grad_norm": 7.717184066772461, "learning_rate": 1.1948000000000002e-06, "loss": 1.0299, "step": 44050 }, { "epoch": 4.406, "grad_norm": 11.814888000488281, "learning_rate": 1.1928e-06, "loss": 1.2623, "step": 44060 }, { "epoch": 4.407, "grad_norm": 9.014458656311035, "learning_rate": 1.1908e-06, "loss": 1.1546, "step": 44070 }, { "epoch": 4.408, "grad_norm": 21.461891174316406, "learning_rate": 1.1888000000000002e-06, "loss": 1.3219, "step": 44080 }, { "epoch": 4.409, "grad_norm": 11.546101570129395, "learning_rate": 1.1868e-06, "loss": 1.0107, "step": 44090 }, { "epoch": 4.41, "grad_norm": 23.834896087646484, "learning_rate": 1.1848e-06, "loss": 1.5033, "step": 44100 }, { "epoch": 4.411, "grad_norm": 14.650790214538574, "learning_rate": 1.1828000000000001e-06, "loss": 1.2097, "step": 44110 }, { "epoch": 4.412, "grad_norm": 9.673583030700684, "learning_rate": 1.1808000000000002e-06, "loss": 1.0217, "step": 44120 }, { "epoch": 4.413, "grad_norm": 12.877738952636719, "learning_rate": 1.1788e-06, "loss": 1.3918, "step": 44130 }, { "epoch": 4.414, "grad_norm": 10.822298049926758, "learning_rate": 1.1768000000000001e-06, "loss": 0.991, "step": 44140 }, { "epoch": 4.415, "grad_norm": 10.710716247558594, "learning_rate": 1.1748000000000002e-06, "loss": 1.096, "step": 44150 }, { "epoch": 4.416, "grad_norm": 12.057084083557129, "learning_rate": 1.1728e-06, "loss": 1.1085, "step": 44160 }, { "epoch": 4.417, "grad_norm": 7.847001552581787, "learning_rate": 1.1708e-06, "loss": 1.1216, "step": 44170 }, { "epoch": 4.418, "grad_norm": 8.232547760009766, "learning_rate": 1.1688000000000001e-06, "loss": 1.5202, "step": 44180 }, { "epoch": 4.419, "grad_norm": 13.313277244567871, "learning_rate": 1.1668000000000002e-06, "loss": 1.2862, "step": 44190 }, { "epoch": 4.42, "grad_norm": 9.098024368286133, "learning_rate": 1.1648e-06, "loss": 1.3506, "step": 44200 }, { "epoch": 4.421, "grad_norm": 9.868247985839844, "learning_rate": 1.1628000000000001e-06, "loss": 1.2714, "step": 44210 }, 
{ "epoch": 4.422, "grad_norm": 10.453042984008789, "learning_rate": 1.1608000000000002e-06, "loss": 1.3084, "step": 44220 }, { "epoch": 4.423, "grad_norm": 12.768912315368652, "learning_rate": 1.1588e-06, "loss": 1.1936, "step": 44230 }, { "epoch": 4.424, "grad_norm": 9.295141220092773, "learning_rate": 1.1568e-06, "loss": 1.3684, "step": 44240 }, { "epoch": 4.425, "grad_norm": 15.233993530273438, "learning_rate": 1.1548000000000001e-06, "loss": 1.1714, "step": 44250 }, { "epoch": 4.426, "grad_norm": 10.042970657348633, "learning_rate": 1.1528e-06, "loss": 1.0296, "step": 44260 }, { "epoch": 4.427, "grad_norm": 10.098040580749512, "learning_rate": 1.1508e-06, "loss": 1.1013, "step": 44270 }, { "epoch": 4.428, "grad_norm": 9.712102890014648, "learning_rate": 1.1488000000000001e-06, "loss": 1.1248, "step": 44280 }, { "epoch": 4.429, "grad_norm": 18.249608993530273, "learning_rate": 1.1468000000000002e-06, "loss": 1.4187, "step": 44290 }, { "epoch": 4.43, "grad_norm": 16.74721908569336, "learning_rate": 1.1448e-06, "loss": 1.5621, "step": 44300 }, { "epoch": 4.431, "grad_norm": 14.95107650756836, "learning_rate": 1.1428e-06, "loss": 1.1586, "step": 44310 }, { "epoch": 4.432, "grad_norm": 12.345484733581543, "learning_rate": 1.1408000000000002e-06, "loss": 1.0713, "step": 44320 }, { "epoch": 4.433, "grad_norm": 12.301072120666504, "learning_rate": 1.1388e-06, "loss": 1.3663, "step": 44330 }, { "epoch": 4.434, "grad_norm": 10.626651763916016, "learning_rate": 1.1368e-06, "loss": 1.3432, "step": 44340 }, { "epoch": 4.435, "grad_norm": 14.093714714050293, "learning_rate": 1.1348000000000001e-06, "loss": 1.2071, "step": 44350 }, { "epoch": 4.436, "grad_norm": 13.038942337036133, "learning_rate": 1.1328000000000002e-06, "loss": 1.4287, "step": 44360 }, { "epoch": 4.437, "grad_norm": 9.708054542541504, "learning_rate": 1.1308e-06, "loss": 1.3481, "step": 44370 }, { "epoch": 4.438, "grad_norm": 21.862512588500977, "learning_rate": 1.1288e-06, "loss": 1.086, "step": 44380 }, { 
"epoch": 4.439, "grad_norm": 12.62967300415039, "learning_rate": 1.1268000000000002e-06, "loss": 1.5712, "step": 44390 }, { "epoch": 4.44, "grad_norm": 9.676387786865234, "learning_rate": 1.1248e-06, "loss": 0.9263, "step": 44400 }, { "epoch": 4.441, "grad_norm": 7.884552955627441, "learning_rate": 1.1228e-06, "loss": 1.0347, "step": 44410 }, { "epoch": 4.442, "grad_norm": 9.043564796447754, "learning_rate": 1.1208000000000001e-06, "loss": 1.0765, "step": 44420 }, { "epoch": 4.443, "grad_norm": 11.958871841430664, "learning_rate": 1.1188e-06, "loss": 1.1014, "step": 44430 }, { "epoch": 4.444, "grad_norm": 15.675307273864746, "learning_rate": 1.1168e-06, "loss": 1.0939, "step": 44440 }, { "epoch": 4.445, "grad_norm": 22.52808380126953, "learning_rate": 1.1148000000000001e-06, "loss": 1.293, "step": 44450 }, { "epoch": 4.446, "grad_norm": 4.971675395965576, "learning_rate": 1.1128000000000002e-06, "loss": 1.2024, "step": 44460 }, { "epoch": 4.447, "grad_norm": 14.677552223205566, "learning_rate": 1.1108e-06, "loss": 1.0446, "step": 44470 }, { "epoch": 4.448, "grad_norm": 9.029471397399902, "learning_rate": 1.1088e-06, "loss": 0.8797, "step": 44480 }, { "epoch": 4.449, "grad_norm": 16.38544273376465, "learning_rate": 1.1068000000000001e-06, "loss": 1.3277, "step": 44490 }, { "epoch": 4.45, "grad_norm": 11.636033058166504, "learning_rate": 1.1048e-06, "loss": 1.0777, "step": 44500 }, { "epoch": 4.451, "grad_norm": 12.01616096496582, "learning_rate": 1.1028e-06, "loss": 1.2089, "step": 44510 }, { "epoch": 4.452, "grad_norm": 12.240152359008789, "learning_rate": 1.1008000000000001e-06, "loss": 0.9809, "step": 44520 }, { "epoch": 4.453, "grad_norm": 18.91676139831543, "learning_rate": 1.0988000000000002e-06, "loss": 1.2387, "step": 44530 }, { "epoch": 4.454, "grad_norm": 16.597375869750977, "learning_rate": 1.0968e-06, "loss": 1.178, "step": 44540 }, { "epoch": 4.455, "grad_norm": 9.751705169677734, "learning_rate": 1.0948e-06, "loss": 1.0856, "step": 44550 }, { "epoch": 
4.456, "grad_norm": 18.176464080810547, "learning_rate": 1.0928000000000002e-06, "loss": 1.4579, "step": 44560 }, { "epoch": 4.457, "grad_norm": 18.464439392089844, "learning_rate": 1.0908e-06, "loss": 1.3066, "step": 44570 }, { "epoch": 4.458, "grad_norm": 20.847949981689453, "learning_rate": 1.0888e-06, "loss": 1.342, "step": 44580 }, { "epoch": 4.459, "grad_norm": 9.750897407531738, "learning_rate": 1.0868000000000001e-06, "loss": 1.3173, "step": 44590 }, { "epoch": 4.46, "grad_norm": 9.885807991027832, "learning_rate": 1.0848e-06, "loss": 1.0102, "step": 44600 }, { "epoch": 4.461, "grad_norm": 8.982739448547363, "learning_rate": 1.0828e-06, "loss": 0.9779, "step": 44610 }, { "epoch": 4.462, "grad_norm": 12.751099586486816, "learning_rate": 1.0808e-06, "loss": 1.3273, "step": 44620 }, { "epoch": 4.463, "grad_norm": 23.474525451660156, "learning_rate": 1.0788000000000002e-06, "loss": 1.5399, "step": 44630 }, { "epoch": 4.464, "grad_norm": 19.18785285949707, "learning_rate": 1.0768e-06, "loss": 1.5127, "step": 44640 }, { "epoch": 4.465, "grad_norm": 11.876036643981934, "learning_rate": 1.0748e-06, "loss": 0.8721, "step": 44650 }, { "epoch": 4.466, "grad_norm": 17.969038009643555, "learning_rate": 1.0728000000000001e-06, "loss": 1.4022, "step": 44660 }, { "epoch": 4.467, "grad_norm": 11.285443305969238, "learning_rate": 1.0708e-06, "loss": 1.3769, "step": 44670 }, { "epoch": 4.468, "grad_norm": 20.74547576904297, "learning_rate": 1.0688e-06, "loss": 1.3587, "step": 44680 }, { "epoch": 4.469, "grad_norm": 12.790069580078125, "learning_rate": 1.0668e-06, "loss": 1.0622, "step": 44690 }, { "epoch": 4.47, "grad_norm": 11.811049461364746, "learning_rate": 1.0648000000000002e-06, "loss": 1.0456, "step": 44700 }, { "epoch": 4.471, "grad_norm": 12.632521629333496, "learning_rate": 1.0628e-06, "loss": 1.0009, "step": 44710 }, { "epoch": 4.4719999999999995, "grad_norm": 15.765287399291992, "learning_rate": 1.0608e-06, "loss": 1.493, "step": 44720 }, { "epoch": 4.473, 
"grad_norm": 18.263980865478516, "learning_rate": 1.0588000000000001e-06, "loss": 1.2218, "step": 44730 }, { "epoch": 4.474, "grad_norm": 29.7799072265625, "learning_rate": 1.0568e-06, "loss": 1.0501, "step": 44740 }, { "epoch": 4.475, "grad_norm": 11.128750801086426, "learning_rate": 1.0548e-06, "loss": 1.3403, "step": 44750 }, { "epoch": 4.476, "grad_norm": 8.923828125, "learning_rate": 1.0528000000000001e-06, "loss": 1.2212, "step": 44760 }, { "epoch": 4.477, "grad_norm": 14.123628616333008, "learning_rate": 1.0508000000000002e-06, "loss": 1.2172, "step": 44770 }, { "epoch": 4.478, "grad_norm": 6.54686975479126, "learning_rate": 1.0488e-06, "loss": 1.0398, "step": 44780 }, { "epoch": 4.479, "grad_norm": 9.813666343688965, "learning_rate": 1.0468e-06, "loss": 1.1565, "step": 44790 }, { "epoch": 4.48, "grad_norm": 16.23066520690918, "learning_rate": 1.0448000000000001e-06, "loss": 1.3251, "step": 44800 }, { "epoch": 4.481, "grad_norm": 16.493261337280273, "learning_rate": 1.0428e-06, "loss": 1.4241, "step": 44810 }, { "epoch": 4.482, "grad_norm": 9.298705101013184, "learning_rate": 1.0408e-06, "loss": 1.076, "step": 44820 }, { "epoch": 4.483, "grad_norm": 7.988087177276611, "learning_rate": 1.0388000000000001e-06, "loss": 1.1292, "step": 44830 }, { "epoch": 4.484, "grad_norm": 16.799821853637695, "learning_rate": 1.0368e-06, "loss": 1.3639, "step": 44840 }, { "epoch": 4.485, "grad_norm": 9.939912796020508, "learning_rate": 1.0348e-06, "loss": 1.1368, "step": 44850 }, { "epoch": 4.486, "grad_norm": 20.80634117126465, "learning_rate": 1.0328e-06, "loss": 1.2258, "step": 44860 }, { "epoch": 4.487, "grad_norm": 16.750511169433594, "learning_rate": 1.0308000000000002e-06, "loss": 1.5867, "step": 44870 }, { "epoch": 4.4879999999999995, "grad_norm": 13.10855770111084, "learning_rate": 1.0288e-06, "loss": 0.9852, "step": 44880 }, { "epoch": 4.489, "grad_norm": 7.233658790588379, "learning_rate": 1.0268e-06, "loss": 1.0319, "step": 44890 }, { "epoch": 4.49, "grad_norm": 
12.920644760131836, "learning_rate": 1.0248000000000001e-06, "loss": 1.1584, "step": 44900 }, { "epoch": 4.491, "grad_norm": 4.467281818389893, "learning_rate": 1.0228e-06, "loss": 0.9391, "step": 44910 }, { "epoch": 4.492, "grad_norm": 12.5606050491333, "learning_rate": 1.0208e-06, "loss": 1.2854, "step": 44920 }, { "epoch": 4.493, "grad_norm": 13.043724060058594, "learning_rate": 1.0188e-06, "loss": 1.1845, "step": 44930 }, { "epoch": 4.494, "grad_norm": 10.879907608032227, "learning_rate": 1.0168000000000002e-06, "loss": 1.1141, "step": 44940 }, { "epoch": 4.495, "grad_norm": 15.108843803405762, "learning_rate": 1.0148e-06, "loss": 1.5535, "step": 44950 }, { "epoch": 4.496, "grad_norm": 21.082801818847656, "learning_rate": 1.0128e-06, "loss": 1.4444, "step": 44960 }, { "epoch": 4.497, "grad_norm": 7.4850335121154785, "learning_rate": 1.0108000000000001e-06, "loss": 1.1552, "step": 44970 }, { "epoch": 4.498, "grad_norm": 11.211822509765625, "learning_rate": 1.0088e-06, "loss": 1.2358, "step": 44980 }, { "epoch": 4.499, "grad_norm": 12.080265998840332, "learning_rate": 1.0068e-06, "loss": 1.1477, "step": 44990 }, { "epoch": 4.5, "grad_norm": 7.907993316650391, "learning_rate": 1.0048e-06, "loss": 0.9031, "step": 45000 }, { "epoch": 4.501, "grad_norm": 12.462440490722656, "learning_rate": 1.0028e-06, "loss": 1.2671, "step": 45010 }, { "epoch": 4.502, "grad_norm": 14.184640884399414, "learning_rate": 1.0008e-06, "loss": 0.9419, "step": 45020 }, { "epoch": 4.503, "grad_norm": 10.356825828552246, "learning_rate": 9.988e-07, "loss": 0.8624, "step": 45030 }, { "epoch": 4.504, "grad_norm": 12.827052116394043, "learning_rate": 9.968000000000001e-07, "loss": 1.048, "step": 45040 }, { "epoch": 4.505, "grad_norm": 18.788280487060547, "learning_rate": 9.948e-07, "loss": 1.4307, "step": 45050 }, { "epoch": 4.506, "grad_norm": 17.474475860595703, "learning_rate": 9.928e-07, "loss": 0.9155, "step": 45060 }, { "epoch": 4.507, "grad_norm": 17.623973846435547, "learning_rate": 
9.908000000000001e-07, "loss": 1.3295, "step": 45070 }, { "epoch": 4.508, "grad_norm": 17.83444595336914, "learning_rate": 9.888e-07, "loss": 1.3053, "step": 45080 }, { "epoch": 4.509, "grad_norm": 21.881563186645508, "learning_rate": 9.868e-07, "loss": 1.107, "step": 45090 }, { "epoch": 4.51, "grad_norm": 5.887456893920898, "learning_rate": 9.848e-07, "loss": 0.9895, "step": 45100 }, { "epoch": 4.511, "grad_norm": 22.119016647338867, "learning_rate": 9.828000000000001e-07, "loss": 1.6439, "step": 45110 }, { "epoch": 4.5120000000000005, "grad_norm": 14.420635223388672, "learning_rate": 9.808e-07, "loss": 1.1708, "step": 45120 }, { "epoch": 4.513, "grad_norm": 6.702330589294434, "learning_rate": 9.788e-07, "loss": 0.9334, "step": 45130 }, { "epoch": 4.514, "grad_norm": 14.82872200012207, "learning_rate": 9.768000000000001e-07, "loss": 1.2124, "step": 45140 }, { "epoch": 4.515, "grad_norm": 17.46520233154297, "learning_rate": 9.748e-07, "loss": 1.0953, "step": 45150 }, { "epoch": 4.516, "grad_norm": 8.21792221069336, "learning_rate": 9.728e-07, "loss": 0.9612, "step": 45160 }, { "epoch": 4.517, "grad_norm": 22.9891357421875, "learning_rate": 9.708e-07, "loss": 1.1273, "step": 45170 }, { "epoch": 4.518, "grad_norm": 11.858664512634277, "learning_rate": 9.688e-07, "loss": 1.1488, "step": 45180 }, { "epoch": 4.519, "grad_norm": 13.18388557434082, "learning_rate": 9.668e-07, "loss": 1.1118, "step": 45190 }, { "epoch": 4.52, "grad_norm": 2.9013054370880127, "learning_rate": 9.648e-07, "loss": 1.223, "step": 45200 }, { "epoch": 4.521, "grad_norm": 19.261333465576172, "learning_rate": 9.628000000000001e-07, "loss": 1.1818, "step": 45210 }, { "epoch": 4.522, "grad_norm": 16.674442291259766, "learning_rate": 9.608e-07, "loss": 1.3043, "step": 45220 }, { "epoch": 4.523, "grad_norm": 5.136440753936768, "learning_rate": 9.588000000000002e-07, "loss": 1.1746, "step": 45230 }, { "epoch": 4.524, "grad_norm": 23.157920837402344, "learning_rate": 9.568e-07, "loss": 1.4457, "step": 
45240 }, { "epoch": 4.525, "grad_norm": 12.679525375366211, "learning_rate": 9.548e-07, "loss": 0.9297, "step": 45250 }, { "epoch": 4.526, "grad_norm": 17.756479263305664, "learning_rate": 9.53e-07, "loss": 1.1816, "step": 45260 }, { "epoch": 4.527, "grad_norm": 16.485782623291016, "learning_rate": 9.510000000000001e-07, "loss": 1.0918, "step": 45270 }, { "epoch": 4.5280000000000005, "grad_norm": 10.827254295349121, "learning_rate": 9.49e-07, "loss": 1.4318, "step": 45280 }, { "epoch": 4.529, "grad_norm": 15.99609661102295, "learning_rate": 9.470000000000001e-07, "loss": 1.1154, "step": 45290 }, { "epoch": 4.53, "grad_norm": 7.454152584075928, "learning_rate": 9.450000000000001e-07, "loss": 1.1527, "step": 45300 }, { "epoch": 4.531, "grad_norm": 9.543957710266113, "learning_rate": 9.43e-07, "loss": 1.3053, "step": 45310 }, { "epoch": 4.532, "grad_norm": 22.00533103942871, "learning_rate": 9.410000000000001e-07, "loss": 1.6652, "step": 45320 }, { "epoch": 4.533, "grad_norm": 14.583036422729492, "learning_rate": 9.39e-07, "loss": 1.0041, "step": 45330 }, { "epoch": 4.534, "grad_norm": 11.578705787658691, "learning_rate": 9.370000000000001e-07, "loss": 1.2948, "step": 45340 }, { "epoch": 4.535, "grad_norm": 8.279053688049316, "learning_rate": 9.35e-07, "loss": 1.0111, "step": 45350 }, { "epoch": 4.536, "grad_norm": 22.226892471313477, "learning_rate": 9.33e-07, "loss": 1.4414, "step": 45360 }, { "epoch": 4.537, "grad_norm": 13.62691593170166, "learning_rate": 9.310000000000001e-07, "loss": 1.5267, "step": 45370 }, { "epoch": 4.538, "grad_norm": 8.916881561279297, "learning_rate": 9.29e-07, "loss": 1.0944, "step": 45380 }, { "epoch": 4.539, "grad_norm": 20.211593627929688, "learning_rate": 9.270000000000001e-07, "loss": 1.1892, "step": 45390 }, { "epoch": 4.54, "grad_norm": 11.86705207824707, "learning_rate": 9.25e-07, "loss": 1.1245, "step": 45400 }, { "epoch": 4.541, "grad_norm": 10.393292427062988, "learning_rate": 9.23e-07, "loss": 1.1736, "step": 45410 }, { 
"epoch": 4.542, "grad_norm": 16.37786102294922, "learning_rate": 9.210000000000001e-07, "loss": 1.1475, "step": 45420 }, { "epoch": 4.543, "grad_norm": 9.762177467346191, "learning_rate": 9.19e-07, "loss": 1.033, "step": 45430 }, { "epoch": 4.5440000000000005, "grad_norm": 7.364811897277832, "learning_rate": 9.170000000000001e-07, "loss": 1.3335, "step": 45440 }, { "epoch": 4.545, "grad_norm": 17.16506004333496, "learning_rate": 9.15e-07, "loss": 1.3847, "step": 45450 }, { "epoch": 4.546, "grad_norm": 18.746536254882812, "learning_rate": 9.130000000000001e-07, "loss": 1.3621, "step": 45460 }, { "epoch": 4.547, "grad_norm": 10.805517196655273, "learning_rate": 9.11e-07, "loss": 1.0436, "step": 45470 }, { "epoch": 4.548, "grad_norm": 16.004335403442383, "learning_rate": 9.09e-07, "loss": 1.0413, "step": 45480 }, { "epoch": 4.549, "grad_norm": 24.71526527404785, "learning_rate": 9.070000000000001e-07, "loss": 1.0547, "step": 45490 }, { "epoch": 4.55, "grad_norm": 13.247299194335938, "learning_rate": 9.05e-07, "loss": 1.0919, "step": 45500 }, { "epoch": 4.551, "grad_norm": 10.122688293457031, "learning_rate": 9.030000000000001e-07, "loss": 1.2741, "step": 45510 }, { "epoch": 4.552, "grad_norm": 9.91264533996582, "learning_rate": 9.01e-07, "loss": 1.4275, "step": 45520 }, { "epoch": 4.553, "grad_norm": 13.605655670166016, "learning_rate": 8.99e-07, "loss": 0.9316, "step": 45530 }, { "epoch": 4.554, "grad_norm": 13.55661678314209, "learning_rate": 8.97e-07, "loss": 1.3873, "step": 45540 }, { "epoch": 4.555, "grad_norm": 12.084312438964844, "learning_rate": 8.95e-07, "loss": 1.1712, "step": 45550 }, { "epoch": 4.556, "grad_norm": 15.211421012878418, "learning_rate": 8.930000000000001e-07, "loss": 1.0998, "step": 45560 }, { "epoch": 4.557, "grad_norm": 25.85140609741211, "learning_rate": 8.91e-07, "loss": 1.4608, "step": 45570 }, { "epoch": 4.558, "grad_norm": 21.0035400390625, "learning_rate": 8.890000000000002e-07, "loss": 1.1427, "step": 45580 }, { "epoch": 4.559, 
"grad_norm": 26.64835548400879, "learning_rate": 8.87e-07, "loss": 1.3206, "step": 45590 }, { "epoch": 4.5600000000000005, "grad_norm": 14.099993705749512, "learning_rate": 8.85e-07, "loss": 1.1786, "step": 45600 }, { "epoch": 4.561, "grad_norm": 16.141645431518555, "learning_rate": 8.830000000000001e-07, "loss": 1.5023, "step": 45610 }, { "epoch": 4.562, "grad_norm": 17.344619750976562, "learning_rate": 8.81e-07, "loss": 1.5007, "step": 45620 }, { "epoch": 4.563, "grad_norm": 8.740788459777832, "learning_rate": 8.790000000000002e-07, "loss": 0.9042, "step": 45630 }, { "epoch": 4.564, "grad_norm": 14.385453224182129, "learning_rate": 8.77e-07, "loss": 0.9082, "step": 45640 }, { "epoch": 4.5649999999999995, "grad_norm": 12.349804878234863, "learning_rate": 8.75e-07, "loss": 1.2775, "step": 45650 }, { "epoch": 4.566, "grad_norm": 12.564716339111328, "learning_rate": 8.73e-07, "loss": 1.1562, "step": 45660 }, { "epoch": 4.567, "grad_norm": 14.569131851196289, "learning_rate": 8.71e-07, "loss": 1.5238, "step": 45670 }, { "epoch": 4.568, "grad_norm": 13.026010513305664, "learning_rate": 8.690000000000002e-07, "loss": 1.2976, "step": 45680 }, { "epoch": 4.569, "grad_norm": 12.02340030670166, "learning_rate": 8.67e-07, "loss": 1.1673, "step": 45690 }, { "epoch": 4.57, "grad_norm": 15.371016502380371, "learning_rate": 8.65e-07, "loss": 1.5111, "step": 45700 }, { "epoch": 4.571, "grad_norm": 21.097440719604492, "learning_rate": 8.63e-07, "loss": 1.2596, "step": 45710 }, { "epoch": 4.572, "grad_norm": 15.545428276062012, "learning_rate": 8.61e-07, "loss": 1.2803, "step": 45720 }, { "epoch": 4.573, "grad_norm": 12.099672317504883, "learning_rate": 8.590000000000002e-07, "loss": 1.5753, "step": 45730 }, { "epoch": 4.574, "grad_norm": 9.94796371459961, "learning_rate": 8.57e-07, "loss": 1.1295, "step": 45740 }, { "epoch": 4.575, "grad_norm": 19.44548225402832, "learning_rate": 8.550000000000002e-07, "loss": 1.2763, "step": 45750 }, { "epoch": 4.576, "grad_norm": 
18.572851181030273, "learning_rate": 8.53e-07, "loss": 1.1081, "step": 45760 }, { "epoch": 4.577, "grad_norm": 12.524190902709961, "learning_rate": 8.51e-07, "loss": 1.2211, "step": 45770 }, { "epoch": 4.578, "grad_norm": 14.56291675567627, "learning_rate": 8.490000000000002e-07, "loss": 1.0363, "step": 45780 }, { "epoch": 4.579, "grad_norm": 11.310712814331055, "learning_rate": 8.47e-07, "loss": 0.9598, "step": 45790 }, { "epoch": 4.58, "grad_norm": 10.459589958190918, "learning_rate": 8.450000000000002e-07, "loss": 1.0308, "step": 45800 }, { "epoch": 4.5809999999999995, "grad_norm": 10.84343433380127, "learning_rate": 8.43e-07, "loss": 1.1629, "step": 45810 }, { "epoch": 4.582, "grad_norm": 8.123089790344238, "learning_rate": 8.41e-07, "loss": 0.9752, "step": 45820 }, { "epoch": 4.583, "grad_norm": 15.375628471374512, "learning_rate": 8.390000000000001e-07, "loss": 1.1532, "step": 45830 }, { "epoch": 4.584, "grad_norm": 17.439382553100586, "learning_rate": 8.37e-07, "loss": 1.3285, "step": 45840 }, { "epoch": 4.585, "grad_norm": 17.947872161865234, "learning_rate": 8.350000000000002e-07, "loss": 0.9963, "step": 45850 }, { "epoch": 4.586, "grad_norm": 10.751060485839844, "learning_rate": 8.33e-07, "loss": 1.3295, "step": 45860 }, { "epoch": 4.587, "grad_norm": 8.439923286437988, "learning_rate": 8.31e-07, "loss": 1.1668, "step": 45870 }, { "epoch": 4.588, "grad_norm": 3.5884368419647217, "learning_rate": 8.290000000000001e-07, "loss": 1.2362, "step": 45880 }, { "epoch": 4.589, "grad_norm": 36.69309616088867, "learning_rate": 8.27e-07, "loss": 1.1977, "step": 45890 }, { "epoch": 4.59, "grad_norm": 11.824286460876465, "learning_rate": 8.250000000000001e-07, "loss": 1.1475, "step": 45900 }, { "epoch": 4.591, "grad_norm": 6.288175582885742, "learning_rate": 8.23e-07, "loss": 1.3439, "step": 45910 }, { "epoch": 4.592, "grad_norm": 5.075621604919434, "learning_rate": 8.210000000000002e-07, "loss": 0.88, "step": 45920 }, { "epoch": 4.593, "grad_norm": 5.18319034576416, 
"learning_rate": 8.190000000000001e-07, "loss": 0.7931, "step": 45930 }, { "epoch": 4.594, "grad_norm": 13.778714179992676, "learning_rate": 8.17e-07, "loss": 1.1927, "step": 45940 }, { "epoch": 4.595, "grad_norm": 9.419107437133789, "learning_rate": 8.150000000000001e-07, "loss": 1.0322, "step": 45950 }, { "epoch": 4.596, "grad_norm": 17.486082077026367, "learning_rate": 8.13e-07, "loss": 1.1056, "step": 45960 }, { "epoch": 4.5969999999999995, "grad_norm": 6.5352783203125, "learning_rate": 8.110000000000002e-07, "loss": 1.2042, "step": 45970 }, { "epoch": 4.598, "grad_norm": 24.053211212158203, "learning_rate": 8.090000000000001e-07, "loss": 1.0407, "step": 45980 }, { "epoch": 4.599, "grad_norm": 18.206750869750977, "learning_rate": 8.07e-07, "loss": 1.4328, "step": 45990 }, { "epoch": 4.6, "grad_norm": 16.106037139892578, "learning_rate": 8.050000000000001e-07, "loss": 0.637, "step": 46000 }, { "epoch": 4.601, "grad_norm": 14.437710762023926, "learning_rate": 8.03e-07, "loss": 1.1929, "step": 46010 }, { "epoch": 4.602, "grad_norm": 14.318513870239258, "learning_rate": 8.010000000000001e-07, "loss": 0.9723, "step": 46020 }, { "epoch": 4.603, "grad_norm": 13.886785507202148, "learning_rate": 7.990000000000001e-07, "loss": 1.0715, "step": 46030 }, { "epoch": 4.604, "grad_norm": 16.5504093170166, "learning_rate": 7.97e-07, "loss": 0.9057, "step": 46040 }, { "epoch": 4.605, "grad_norm": 20.915037155151367, "learning_rate": 7.950000000000001e-07, "loss": 1.3066, "step": 46050 }, { "epoch": 4.606, "grad_norm": 14.048877716064453, "learning_rate": 7.93e-07, "loss": 1.2183, "step": 46060 }, { "epoch": 4.607, "grad_norm": 18.454408645629883, "learning_rate": 7.910000000000001e-07, "loss": 1.4089, "step": 46070 }, { "epoch": 4.608, "grad_norm": 15.591011047363281, "learning_rate": 7.890000000000001e-07, "loss": 1.2236, "step": 46080 }, { "epoch": 4.609, "grad_norm": 15.324291229248047, "learning_rate": 7.870000000000002e-07, "loss": 1.3844, "step": 46090 }, { "epoch": 4.61, 
"grad_norm": 12.466224670410156, "learning_rate": 7.850000000000001e-07, "loss": 1.0285, "step": 46100 }, { "epoch": 4.611, "grad_norm": 14.653960227966309, "learning_rate": 7.83e-07, "loss": 0.9212, "step": 46110 }, { "epoch": 4.612, "grad_norm": 13.830098152160645, "learning_rate": 7.810000000000001e-07, "loss": 1.1619, "step": 46120 }, { "epoch": 4.6129999999999995, "grad_norm": 9.870322227478027, "learning_rate": 7.790000000000001e-07, "loss": 0.9849, "step": 46130 }, { "epoch": 4.614, "grad_norm": 8.710328102111816, "learning_rate": 7.770000000000001e-07, "loss": 1.2313, "step": 46140 }, { "epoch": 4.615, "grad_norm": 13.97889232635498, "learning_rate": 7.750000000000001e-07, "loss": 1.0132, "step": 46150 }, { "epoch": 4.616, "grad_norm": 2.6352128982543945, "learning_rate": 7.73e-07, "loss": 1.3696, "step": 46160 }, { "epoch": 4.617, "grad_norm": 16.821962356567383, "learning_rate": 7.710000000000001e-07, "loss": 0.9311, "step": 46170 }, { "epoch": 4.618, "grad_norm": 14.33956527709961, "learning_rate": 7.690000000000001e-07, "loss": 1.1282, "step": 46180 }, { "epoch": 4.619, "grad_norm": 12.25429916381836, "learning_rate": 7.670000000000001e-07, "loss": 1.3831, "step": 46190 }, { "epoch": 4.62, "grad_norm": 23.963905334472656, "learning_rate": 7.650000000000001e-07, "loss": 1.033, "step": 46200 }, { "epoch": 4.621, "grad_norm": 6.958648681640625, "learning_rate": 7.630000000000001e-07, "loss": 1.2374, "step": 46210 }, { "epoch": 4.622, "grad_norm": 11.383172035217285, "learning_rate": 7.610000000000001e-07, "loss": 1.1392, "step": 46220 }, { "epoch": 4.623, "grad_norm": 12.992165565490723, "learning_rate": 7.590000000000001e-07, "loss": 0.9534, "step": 46230 }, { "epoch": 4.624, "grad_norm": 10.452249526977539, "learning_rate": 7.570000000000001e-07, "loss": 1.1255, "step": 46240 }, { "epoch": 4.625, "grad_norm": 6.6197404861450195, "learning_rate": 7.550000000000001e-07, "loss": 1.2518, "step": 46250 }, { "epoch": 4.626, "grad_norm": 13.457558631896973, 
"learning_rate": 7.530000000000001e-07, "loss": 1.0255, "step": 46260 }, { "epoch": 4.627, "grad_norm": 5.6025710105896, "learning_rate": 7.510000000000001e-07, "loss": 0.9375, "step": 46270 }, { "epoch": 4.628, "grad_norm": 16.619482040405273, "learning_rate": 7.49e-07, "loss": 1.1988, "step": 46280 }, { "epoch": 4.629, "grad_norm": 11.013862609863281, "learning_rate": 7.470000000000001e-07, "loss": 0.9754, "step": 46290 }, { "epoch": 4.63, "grad_norm": 12.518280982971191, "learning_rate": 7.450000000000001e-07, "loss": 1.1967, "step": 46300 }, { "epoch": 4.631, "grad_norm": 12.71496868133545, "learning_rate": 7.430000000000001e-07, "loss": 0.9613, "step": 46310 }, { "epoch": 4.632, "grad_norm": 16.571823120117188, "learning_rate": 7.410000000000001e-07, "loss": 1.2779, "step": 46320 }, { "epoch": 4.633, "grad_norm": 18.211156845092773, "learning_rate": 7.39e-07, "loss": 1.2026, "step": 46330 }, { "epoch": 4.634, "grad_norm": 8.194426536560059, "learning_rate": 7.370000000000001e-07, "loss": 0.972, "step": 46340 }, { "epoch": 4.635, "grad_norm": 19.45575714111328, "learning_rate": 7.350000000000001e-07, "loss": 0.9582, "step": 46350 }, { "epoch": 4.636, "grad_norm": 11.803675651550293, "learning_rate": 7.330000000000001e-07, "loss": 1.2389, "step": 46360 }, { "epoch": 4.6370000000000005, "grad_norm": 153.16976928710938, "learning_rate": 7.310000000000001e-07, "loss": 1.3337, "step": 46370 }, { "epoch": 4.638, "grad_norm": 24.542015075683594, "learning_rate": 7.290000000000001e-07, "loss": 0.8875, "step": 46380 }, { "epoch": 4.639, "grad_norm": 12.287471771240234, "learning_rate": 7.270000000000001e-07, "loss": 1.3073, "step": 46390 }, { "epoch": 4.64, "grad_norm": 19.004915237426758, "learning_rate": 7.25e-07, "loss": 1.0772, "step": 46400 }, { "epoch": 4.641, "grad_norm": 11.591814994812012, "learning_rate": 7.230000000000001e-07, "loss": 1.1627, "step": 46410 }, { "epoch": 4.642, "grad_norm": 30.733745574951172, "learning_rate": 7.210000000000001e-07, "loss": 
1.4903, "step": 46420 }, { "epoch": 4.643, "grad_norm": 5.502108097076416, "learning_rate": 7.190000000000001e-07, "loss": 0.9528, "step": 46430 }, { "epoch": 4.644, "grad_norm": 15.268719673156738, "learning_rate": 7.170000000000001e-07, "loss": 1.2738, "step": 46440 }, { "epoch": 4.645, "grad_norm": 26.345645904541016, "learning_rate": 7.15e-07, "loss": 1.2546, "step": 46450 }, { "epoch": 4.646, "grad_norm": 9.434538841247559, "learning_rate": 7.130000000000001e-07, "loss": 1.0788, "step": 46460 }, { "epoch": 4.647, "grad_norm": 10.561675071716309, "learning_rate": 7.110000000000001e-07, "loss": 1.0885, "step": 46470 }, { "epoch": 4.648, "grad_norm": 25.50137710571289, "learning_rate": 7.090000000000001e-07, "loss": 1.1187, "step": 46480 }, { "epoch": 4.649, "grad_norm": 23.4624080657959, "learning_rate": 7.070000000000001e-07, "loss": 1.1164, "step": 46490 }, { "epoch": 4.65, "grad_norm": 14.630852699279785, "learning_rate": 7.05e-07, "loss": 1.5121, "step": 46500 }, { "epoch": 4.651, "grad_norm": 18.04327964782715, "learning_rate": 7.030000000000001e-07, "loss": 1.1195, "step": 46510 }, { "epoch": 4.652, "grad_norm": 16.67776107788086, "learning_rate": 7.01e-07, "loss": 0.9331, "step": 46520 }, { "epoch": 4.6530000000000005, "grad_norm": 11.352432250976562, "learning_rate": 6.990000000000001e-07, "loss": 0.8758, "step": 46530 }, { "epoch": 4.654, "grad_norm": 18.093460083007812, "learning_rate": 6.970000000000001e-07, "loss": 1.423, "step": 46540 }, { "epoch": 4.655, "grad_norm": 15.090108871459961, "learning_rate": 6.950000000000001e-07, "loss": 1.1776, "step": 46550 }, { "epoch": 4.656, "grad_norm": 9.96672534942627, "learning_rate": 6.930000000000001e-07, "loss": 1.0361, "step": 46560 }, { "epoch": 4.657, "grad_norm": 16.421283721923828, "learning_rate": 6.91e-07, "loss": 1.1186, "step": 46570 }, { "epoch": 4.658, "grad_norm": 13.369482040405273, "learning_rate": 6.890000000000001e-07, "loss": 1.2256, "step": 46580 }, { "epoch": 4.659, "grad_norm": 
20.24983787536621, "learning_rate": 6.87e-07, "loss": 1.4365, "step": 46590 }, { "epoch": 4.66, "grad_norm": 10.698502540588379, "learning_rate": 6.850000000000001e-07, "loss": 1.0873, "step": 46600 }, { "epoch": 4.661, "grad_norm": 9.67016315460205, "learning_rate": 6.830000000000001e-07, "loss": 1.117, "step": 46610 }, { "epoch": 4.662, "grad_norm": 13.585132598876953, "learning_rate": 6.81e-07, "loss": 1.6358, "step": 46620 }, { "epoch": 4.663, "grad_norm": 12.344901084899902, "learning_rate": 6.790000000000001e-07, "loss": 1.2569, "step": 46630 }, { "epoch": 4.664, "grad_norm": 9.834601402282715, "learning_rate": 6.77e-07, "loss": 1.4525, "step": 46640 }, { "epoch": 4.665, "grad_norm": 17.344913482666016, "learning_rate": 6.750000000000001e-07, "loss": 0.9851, "step": 46650 }, { "epoch": 4.666, "grad_norm": 19.977542877197266, "learning_rate": 6.730000000000001e-07, "loss": 1.3195, "step": 46660 }, { "epoch": 4.667, "grad_norm": 10.028802871704102, "learning_rate": 6.710000000000001e-07, "loss": 0.8771, "step": 46670 }, { "epoch": 4.668, "grad_norm": 24.09394645690918, "learning_rate": 6.690000000000001e-07, "loss": 1.036, "step": 46680 }, { "epoch": 4.6690000000000005, "grad_norm": 18.47347068786621, "learning_rate": 6.67e-07, "loss": 1.2684, "step": 46690 }, { "epoch": 4.67, "grad_norm": 16.756168365478516, "learning_rate": 6.650000000000001e-07, "loss": 1.4017, "step": 46700 }, { "epoch": 4.671, "grad_norm": 12.11337947845459, "learning_rate": 6.63e-07, "loss": 1.0991, "step": 46710 }, { "epoch": 4.672, "grad_norm": 11.619734764099121, "learning_rate": 6.610000000000001e-07, "loss": 1.2671, "step": 46720 }, { "epoch": 4.673, "grad_norm": 9.587692260742188, "learning_rate": 6.590000000000001e-07, "loss": 1.2271, "step": 46730 }, { "epoch": 4.674, "grad_norm": 9.928637504577637, "learning_rate": 6.57e-07, "loss": 0.8564, "step": 46740 }, { "epoch": 4.675, "grad_norm": 15.121119499206543, "learning_rate": 6.550000000000001e-07, "loss": 1.5737, "step": 46750 }, 
{ "epoch": 4.676, "grad_norm": 19.570262908935547, "learning_rate": 6.53e-07, "loss": 1.2458, "step": 46760 }, { "epoch": 4.677, "grad_norm": 17.424236297607422, "learning_rate": 6.510000000000001e-07, "loss": 1.0676, "step": 46770 }, { "epoch": 4.678, "grad_norm": 9.264199256896973, "learning_rate": 6.490000000000001e-07, "loss": 1.3227, "step": 46780 }, { "epoch": 4.679, "grad_norm": 13.71746826171875, "learning_rate": 6.47e-07, "loss": 0.9078, "step": 46790 }, { "epoch": 4.68, "grad_norm": 20.35131072998047, "learning_rate": 6.450000000000001e-07, "loss": 1.6876, "step": 46800 }, { "epoch": 4.681, "grad_norm": 6.750603199005127, "learning_rate": 6.43e-07, "loss": 1.2762, "step": 46810 }, { "epoch": 4.682, "grad_norm": 12.444735527038574, "learning_rate": 6.410000000000001e-07, "loss": 1.3237, "step": 46820 }, { "epoch": 4.683, "grad_norm": 19.28900146484375, "learning_rate": 6.39e-07, "loss": 1.2636, "step": 46830 }, { "epoch": 4.684, "grad_norm": 8.048917770385742, "learning_rate": 6.370000000000001e-07, "loss": 1.2659, "step": 46840 }, { "epoch": 4.6850000000000005, "grad_norm": 10.288005828857422, "learning_rate": 6.350000000000001e-07, "loss": 0.9304, "step": 46850 }, { "epoch": 4.686, "grad_norm": 12.988576889038086, "learning_rate": 6.33e-07, "loss": 1.1941, "step": 46860 }, { "epoch": 4.687, "grad_norm": 9.227282524108887, "learning_rate": 6.310000000000001e-07, "loss": 0.8328, "step": 46870 }, { "epoch": 4.688, "grad_norm": 9.15925121307373, "learning_rate": 6.29e-07, "loss": 1.0504, "step": 46880 }, { "epoch": 4.689, "grad_norm": 12.733846664428711, "learning_rate": 6.270000000000001e-07, "loss": 1.4365, "step": 46890 }, { "epoch": 4.6899999999999995, "grad_norm": 16.01046371459961, "learning_rate": 6.25e-07, "loss": 1.0801, "step": 46900 }, { "epoch": 4.691, "grad_norm": 17.41234588623047, "learning_rate": 6.230000000000001e-07, "loss": 1.1822, "step": 46910 }, { "epoch": 4.692, "grad_norm": 16.686107635498047, "learning_rate": 6.210000000000001e-07, 
"loss": 1.4843, "step": 46920 }, { "epoch": 4.693, "grad_norm": 20.03928565979004, "learning_rate": 6.19e-07, "loss": 1.2936, "step": 46930 }, { "epoch": 4.694, "grad_norm": 10.024227142333984, "learning_rate": 6.17e-07, "loss": 1.2526, "step": 46940 }, { "epoch": 4.695, "grad_norm": 12.91640567779541, "learning_rate": 6.15e-07, "loss": 1.3999, "step": 46950 }, { "epoch": 4.696, "grad_norm": 20.425212860107422, "learning_rate": 6.130000000000001e-07, "loss": 1.3763, "step": 46960 }, { "epoch": 4.697, "grad_norm": 12.900979042053223, "learning_rate": 6.110000000000001e-07, "loss": 1.2411, "step": 46970 }, { "epoch": 4.698, "grad_norm": 9.59206771850586, "learning_rate": 6.090000000000001e-07, "loss": 1.2758, "step": 46980 }, { "epoch": 4.699, "grad_norm": 22.589502334594727, "learning_rate": 6.07e-07, "loss": 1.0573, "step": 46990 }, { "epoch": 4.7, "grad_norm": 14.937651634216309, "learning_rate": 6.05e-07, "loss": 1.2405, "step": 47000 }, { "epoch": 4.701, "grad_norm": 15.989797592163086, "learning_rate": 6.030000000000001e-07, "loss": 1.497, "step": 47010 }, { "epoch": 4.702, "grad_norm": 11.879857063293457, "learning_rate": 6.01e-07, "loss": 1.2846, "step": 47020 }, { "epoch": 4.703, "grad_norm": 26.237279891967773, "learning_rate": 5.990000000000001e-07, "loss": 1.1818, "step": 47030 }, { "epoch": 4.704, "grad_norm": 15.58030891418457, "learning_rate": 5.970000000000001e-07, "loss": 1.2153, "step": 47040 }, { "epoch": 4.705, "grad_norm": 13.050728797912598, "learning_rate": 5.95e-07, "loss": 1.3826, "step": 47050 }, { "epoch": 4.7059999999999995, "grad_norm": 11.890304565429688, "learning_rate": 5.930000000000001e-07, "loss": 1.5276, "step": 47060 }, { "epoch": 4.707, "grad_norm": 14.813711166381836, "learning_rate": 5.91e-07, "loss": 1.2225, "step": 47070 }, { "epoch": 4.708, "grad_norm": 11.343430519104004, "learning_rate": 5.890000000000001e-07, "loss": 1.2055, "step": 47080 }, { "epoch": 4.709, "grad_norm": 19.969602584838867, "learning_rate": 
5.870000000000001e-07, "loss": 1.3754, "step": 47090 }, { "epoch": 4.71, "grad_norm": 12.194093704223633, "learning_rate": 5.850000000000001e-07, "loss": 1.3306, "step": 47100 }, { "epoch": 4.711, "grad_norm": 7.72124719619751, "learning_rate": 5.830000000000001e-07, "loss": 1.6323, "step": 47110 }, { "epoch": 4.712, "grad_norm": 17.651622772216797, "learning_rate": 5.81e-07, "loss": 1.1136, "step": 47120 }, { "epoch": 4.713, "grad_norm": 14.683650016784668, "learning_rate": 5.790000000000001e-07, "loss": 1.4829, "step": 47130 }, { "epoch": 4.714, "grad_norm": 14.822915077209473, "learning_rate": 5.77e-07, "loss": 1.1017, "step": 47140 }, { "epoch": 4.715, "grad_norm": 14.57126235961914, "learning_rate": 5.750000000000001e-07, "loss": 1.3147, "step": 47150 }, { "epoch": 4.716, "grad_norm": 15.750631332397461, "learning_rate": 5.730000000000001e-07, "loss": 1.4904, "step": 47160 }, { "epoch": 4.717, "grad_norm": 11.18923282623291, "learning_rate": 5.71e-07, "loss": 1.3491, "step": 47170 }, { "epoch": 4.718, "grad_norm": 15.406498908996582, "learning_rate": 5.690000000000001e-07, "loss": 1.1207, "step": 47180 }, { "epoch": 4.719, "grad_norm": 18.01078224182129, "learning_rate": 5.67e-07, "loss": 1.0605, "step": 47190 }, { "epoch": 4.72, "grad_norm": 13.256921768188477, "learning_rate": 5.650000000000001e-07, "loss": 1.2364, "step": 47200 }, { "epoch": 4.721, "grad_norm": 11.363579750061035, "learning_rate": 5.63e-07, "loss": 1.1781, "step": 47210 }, { "epoch": 4.7219999999999995, "grad_norm": 9.904160499572754, "learning_rate": 5.61e-07, "loss": 1.3883, "step": 47220 }, { "epoch": 4.723, "grad_norm": 15.038572311401367, "learning_rate": 5.590000000000001e-07, "loss": 1.5003, "step": 47230 }, { "epoch": 4.724, "grad_norm": 5.534049987792969, "learning_rate": 5.57e-07, "loss": 1.0048, "step": 47240 }, { "epoch": 4.725, "grad_norm": 7.939199924468994, "learning_rate": 5.550000000000001e-07, "loss": 0.9913, "step": 47250 }, { "epoch": 4.726, "grad_norm": 
19.920578002929688, "learning_rate": 5.532000000000001e-07, "loss": 1.2557, "step": 47260 }, { "epoch": 4.727, "grad_norm": 5.960339069366455, "learning_rate": 5.512000000000001e-07, "loss": 0.9807, "step": 47270 }, { "epoch": 4.728, "grad_norm": 15.92531967163086, "learning_rate": 5.492e-07, "loss": 1.5984, "step": 47280 }, { "epoch": 4.729, "grad_norm": 17.15091896057129, "learning_rate": 5.472e-07, "loss": 1.3866, "step": 47290 }, { "epoch": 4.73, "grad_norm": 5.384702682495117, "learning_rate": 5.452e-07, "loss": 0.9851, "step": 47300 }, { "epoch": 4.731, "grad_norm": 16.594070434570312, "learning_rate": 5.432e-07, "loss": 1.2817, "step": 47310 }, { "epoch": 4.732, "grad_norm": 12.88974380493164, "learning_rate": 5.412000000000001e-07, "loss": 1.2175, "step": 47320 }, { "epoch": 4.733, "grad_norm": 6.686081886291504, "learning_rate": 5.392000000000001e-07, "loss": 0.8634, "step": 47330 }, { "epoch": 4.734, "grad_norm": 14.341961860656738, "learning_rate": 5.372e-07, "loss": 1.1407, "step": 47340 }, { "epoch": 4.735, "grad_norm": 16.38446617126465, "learning_rate": 5.352e-07, "loss": 1.4016, "step": 47350 }, { "epoch": 4.736, "grad_norm": 16.196006774902344, "learning_rate": 5.332e-07, "loss": 1.172, "step": 47360 }, { "epoch": 4.737, "grad_norm": 13.914946556091309, "learning_rate": 5.312000000000001e-07, "loss": 1.3749, "step": 47370 }, { "epoch": 4.7379999999999995, "grad_norm": 18.85614776611328, "learning_rate": 5.292000000000001e-07, "loss": 1.427, "step": 47380 }, { "epoch": 4.739, "grad_norm": 13.755147933959961, "learning_rate": 5.272000000000001e-07, "loss": 1.2009, "step": 47390 }, { "epoch": 4.74, "grad_norm": 24.512054443359375, "learning_rate": 5.252e-07, "loss": 1.1306, "step": 47400 }, { "epoch": 4.741, "grad_norm": 12.764700889587402, "learning_rate": 5.232e-07, "loss": 0.9431, "step": 47410 }, { "epoch": 4.742, "grad_norm": 10.962005615234375, "learning_rate": 5.212000000000001e-07, "loss": 1.0721, "step": 47420 }, { "epoch": 4.743, 
"grad_norm": 12.578849792480469, "learning_rate": 5.192e-07, "loss": 1.2918, "step": 47430 }, { "epoch": 4.744, "grad_norm": 10.939888954162598, "learning_rate": 5.172000000000001e-07, "loss": 1.4011, "step": 47440 }, { "epoch": 4.745, "grad_norm": 15.360641479492188, "learning_rate": 5.152000000000001e-07, "loss": 0.9199, "step": 47450 }, { "epoch": 4.746, "grad_norm": 11.202898025512695, "learning_rate": 5.132e-07, "loss": 1.3847, "step": 47460 }, { "epoch": 4.747, "grad_norm": 7.604897975921631, "learning_rate": 5.112000000000001e-07, "loss": 1.2839, "step": 47470 }, { "epoch": 4.748, "grad_norm": 14.356551170349121, "learning_rate": 5.092e-07, "loss": 1.208, "step": 47480 }, { "epoch": 4.749, "grad_norm": 14.392218589782715, "learning_rate": 5.072000000000001e-07, "loss": 1.4401, "step": 47490 }, { "epoch": 4.75, "grad_norm": 14.80517292022705, "learning_rate": 5.052e-07, "loss": 1.0192, "step": 47500 }, { "epoch": 4.751, "grad_norm": 16.063772201538086, "learning_rate": 5.032e-07, "loss": 1.2434, "step": 47510 }, { "epoch": 4.752, "grad_norm": 10.472773551940918, "learning_rate": 5.012000000000001e-07, "loss": 1.2228, "step": 47520 }, { "epoch": 4.753, "grad_norm": 18.22627830505371, "learning_rate": 4.992e-07, "loss": 1.1507, "step": 47530 }, { "epoch": 4.754, "grad_norm": 16.454776763916016, "learning_rate": 4.972000000000001e-07, "loss": 1.466, "step": 47540 }, { "epoch": 4.755, "grad_norm": 15.895477294921875, "learning_rate": 4.952e-07, "loss": 1.3118, "step": 47550 }, { "epoch": 4.756, "grad_norm": 14.383667945861816, "learning_rate": 4.932000000000001e-07, "loss": 1.0818, "step": 47560 }, { "epoch": 4.757, "grad_norm": 12.986364364624023, "learning_rate": 4.912000000000001e-07, "loss": 1.47, "step": 47570 }, { "epoch": 4.758, "grad_norm": 14.065024375915527, "learning_rate": 4.892e-07, "loss": 1.1655, "step": 47580 }, { "epoch": 4.759, "grad_norm": 11.062742233276367, "learning_rate": 4.872000000000001e-07, "loss": 1.1232, "step": 47590 }, { "epoch": 
4.76, "grad_norm": 11.748169898986816, "learning_rate": 4.852e-07, "loss": 1.125, "step": 47600 }, { "epoch": 4.761, "grad_norm": 12.285787582397461, "learning_rate": 4.832000000000001e-07, "loss": 1.2583, "step": 47610 }, { "epoch": 4.7620000000000005, "grad_norm": 9.45317554473877, "learning_rate": 4.812e-07, "loss": 1.0138, "step": 47620 }, { "epoch": 4.763, "grad_norm": 8.378962516784668, "learning_rate": 4.792e-07, "loss": 1.3587, "step": 47630 }, { "epoch": 4.764, "grad_norm": 14.803422927856445, "learning_rate": 4.772000000000001e-07, "loss": 1.2268, "step": 47640 }, { "epoch": 4.765, "grad_norm": 13.518595695495605, "learning_rate": 4.752e-07, "loss": 0.7695, "step": 47650 }, { "epoch": 4.766, "grad_norm": 13.128028869628906, "learning_rate": 4.7320000000000003e-07, "loss": 1.0845, "step": 47660 }, { "epoch": 4.767, "grad_norm": 13.671338081359863, "learning_rate": 4.7120000000000004e-07, "loss": 1.3261, "step": 47670 }, { "epoch": 4.768, "grad_norm": 10.717680931091309, "learning_rate": 4.6920000000000005e-07, "loss": 1.2679, "step": 47680 }, { "epoch": 4.769, "grad_norm": 16.54100799560547, "learning_rate": 4.672e-07, "loss": 1.3786, "step": 47690 }, { "epoch": 4.77, "grad_norm": 7.2644147872924805, "learning_rate": 4.652e-07, "loss": 1.0476, "step": 47700 }, { "epoch": 4.771, "grad_norm": 9.183688163757324, "learning_rate": 4.632e-07, "loss": 1.2962, "step": 47710 }, { "epoch": 4.772, "grad_norm": 7.524464130401611, "learning_rate": 4.6120000000000003e-07, "loss": 1.0955, "step": 47720 }, { "epoch": 4.773, "grad_norm": 16.205413818359375, "learning_rate": 4.592000000000001e-07, "loss": 1.5885, "step": 47730 }, { "epoch": 4.774, "grad_norm": 17.883371353149414, "learning_rate": 4.572e-07, "loss": 1.1673, "step": 47740 }, { "epoch": 4.775, "grad_norm": 12.788191795349121, "learning_rate": 4.552e-07, "loss": 1.1878, "step": 47750 }, { "epoch": 4.776, "grad_norm": 13.053648948669434, "learning_rate": 4.532e-07, "loss": 0.7899, "step": 47760 }, { "epoch": 
4.777, "grad_norm": 12.10445499420166, "learning_rate": 4.512e-07, "loss": 1.2416, "step": 47770 }, { "epoch": 4.7780000000000005, "grad_norm": 13.703575134277344, "learning_rate": 4.492000000000001e-07, "loss": 1.4151, "step": 47780 }, { "epoch": 4.779, "grad_norm": 16.382476806640625, "learning_rate": 4.472000000000001e-07, "loss": 1.2841, "step": 47790 }, { "epoch": 4.78, "grad_norm": 12.628400802612305, "learning_rate": 4.452e-07, "loss": 1.0288, "step": 47800 }, { "epoch": 4.781, "grad_norm": 13.718515396118164, "learning_rate": 4.432e-07, "loss": 1.2873, "step": 47810 }, { "epoch": 4.782, "grad_norm": 10.306167602539062, "learning_rate": 4.412e-07, "loss": 1.2439, "step": 47820 }, { "epoch": 4.783, "grad_norm": 12.161497116088867, "learning_rate": 4.3920000000000007e-07, "loss": 1.2693, "step": 47830 }, { "epoch": 4.784, "grad_norm": 8.344747543334961, "learning_rate": 4.372000000000001e-07, "loss": 1.2064, "step": 47840 }, { "epoch": 4.785, "grad_norm": 19.63575553894043, "learning_rate": 4.352000000000001e-07, "loss": 1.1449, "step": 47850 }, { "epoch": 4.786, "grad_norm": 11.102298736572266, "learning_rate": 4.332e-07, "loss": 1.3254, "step": 47860 }, { "epoch": 4.787, "grad_norm": 18.30681610107422, "learning_rate": 4.312e-07, "loss": 1.2388, "step": 47870 }, { "epoch": 4.788, "grad_norm": 4.387425899505615, "learning_rate": 4.2920000000000006e-07, "loss": 1.1437, "step": 47880 }, { "epoch": 4.789, "grad_norm": 17.47730827331543, "learning_rate": 4.2720000000000007e-07, "loss": 1.0814, "step": 47890 }, { "epoch": 4.79, "grad_norm": 11.70669174194336, "learning_rate": 4.252000000000001e-07, "loss": 1.0286, "step": 47900 }, { "epoch": 4.791, "grad_norm": 10.256853103637695, "learning_rate": 4.232000000000001e-07, "loss": 0.8169, "step": 47910 }, { "epoch": 4.792, "grad_norm": 7.251125335693359, "learning_rate": 4.212e-07, "loss": 1.1741, "step": 47920 }, { "epoch": 4.793, "grad_norm": 15.580222129821777, "learning_rate": 4.1920000000000005e-07, "loss": 
1.1461, "step": 47930 }, { "epoch": 4.7940000000000005, "grad_norm": 14.123849868774414, "learning_rate": 4.1720000000000006e-07, "loss": 1.1517, "step": 47940 }, { "epoch": 4.795, "grad_norm": 15.716085433959961, "learning_rate": 4.1520000000000007e-07, "loss": 1.2605, "step": 47950 }, { "epoch": 4.796, "grad_norm": 6.7494096755981445, "learning_rate": 4.132000000000001e-07, "loss": 0.9225, "step": 47960 }, { "epoch": 4.797, "grad_norm": 13.01016616821289, "learning_rate": 4.112e-07, "loss": 1.0878, "step": 47970 }, { "epoch": 4.798, "grad_norm": 7.401202201843262, "learning_rate": 4.0920000000000004e-07, "loss": 1.4309, "step": 47980 }, { "epoch": 4.799, "grad_norm": 10.253573417663574, "learning_rate": 4.0720000000000005e-07, "loss": 1.2938, "step": 47990 }, { "epoch": 4.8, "grad_norm": 9.439580917358398, "learning_rate": 4.0520000000000005e-07, "loss": 0.8881, "step": 48000 }, { "epoch": 4.801, "grad_norm": 11.027134895324707, "learning_rate": 4.0320000000000006e-07, "loss": 0.7573, "step": 48010 }, { "epoch": 4.802, "grad_norm": 6.751844882965088, "learning_rate": 4.0120000000000007e-07, "loss": 1.0886, "step": 48020 }, { "epoch": 4.803, "grad_norm": 13.410517692565918, "learning_rate": 3.9920000000000003e-07, "loss": 1.1182, "step": 48030 }, { "epoch": 4.804, "grad_norm": 6.167282581329346, "learning_rate": 3.9720000000000004e-07, "loss": 1.3046, "step": 48040 }, { "epoch": 4.805, "grad_norm": 8.469466209411621, "learning_rate": 3.9520000000000004e-07, "loss": 1.1764, "step": 48050 }, { "epoch": 4.806, "grad_norm": 12.131616592407227, "learning_rate": 3.9320000000000005e-07, "loss": 1.2145, "step": 48060 }, { "epoch": 4.807, "grad_norm": 10.844801902770996, "learning_rate": 3.9120000000000006e-07, "loss": 1.2735, "step": 48070 }, { "epoch": 4.808, "grad_norm": 20.290603637695312, "learning_rate": 3.8920000000000007e-07, "loss": 1.1103, "step": 48080 }, { "epoch": 4.809, "grad_norm": 14.296882629394531, "learning_rate": 3.872e-07, "loss": 1.2296, "step": 48090 
}, { "epoch": 4.8100000000000005, "grad_norm": 13.904142379760742, "learning_rate": 3.8520000000000003e-07, "loss": 1.3511, "step": 48100 }, { "epoch": 4.811, "grad_norm": 1.9006843566894531, "learning_rate": 3.8320000000000004e-07, "loss": 1.2073, "step": 48110 }, { "epoch": 4.812, "grad_norm": 14.908390998840332, "learning_rate": 3.8120000000000005e-07, "loss": 1.3341, "step": 48120 }, { "epoch": 4.813, "grad_norm": 16.474443435668945, "learning_rate": 3.7920000000000006e-07, "loss": 1.2275, "step": 48130 }, { "epoch": 4.814, "grad_norm": 9.926040649414062, "learning_rate": 3.772e-07, "loss": 1.2841, "step": 48140 }, { "epoch": 4.8149999999999995, "grad_norm": 20.10559844970703, "learning_rate": 3.752e-07, "loss": 0.8681, "step": 48150 }, { "epoch": 4.816, "grad_norm": 13.550344467163086, "learning_rate": 3.7320000000000003e-07, "loss": 0.9893, "step": 48160 }, { "epoch": 4.817, "grad_norm": 13.362993240356445, "learning_rate": 3.7120000000000004e-07, "loss": 1.2673, "step": 48170 }, { "epoch": 4.818, "grad_norm": 16.592138290405273, "learning_rate": 3.6920000000000005e-07, "loss": 1.3701, "step": 48180 }, { "epoch": 4.819, "grad_norm": 10.297795295715332, "learning_rate": 3.6720000000000006e-07, "loss": 0.9434, "step": 48190 }, { "epoch": 4.82, "grad_norm": 22.095474243164062, "learning_rate": 3.652e-07, "loss": 1.3179, "step": 48200 }, { "epoch": 4.821, "grad_norm": 23.35454750061035, "learning_rate": 3.632e-07, "loss": 1.2315, "step": 48210 }, { "epoch": 4.822, "grad_norm": 14.96501636505127, "learning_rate": 3.6120000000000003e-07, "loss": 1.0085, "step": 48220 }, { "epoch": 4.823, "grad_norm": 18.714309692382812, "learning_rate": 3.5920000000000004e-07, "loss": 1.1383, "step": 48230 }, { "epoch": 4.824, "grad_norm": 9.217681884765625, "learning_rate": 3.5720000000000005e-07, "loss": 1.0604, "step": 48240 }, { "epoch": 4.825, "grad_norm": 12.653575897216797, "learning_rate": 3.5520000000000006e-07, "loss": 1.0111, "step": 48250 }, { "epoch": 4.826, 
"grad_norm": 29.143741607666016, "learning_rate": 3.532e-07, "loss": 1.2022, "step": 48260 }, { "epoch": 4.827, "grad_norm": 11.138057708740234, "learning_rate": 3.512e-07, "loss": 1.1722, "step": 48270 }, { "epoch": 4.828, "grad_norm": 13.511101722717285, "learning_rate": 3.4920000000000003e-07, "loss": 1.5752, "step": 48280 }, { "epoch": 4.829, "grad_norm": 14.287810325622559, "learning_rate": 3.4720000000000004e-07, "loss": 1.1903, "step": 48290 }, { "epoch": 4.83, "grad_norm": 13.574259757995605, "learning_rate": 3.4520000000000005e-07, "loss": 0.9942, "step": 48300 }, { "epoch": 4.8309999999999995, "grad_norm": 11.149930953979492, "learning_rate": 3.4320000000000006e-07, "loss": 1.1167, "step": 48310 }, { "epoch": 4.832, "grad_norm": 12.141799926757812, "learning_rate": 3.412e-07, "loss": 0.9636, "step": 48320 }, { "epoch": 4.833, "grad_norm": 21.99290657043457, "learning_rate": 3.392e-07, "loss": 1.4289, "step": 48330 }, { "epoch": 4.834, "grad_norm": 24.00645637512207, "learning_rate": 3.3720000000000003e-07, "loss": 1.3183, "step": 48340 }, { "epoch": 4.835, "grad_norm": 17.21304702758789, "learning_rate": 3.3520000000000004e-07, "loss": 0.9642, "step": 48350 }, { "epoch": 4.836, "grad_norm": 16.077877044677734, "learning_rate": 3.3320000000000005e-07, "loss": 1.2136, "step": 48360 }, { "epoch": 4.837, "grad_norm": 8.479795455932617, "learning_rate": 3.312e-07, "loss": 0.7927, "step": 48370 }, { "epoch": 4.838, "grad_norm": 9.569647789001465, "learning_rate": 3.292e-07, "loss": 1.1165, "step": 48380 }, { "epoch": 4.839, "grad_norm": 20.958900451660156, "learning_rate": 3.272e-07, "loss": 0.7329, "step": 48390 }, { "epoch": 4.84, "grad_norm": 9.821215629577637, "learning_rate": 3.2520000000000003e-07, "loss": 1.3442, "step": 48400 }, { "epoch": 4.841, "grad_norm": 25.69924545288086, "learning_rate": 3.2320000000000004e-07, "loss": 1.2745, "step": 48410 }, { "epoch": 4.842, "grad_norm": 14.449767112731934, "learning_rate": 3.2120000000000004e-07, "loss": 
1.4525, "step": 48420 }, { "epoch": 4.843, "grad_norm": 31.459869384765625, "learning_rate": 3.192e-07, "loss": 1.2878, "step": 48430 }, { "epoch": 4.844, "grad_norm": 20.076562881469727, "learning_rate": 3.172e-07, "loss": 1.7414, "step": 48440 }, { "epoch": 4.845, "grad_norm": 15.359808921813965, "learning_rate": 3.152e-07, "loss": 1.4355, "step": 48450 }, { "epoch": 4.846, "grad_norm": 8.72826862335205, "learning_rate": 3.132e-07, "loss": 1.8541, "step": 48460 }, { "epoch": 4.8469999999999995, "grad_norm": 17.565486907958984, "learning_rate": 3.1120000000000003e-07, "loss": 1.007, "step": 48470 }, { "epoch": 4.848, "grad_norm": 26.350252151489258, "learning_rate": 3.0920000000000004e-07, "loss": 1.3148, "step": 48480 }, { "epoch": 4.849, "grad_norm": 22.422231674194336, "learning_rate": 3.0720000000000005e-07, "loss": 1.0464, "step": 48490 }, { "epoch": 4.85, "grad_norm": 4.940583229064941, "learning_rate": 3.052e-07, "loss": 1.1831, "step": 48500 }, { "epoch": 4.851, "grad_norm": 7.200697422027588, "learning_rate": 3.032e-07, "loss": 1.4093, "step": 48510 }, { "epoch": 4.852, "grad_norm": 19.685985565185547, "learning_rate": 3.012e-07, "loss": 0.8692, "step": 48520 }, { "epoch": 4.853, "grad_norm": 17.66096305847168, "learning_rate": 2.9920000000000003e-07, "loss": 1.0958, "step": 48530 }, { "epoch": 4.854, "grad_norm": 15.155620574951172, "learning_rate": 2.9720000000000004e-07, "loss": 1.5275, "step": 48540 }, { "epoch": 4.855, "grad_norm": 6.375478267669678, "learning_rate": 2.9520000000000005e-07, "loss": 1.3166, "step": 48550 }, { "epoch": 4.856, "grad_norm": 11.450559616088867, "learning_rate": 2.932e-07, "loss": 1.32, "step": 48560 }, { "epoch": 4.857, "grad_norm": 5.482268333435059, "learning_rate": 2.912e-07, "loss": 1.3896, "step": 48570 }, { "epoch": 4.858, "grad_norm": 15.051796913146973, "learning_rate": 2.892e-07, "loss": 1.4569, "step": 48580 }, { "epoch": 4.859, "grad_norm": 32.78013610839844, "learning_rate": 2.8720000000000003e-07, "loss": 
1.3912, "step": 48590 }, { "epoch": 4.86, "grad_norm": 12.877062797546387, "learning_rate": 2.8520000000000004e-07, "loss": 1.147, "step": 48600 }, { "epoch": 4.861, "grad_norm": 9.800518035888672, "learning_rate": 2.8320000000000005e-07, "loss": 0.7981, "step": 48610 }, { "epoch": 4.862, "grad_norm": 11.529276847839355, "learning_rate": 2.812e-07, "loss": 1.2638, "step": 48620 }, { "epoch": 4.8629999999999995, "grad_norm": 18.61834716796875, "learning_rate": 2.792e-07, "loss": 1.1915, "step": 48630 }, { "epoch": 4.864, "grad_norm": 12.148416519165039, "learning_rate": 2.772e-07, "loss": 1.1092, "step": 48640 }, { "epoch": 4.865, "grad_norm": 9.392664909362793, "learning_rate": 2.7520000000000003e-07, "loss": 1.182, "step": 48650 }, { "epoch": 4.866, "grad_norm": 14.475680351257324, "learning_rate": 2.7320000000000004e-07, "loss": 1.3256, "step": 48660 }, { "epoch": 4.867, "grad_norm": 16.113954544067383, "learning_rate": 2.712e-07, "loss": 1.1111, "step": 48670 }, { "epoch": 4.868, "grad_norm": 10.03485107421875, "learning_rate": 2.692e-07, "loss": 1.0542, "step": 48680 }, { "epoch": 4.869, "grad_norm": 14.912086486816406, "learning_rate": 2.672e-07, "loss": 1.0697, "step": 48690 }, { "epoch": 4.87, "grad_norm": 9.063339233398438, "learning_rate": 2.652e-07, "loss": 1.0259, "step": 48700 }, { "epoch": 4.871, "grad_norm": 15.626537322998047, "learning_rate": 2.6320000000000003e-07, "loss": 1.0215, "step": 48710 }, { "epoch": 4.872, "grad_norm": 13.895442962646484, "learning_rate": 2.6120000000000004e-07, "loss": 1.0108, "step": 48720 }, { "epoch": 4.873, "grad_norm": 13.027714729309082, "learning_rate": 2.592e-07, "loss": 1.2171, "step": 48730 }, { "epoch": 4.874, "grad_norm": 10.954648971557617, "learning_rate": 2.572e-07, "loss": 1.3213, "step": 48740 }, { "epoch": 4.875, "grad_norm": 2.579612970352173, "learning_rate": 2.552e-07, "loss": 1.0389, "step": 48750 }, { "epoch": 4.876, "grad_norm": 6.872035026550293, "learning_rate": 2.532e-07, "loss": 1.1078, "step": 
48760 }, { "epoch": 4.877, "grad_norm": 15.394369125366211, "learning_rate": 2.512e-07, "loss": 1.238, "step": 48770 }, { "epoch": 4.878, "grad_norm": 11.708088874816895, "learning_rate": 2.4920000000000003e-07, "loss": 1.1287, "step": 48780 }, { "epoch": 4.879, "grad_norm": 2.5574259757995605, "learning_rate": 2.472e-07, "loss": 1.0091, "step": 48790 }, { "epoch": 4.88, "grad_norm": 6.260869979858398, "learning_rate": 2.452e-07, "loss": 1.4283, "step": 48800 }, { "epoch": 4.881, "grad_norm": 18.17123031616211, "learning_rate": 2.432e-07, "loss": 1.2768, "step": 48810 }, { "epoch": 4.882, "grad_norm": 12.198153495788574, "learning_rate": 2.412e-07, "loss": 1.376, "step": 48820 }, { "epoch": 4.883, "grad_norm": 12.14577579498291, "learning_rate": 2.392e-07, "loss": 1.1416, "step": 48830 }, { "epoch": 4.884, "grad_norm": 20.041431427001953, "learning_rate": 2.3720000000000003e-07, "loss": 1.0058, "step": 48840 }, { "epoch": 4.885, "grad_norm": 14.683784484863281, "learning_rate": 2.3520000000000001e-07, "loss": 1.2542, "step": 48850 }, { "epoch": 4.886, "grad_norm": 9.728124618530273, "learning_rate": 2.3320000000000002e-07, "loss": 1.0967, "step": 48860 }, { "epoch": 4.8870000000000005, "grad_norm": 11.023387908935547, "learning_rate": 2.3120000000000003e-07, "loss": 1.1548, "step": 48870 }, { "epoch": 4.888, "grad_norm": 6.322512626647949, "learning_rate": 2.2920000000000001e-07, "loss": 0.9967, "step": 48880 }, { "epoch": 4.889, "grad_norm": 28.427635192871094, "learning_rate": 2.2720000000000002e-07, "loss": 1.4521, "step": 48890 }, { "epoch": 4.89, "grad_norm": 11.005186080932617, "learning_rate": 2.252e-07, "loss": 1.0456, "step": 48900 }, { "epoch": 4.891, "grad_norm": 19.191762924194336, "learning_rate": 2.2320000000000001e-07, "loss": 1.1881, "step": 48910 }, { "epoch": 4.892, "grad_norm": 21.477291107177734, "learning_rate": 2.2120000000000002e-07, "loss": 1.0557, "step": 48920 }, { "epoch": 4.893, "grad_norm": 14.493163108825684, "learning_rate": 
2.192e-07, "loss": 1.4274, "step": 48930 }, { "epoch": 4.894, "grad_norm": 14.457898139953613, "learning_rate": 2.172e-07, "loss": 1.2965, "step": 48940 }, { "epoch": 4.895, "grad_norm": 12.62874698638916, "learning_rate": 2.1520000000000002e-07, "loss": 1.09, "step": 48950 }, { "epoch": 4.896, "grad_norm": 12.86253547668457, "learning_rate": 2.132e-07, "loss": 1.1708, "step": 48960 }, { "epoch": 4.897, "grad_norm": 10.68841552734375, "learning_rate": 2.112e-07, "loss": 0.9697, "step": 48970 }, { "epoch": 4.898, "grad_norm": 10.637373924255371, "learning_rate": 2.0920000000000002e-07, "loss": 0.7978, "step": 48980 }, { "epoch": 4.899, "grad_norm": 16.64255142211914, "learning_rate": 2.072e-07, "loss": 1.1436, "step": 48990 }, { "epoch": 4.9, "grad_norm": 16.376541137695312, "learning_rate": 2.052e-07, "loss": 1.3412, "step": 49000 }, { "epoch": 4.901, "grad_norm": 21.67300033569336, "learning_rate": 2.0320000000000002e-07, "loss": 1.208, "step": 49010 }, { "epoch": 4.902, "grad_norm": 17.129066467285156, "learning_rate": 2.012e-07, "loss": 1.1759, "step": 49020 }, { "epoch": 4.9030000000000005, "grad_norm": 13.401540756225586, "learning_rate": 1.992e-07, "loss": 1.084, "step": 49030 }, { "epoch": 4.904, "grad_norm": 14.75893783569336, "learning_rate": 1.9720000000000002e-07, "loss": 1.244, "step": 49040 }, { "epoch": 4.905, "grad_norm": 7.079417705535889, "learning_rate": 1.952e-07, "loss": 1.1866, "step": 49050 }, { "epoch": 4.906, "grad_norm": 18.441015243530273, "learning_rate": 1.932e-07, "loss": 1.4483, "step": 49060 }, { "epoch": 4.907, "grad_norm": 14.434812545776367, "learning_rate": 1.9120000000000004e-07, "loss": 1.4347, "step": 49070 }, { "epoch": 4.908, "grad_norm": 10.604021072387695, "learning_rate": 1.892e-07, "loss": 1.0579, "step": 49080 }, { "epoch": 4.909, "grad_norm": 15.56004524230957, "learning_rate": 1.872e-07, "loss": 1.0885, "step": 49090 }, { "epoch": 4.91, "grad_norm": 13.56520938873291, "learning_rate": 1.852e-07, "loss": 1.0697, "step": 
49100 }, { "epoch": 4.911, "grad_norm": 9.948500633239746, "learning_rate": 1.832e-07, "loss": 1.5606, "step": 49110 }, { "epoch": 4.912, "grad_norm": 19.966821670532227, "learning_rate": 1.8120000000000003e-07, "loss": 1.1019, "step": 49120 }, { "epoch": 4.913, "grad_norm": 17.539501190185547, "learning_rate": 1.792e-07, "loss": 1.199, "step": 49130 }, { "epoch": 4.914, "grad_norm": 31.281076431274414, "learning_rate": 1.772e-07, "loss": 1.2662, "step": 49140 }, { "epoch": 4.915, "grad_norm": 14.215863227844238, "learning_rate": 1.7520000000000003e-07, "loss": 1.0542, "step": 49150 }, { "epoch": 4.916, "grad_norm": 29.44825553894043, "learning_rate": 1.732e-07, "loss": 1.0143, "step": 49160 }, { "epoch": 4.917, "grad_norm": 16.07088851928711, "learning_rate": 1.7120000000000002e-07, "loss": 1.1703, "step": 49170 }, { "epoch": 4.918, "grad_norm": 14.46146297454834, "learning_rate": 1.6920000000000003e-07, "loss": 1.1278, "step": 49180 }, { "epoch": 4.9190000000000005, "grad_norm": 18.360193252563477, "learning_rate": 1.672e-07, "loss": 1.1989, "step": 49190 }, { "epoch": 4.92, "grad_norm": 11.730563163757324, "learning_rate": 1.6520000000000002e-07, "loss": 1.6257, "step": 49200 }, { "epoch": 4.921, "grad_norm": 11.381979942321777, "learning_rate": 1.6320000000000003e-07, "loss": 0.9721, "step": 49210 }, { "epoch": 4.922, "grad_norm": 16.30154037475586, "learning_rate": 1.6120000000000001e-07, "loss": 1.1642, "step": 49220 }, { "epoch": 4.923, "grad_norm": 10.434528350830078, "learning_rate": 1.5920000000000002e-07, "loss": 1.1024, "step": 49230 }, { "epoch": 4.924, "grad_norm": 14.231740951538086, "learning_rate": 1.5720000000000003e-07, "loss": 1.2206, "step": 49240 }, { "epoch": 4.925, "grad_norm": 14.264418601989746, "learning_rate": 1.5520000000000001e-07, "loss": 1.3438, "step": 49250 }, { "epoch": 4.926, "grad_norm": 16.249393463134766, "learning_rate": 1.534e-07, "loss": 1.0079, "step": 49260 }, { "epoch": 4.927, "grad_norm": 12.803217887878418, 
"learning_rate": 1.5140000000000002e-07, "loss": 1.09, "step": 49270 }, { "epoch": 4.928, "grad_norm": 17.157121658325195, "learning_rate": 1.494e-07, "loss": 1.1412, "step": 49280 }, { "epoch": 4.929, "grad_norm": 19.02286148071289, "learning_rate": 1.474e-07, "loss": 1.343, "step": 49290 }, { "epoch": 4.93, "grad_norm": 6.42728853225708, "learning_rate": 1.4540000000000002e-07, "loss": 0.9185, "step": 49300 }, { "epoch": 4.931, "grad_norm": 18.8515567779541, "learning_rate": 1.4340000000000003e-07, "loss": 1.2051, "step": 49310 }, { "epoch": 4.932, "grad_norm": 13.925591468811035, "learning_rate": 1.414e-07, "loss": 1.1481, "step": 49320 }, { "epoch": 4.933, "grad_norm": 13.94491195678711, "learning_rate": 1.394e-07, "loss": 1.1918, "step": 49330 }, { "epoch": 4.934, "grad_norm": 11.357576370239258, "learning_rate": 1.3740000000000003e-07, "loss": 1.0557, "step": 49340 }, { "epoch": 4.9350000000000005, "grad_norm": 21.27686882019043, "learning_rate": 1.354e-07, "loss": 1.3726, "step": 49350 }, { "epoch": 4.936, "grad_norm": 8.028021812438965, "learning_rate": 1.3340000000000002e-07, "loss": 1.157, "step": 49360 }, { "epoch": 4.937, "grad_norm": 24.999191284179688, "learning_rate": 1.3140000000000003e-07, "loss": 1.5977, "step": 49370 }, { "epoch": 4.938, "grad_norm": 14.995708465576172, "learning_rate": 1.294e-07, "loss": 1.125, "step": 49380 }, { "epoch": 4.939, "grad_norm": 11.787599563598633, "learning_rate": 1.2740000000000002e-07, "loss": 1.3611, "step": 49390 }, { "epoch": 4.9399999999999995, "grad_norm": 13.805063247680664, "learning_rate": 1.2540000000000002e-07, "loss": 1.3104, "step": 49400 }, { "epoch": 4.941, "grad_norm": 15.742304801940918, "learning_rate": 1.234e-07, "loss": 1.1768, "step": 49410 }, { "epoch": 4.942, "grad_norm": 20.15599250793457, "learning_rate": 1.2140000000000002e-07, "loss": 1.1376, "step": 49420 }, { "epoch": 4.943, "grad_norm": 14.181190490722656, "learning_rate": 1.194e-07, "loss": 1.186, "step": 49430 }, { "epoch": 4.944, 
"grad_norm": 14.973407745361328, "learning_rate": 1.1740000000000002e-07, "loss": 1.0802, "step": 49440 }, { "epoch": 4.945, "grad_norm": 13.86755657196045, "learning_rate": 1.1540000000000001e-07, "loss": 1.1853, "step": 49450 }, { "epoch": 4.946, "grad_norm": 13.15807056427002, "learning_rate": 1.1340000000000001e-07, "loss": 1.0649, "step": 49460 }, { "epoch": 4.947, "grad_norm": 18.220409393310547, "learning_rate": 1.1140000000000002e-07, "loss": 1.2288, "step": 49470 }, { "epoch": 4.948, "grad_norm": 16.072856903076172, "learning_rate": 1.0940000000000001e-07, "loss": 1.3154, "step": 49480 }, { "epoch": 4.949, "grad_norm": 12.626816749572754, "learning_rate": 1.0740000000000001e-07, "loss": 1.3739, "step": 49490 }, { "epoch": 4.95, "grad_norm": 6.317652702331543, "learning_rate": 1.0540000000000002e-07, "loss": 1.4945, "step": 49500 }, { "epoch": 4.951, "grad_norm": 10.174875259399414, "learning_rate": 1.0340000000000001e-07, "loss": 1.0777, "step": 49510 }, { "epoch": 4.952, "grad_norm": 14.65125560760498, "learning_rate": 1.0140000000000001e-07, "loss": 1.4415, "step": 49520 }, { "epoch": 4.953, "grad_norm": 4.697287082672119, "learning_rate": 9.94e-08, "loss": 1.1834, "step": 49530 }, { "epoch": 4.954, "grad_norm": 15.976245880126953, "learning_rate": 9.740000000000001e-08, "loss": 1.0063, "step": 49540 }, { "epoch": 4.955, "grad_norm": 16.01375389099121, "learning_rate": 9.540000000000001e-08, "loss": 1.2818, "step": 49550 }, { "epoch": 4.9559999999999995, "grad_norm": 11.953120231628418, "learning_rate": 9.34e-08, "loss": 1.4504, "step": 49560 }, { "epoch": 4.957, "grad_norm": 13.643078804016113, "learning_rate": 9.140000000000001e-08, "loss": 1.0046, "step": 49570 }, { "epoch": 4.958, "grad_norm": 23.26430892944336, "learning_rate": 8.94e-08, "loss": 1.1373, "step": 49580 }, { "epoch": 4.959, "grad_norm": 7.325770378112793, "learning_rate": 8.74e-08, "loss": 0.9904, "step": 49590 }, { "epoch": 4.96, "grad_norm": 9.837796211242676, "learning_rate": 
8.540000000000001e-08, "loss": 1.1606, "step": 49600 }, { "epoch": 4.961, "grad_norm": 10.744565963745117, "learning_rate": 8.34e-08, "loss": 0.9567, "step": 49610 }, { "epoch": 4.962, "grad_norm": 10.797896385192871, "learning_rate": 8.14e-08, "loss": 1.2643, "step": 49620 }, { "epoch": 4.963, "grad_norm": 11.051605224609375, "learning_rate": 7.94e-08, "loss": 1.3669, "step": 49630 }, { "epoch": 4.964, "grad_norm": 8.041471481323242, "learning_rate": 7.74e-08, "loss": 1.3035, "step": 49640 }, { "epoch": 4.965, "grad_norm": 13.023823738098145, "learning_rate": 7.54e-08, "loss": 1.1797, "step": 49650 }, { "epoch": 4.966, "grad_norm": 5.316483974456787, "learning_rate": 7.340000000000001e-08, "loss": 0.8815, "step": 49660 }, { "epoch": 4.967, "grad_norm": 17.49151611328125, "learning_rate": 7.14e-08, "loss": 1.2825, "step": 49670 }, { "epoch": 4.968, "grad_norm": 13.25382137298584, "learning_rate": 6.94e-08, "loss": 1.3921, "step": 49680 }, { "epoch": 4.969, "grad_norm": 8.035177230834961, "learning_rate": 6.740000000000001e-08, "loss": 1.2613, "step": 49690 }, { "epoch": 4.97, "grad_norm": 9.562631607055664, "learning_rate": 6.54e-08, "loss": 1.2025, "step": 49700 }, { "epoch": 4.971, "grad_norm": 18.279869079589844, "learning_rate": 6.340000000000001e-08, "loss": 1.3302, "step": 49710 }, { "epoch": 4.9719999999999995, "grad_norm": 11.480021476745605, "learning_rate": 6.140000000000001e-08, "loss": 1.018, "step": 49720 }, { "epoch": 4.973, "grad_norm": 8.225981712341309, "learning_rate": 5.94e-08, "loss": 1.0425, "step": 49730 }, { "epoch": 4.974, "grad_norm": 11.223268508911133, "learning_rate": 5.740000000000001e-08, "loss": 0.8917, "step": 49740 }, { "epoch": 4.975, "grad_norm": 19.101062774658203, "learning_rate": 5.54e-08, "loss": 1.2038, "step": 49750 }, { "epoch": 4.976, "grad_norm": 14.422289848327637, "learning_rate": 5.340000000000001e-08, "loss": 1.1247, "step": 49760 }, { "epoch": 4.977, "grad_norm": 34.52775573730469, "learning_rate": 5.14e-08, "loss": 
1.093, "step": 49770 }, { "epoch": 4.978, "grad_norm": 7.270913124084473, "learning_rate": 4.9400000000000006e-08, "loss": 1.367, "step": 49780 }, { "epoch": 4.979, "grad_norm": 11.253016471862793, "learning_rate": 4.740000000000001e-08, "loss": 0.8254, "step": 49790 }, { "epoch": 4.98, "grad_norm": 11.906766891479492, "learning_rate": 4.54e-08, "loss": 1.3216, "step": 49800 }, { "epoch": 4.981, "grad_norm": 8.105388641357422, "learning_rate": 4.3400000000000005e-08, "loss": 1.1037, "step": 49810 }, { "epoch": 4.982, "grad_norm": 20.388019561767578, "learning_rate": 4.14e-08, "loss": 1.5266, "step": 49820 }, { "epoch": 4.983, "grad_norm": 17.578882217407227, "learning_rate": 3.94e-08, "loss": 0.821, "step": 49830 }, { "epoch": 4.984, "grad_norm": 9.598089218139648, "learning_rate": 3.7400000000000004e-08, "loss": 1.214, "step": 49840 }, { "epoch": 4.985, "grad_norm": 14.738775253295898, "learning_rate": 3.5400000000000006e-08, "loss": 1.2301, "step": 49850 }, { "epoch": 4.986, "grad_norm": 11.469500541687012, "learning_rate": 3.34e-08, "loss": 1.0393, "step": 49860 }, { "epoch": 4.987, "grad_norm": 10.688315391540527, "learning_rate": 3.14e-08, "loss": 1.0455, "step": 49870 }, { "epoch": 4.9879999999999995, "grad_norm": 23.265121459960938, "learning_rate": 2.9400000000000002e-08, "loss": 1.1126, "step": 49880 }, { "epoch": 4.989, "grad_norm": 13.350594520568848, "learning_rate": 2.74e-08, "loss": 1.3018, "step": 49890 }, { "epoch": 4.99, "grad_norm": 17.47894859313965, "learning_rate": 2.5400000000000002e-08, "loss": 1.4483, "step": 49900 }, { "epoch": 4.991, "grad_norm": 15.110867500305176, "learning_rate": 2.34e-08, "loss": 1.3584, "step": 49910 }, { "epoch": 4.992, "grad_norm": 8.704626083374023, "learning_rate": 2.1400000000000003e-08, "loss": 1.1065, "step": 49920 }, { "epoch": 4.993, "grad_norm": 9.261788368225098, "learning_rate": 1.9400000000000002e-08, "loss": 0.8647, "step": 49930 }, { "epoch": 4.994, "grad_norm": 17.000699996948242, "learning_rate": 
1.74e-08, "loss": 1.4472, "step": 49940 }, { "epoch": 4.995, "grad_norm": 18.359445571899414, "learning_rate": 1.54e-08, "loss": 1.1452, "step": 49950 }, { "epoch": 4.996, "grad_norm": 11.546000480651855, "learning_rate": 1.3400000000000001e-08, "loss": 1.0424, "step": 49960 }, { "epoch": 4.997, "grad_norm": 14.610387802124023, "learning_rate": 1.1400000000000001e-08, "loss": 1.1004, "step": 49970 }, { "epoch": 4.998, "grad_norm": 14.02322006225586, "learning_rate": 9.4e-09, "loss": 1.7723, "step": 49980 }, { "epoch": 4.999, "grad_norm": 12.449058532714844, "learning_rate": 7.4e-09, "loss": 1.448, "step": 49990 }, { "epoch": 5.0, "grad_norm": 10.581077575683594, "learning_rate": 5.4e-09, "loss": 0.7594, "step": 50000 } ], "logging_steps": 10, "max_steps": 50000, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }