{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 69713, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 0.5, "learning_rate": 2.8686173264486516e-08, "loss": 0.859, "step": 1 }, { "epoch": 0.0, "grad_norm": 0.56640625, "learning_rate": 1.434308663224326e-07, "loss": 1.2508, "step": 5 }, { "epoch": 0.0, "grad_norm": 0.62109375, "learning_rate": 2.868617326448652e-07, "loss": 1.1806, "step": 10 }, { "epoch": 0.0, "grad_norm": 0.62890625, "learning_rate": 4.3029259896729773e-07, "loss": 1.1581, "step": 15 }, { "epoch": 0.0, "grad_norm": 0.58984375, "learning_rate": 5.737234652897304e-07, "loss": 1.0476, "step": 20 }, { "epoch": 0.0, "grad_norm": 0.98828125, "learning_rate": 7.17154331612163e-07, "loss": 1.126, "step": 25 }, { "epoch": 0.0, "grad_norm": 0.72265625, "learning_rate": 8.605851979345955e-07, "loss": 1.1032, "step": 30 }, { "epoch": 0.0, "grad_norm": 0.6171875, "learning_rate": 1.0040160642570282e-06, "loss": 1.1629, "step": 35 }, { "epoch": 0.0, "grad_norm": 0.455078125, "learning_rate": 1.1474469305794607e-06, "loss": 1.2222, "step": 40 }, { "epoch": 0.0, "grad_norm": 0.498046875, "learning_rate": 1.2908777969018933e-06, "loss": 1.307, "step": 45 }, { "epoch": 0.0, "grad_norm": 0.56640625, "learning_rate": 1.434308663224326e-06, "loss": 1.0781, "step": 50 }, { "epoch": 0.0, "grad_norm": 0.703125, "learning_rate": 1.5777395295467586e-06, "loss": 1.2634, "step": 55 }, { "epoch": 0.0, "grad_norm": 0.546875, "learning_rate": 1.721170395869191e-06, "loss": 1.0647, "step": 60 }, { "epoch": 0.0, "grad_norm": 0.6484375, "learning_rate": 1.8646012621916239e-06, "loss": 1.1197, "step": 65 }, { "epoch": 0.0, "grad_norm": 0.451171875, "learning_rate": 2.0080321285140564e-06, "loss": 1.0787, "step": 70 }, { "epoch": 0.0, "grad_norm": 0.6875, "learning_rate": 2.151462994836489e-06, "loss": 1.0495, "step": 75 }, { "epoch": 0.0, "grad_norm": 0.5234375, "learning_rate": 2.2948938611589215e-06, "loss": 1.1097, "step": 80 }, { "epoch": 0.0, "grad_norm": 0.64453125, "learning_rate": 2.438324727481354e-06, "loss": 1.0675, "step": 85 }, { "epoch": 0.0, "grad_norm": 0.546875, "learning_rate": 2.5817555938037866e-06, "loss": 1.2212, "step": 90 }, { "epoch": 0.0, "grad_norm": 0.60546875, "learning_rate": 2.725186460126219e-06, "loss": 1.0741, "step": 95 }, { "epoch": 0.0, "grad_norm": 0.49609375, "learning_rate": 2.868617326448652e-06, "loss": 1.2163, "step": 100 }, { "epoch": 0.0, "grad_norm": 0.6171875, "learning_rate": 3.0120481927710846e-06, "loss": 1.1092, "step": 105 }, { "epoch": 0.0, "grad_norm": 0.55859375, "learning_rate": 3.155479059093517e-06, "loss": 1.1085, "step": 110 }, { "epoch": 0.0, "grad_norm": 0.54296875, "learning_rate": 3.2989099254159493e-06, "loss": 0.9502, "step": 115 }, { "epoch": 0.0, "grad_norm": 0.4921875, "learning_rate": 3.442340791738382e-06, "loss": 1.1783, "step": 120 }, { "epoch": 0.0, "grad_norm": 0.466796875, "learning_rate": 3.585771658060815e-06, "loss": 1.1416, "step": 125 }, { "epoch": 0.0, "grad_norm": 0.51171875, "learning_rate": 3.7292025243832477e-06, "loss": 1.0175, "step": 130 }, { "epoch": 0.0, "grad_norm": 0.609375, "learning_rate": 3.87263339070568e-06, "loss": 1.1436, "step": 135 }, { "epoch": 0.0, "grad_norm": 0.51953125, "learning_rate": 4.016064257028113e-06, "loss": 1.0344, "step": 140 }, { "epoch": 0.0, "grad_norm": 0.49609375, "learning_rate": 4.159495123350545e-06, "loss": 1.1485, "step": 145 }, { "epoch": 0.0, "grad_norm": 0.4921875, "learning_rate": 4.302925989672978e-06, "loss": 1.2148, "step": 150 }, { "epoch": 0.0, "grad_norm": 0.62109375, "learning_rate": 4.4463568559954104e-06, "loss": 0.9588, "step": 155 }, { "epoch": 0.0, "grad_norm": 0.55859375, "learning_rate": 4.589787722317843e-06, "loss": 1.1672, "step": 160 }, { "epoch": 0.0, "grad_norm": 0.53515625, "learning_rate": 4.7332185886402755e-06, "loss": 1.0145, "step": 165 }, { "epoch": 0.0, "grad_norm": 0.44140625, "learning_rate": 4.876649454962708e-06, "loss": 1.1636, "step": 170 }, { "epoch": 0.0, "grad_norm": 0.490234375, "learning_rate": 5.020080321285141e-06, "loss": 1.1657, "step": 175 }, { "epoch": 0.0, "grad_norm": 0.4609375, "learning_rate": 5.163511187607573e-06, "loss": 1.053, "step": 180 }, { "epoch": 0.0, "grad_norm": 0.51953125, "learning_rate": 5.306942053930006e-06, "loss": 1.0516, "step": 185 }, { "epoch": 0.0, "grad_norm": 0.56640625, "learning_rate": 5.450372920252438e-06, "loss": 1.0715, "step": 190 }, { "epoch": 0.0, "grad_norm": 0.44921875, "learning_rate": 5.593803786574872e-06, "loss": 0.9784, "step": 195 }, { "epoch": 0.0, "grad_norm": 0.5546875, "learning_rate": 5.737234652897304e-06, "loss": 1.0953, "step": 200 }, { "epoch": 0.0, "grad_norm": 0.466796875, "learning_rate": 5.880665519219737e-06, "loss": 0.9477, "step": 205 }, { "epoch": 0.0, "grad_norm": 0.5234375, "learning_rate": 6.024096385542169e-06, "loss": 1.1993, "step": 210 }, { "epoch": 0.0, "grad_norm": 0.546875, "learning_rate": 6.167527251864602e-06, "loss": 1.1235, "step": 215 }, { "epoch": 0.0, "grad_norm": 0.6015625, "learning_rate": 6.310958118187034e-06, "loss": 1.0081, "step": 220 }, { "epoch": 0.0, "grad_norm": 0.478515625, "learning_rate": 6.454388984509467e-06, "loss": 1.0265, "step": 225 }, { "epoch": 0.0, "grad_norm": 0.44921875, "learning_rate": 6.5978198508318986e-06, "loss": 0.9588, "step": 230 }, { "epoch": 0.0, "grad_norm": 0.498046875, "learning_rate": 6.741250717154332e-06, "loss": 1.0807, "step": 235 }, { "epoch": 0.0, "grad_norm": 0.6015625, "learning_rate": 6.884681583476764e-06, "loss": 1.214, "step": 240 }, { "epoch": 0.0, "grad_norm": 0.494140625, "learning_rate": 7.028112449799197e-06, "loss": 1.1833, "step": 245 }, { "epoch": 0.0, "grad_norm": 0.50390625, "learning_rate": 7.17154331612163e-06, "loss": 1.0432, "step": 250 }, { "epoch": 0.0, "grad_norm": 0.5078125, "learning_rate": 7.314974182444062e-06, "loss": 1.1392, "step": 255 }, { "epoch": 0.0, "grad_norm": 0.5546875, "learning_rate": 7.4584050487664955e-06, "loss": 1.08, "step": 260 }, { "epoch": 0.0, "grad_norm": 0.5859375, "learning_rate": 7.601835915088927e-06, "loss": 0.9535, "step": 265 }, { "epoch": 0.0, "grad_norm": 0.6171875, "learning_rate": 7.74526678141136e-06, "loss": 1.1902, "step": 270 }, { "epoch": 0.0, "grad_norm": 0.61328125, "learning_rate": 7.888697647733792e-06, "loss": 1.0534, "step": 275 }, { "epoch": 0.0, "grad_norm": 0.46484375, "learning_rate": 8.032128514056226e-06, "loss": 0.9991, "step": 280 }, { "epoch": 0.0, "grad_norm": 0.5234375, "learning_rate": 8.175559380378659e-06, "loss": 1.03, "step": 285 }, { "epoch": 0.0, "grad_norm": 0.498046875, "learning_rate": 8.31899024670109e-06, "loss": 1.0939, "step": 290 }, { "epoch": 0.0, "grad_norm": 0.53515625, "learning_rate": 8.462421113023524e-06, "loss": 1.1392, "step": 295 }, { "epoch": 0.0, "grad_norm": 0.49609375, "learning_rate": 8.605851979345956e-06, "loss": 0.9538, "step": 300 }, { "epoch": 0.0, "grad_norm": 0.490234375, "learning_rate": 8.74928284566839e-06, "loss": 0.9449, "step": 305 }, { "epoch": 0.0, "grad_norm": 0.51171875, "learning_rate": 8.892713711990821e-06, "loss": 1.1433, "step": 310 }, { "epoch": 0.0, "grad_norm": 0.439453125, "learning_rate": 9.036144578313253e-06, "loss": 0.9169, "step": 315 }, { "epoch": 0.0, "grad_norm": 0.50390625, "learning_rate": 9.179575444635686e-06, "loss": 0.9726, "step": 320 }, { "epoch": 0.0, "grad_norm": 0.51171875, "learning_rate": 9.323006310958118e-06, "loss": 1.2255, "step": 325 }, { "epoch": 0.0, "grad_norm": 0.60546875, "learning_rate": 9.466437177280551e-06, "loss": 1.2127, "step": 330 }, { "epoch": 0.0, "grad_norm": 0.50390625, "learning_rate": 9.609868043602983e-06, "loss": 1.0853, "step": 335 }, { "epoch": 0.0, "grad_norm": 0.421875, "learning_rate": 9.753298909925416e-06, "loss": 0.9329, "step": 340 }, { "epoch": 0.0, "grad_norm": 0.4609375, "learning_rate": 9.896729776247848e-06, "loss": 0.8877, "step": 345 }, { "epoch": 0.01, "grad_norm": 0.515625, "learning_rate": 1.0040160642570281e-05, "loss": 1.2255, "step": 350 }, { "epoch": 0.01, "grad_norm": 0.70703125, "learning_rate": 1.0183591508892715e-05, "loss": 0.9594, "step": 355 }, { "epoch": 0.01, "grad_norm": 0.51171875, "learning_rate": 1.0327022375215146e-05, "loss": 1.2497, "step": 360 }, { "epoch": 0.01, "grad_norm": 0.498046875, "learning_rate": 1.047045324153758e-05, "loss": 0.9763, "step": 365 }, { "epoch": 0.01, "grad_norm": 0.490234375, "learning_rate": 1.0613884107860011e-05, "loss": 0.9794, "step": 370 }, { "epoch": 0.01, "grad_norm": 0.54296875, "learning_rate": 1.0757314974182445e-05, "loss": 0.9919, "step": 375 }, { "epoch": 0.01, "grad_norm": 0.4765625, "learning_rate": 1.0900745840504876e-05, "loss": 0.9107, "step": 380 }, { "epoch": 0.01, "grad_norm": 0.453125, "learning_rate": 1.104417670682731e-05, "loss": 1.1007, "step": 385 }, { "epoch": 0.01, "grad_norm": 0.5546875, "learning_rate": 1.1187607573149743e-05, "loss": 1.1553, "step": 390 }, { "epoch": 0.01, "grad_norm": 0.47265625, "learning_rate": 1.1331038439472175e-05, "loss": 0.9138, "step": 395 }, { "epoch": 0.01, "grad_norm": 0.54296875, "learning_rate": 1.1474469305794608e-05, "loss": 0.9199, "step": 400 }, { "epoch": 0.01, "grad_norm": 0.466796875, "learning_rate": 1.161790017211704e-05, "loss": 0.8803, "step": 405 }, { "epoch": 0.01, "grad_norm": 0.52734375, "learning_rate": 1.1761331038439473e-05, "loss": 0.9837, "step": 410 }, { "epoch": 0.01, "grad_norm": 0.416015625, "learning_rate": 1.1904761904761905e-05, "loss": 1.0048, "step": 415 }, { "epoch": 0.01, "grad_norm": 0.5859375, "learning_rate": 1.2048192771084338e-05, "loss": 0.9014, "step": 420 }, { "epoch": 0.01, "grad_norm": 0.498046875, "learning_rate": 1.2191623637406772e-05, "loss": 0.9297, "step": 425 }, { "epoch": 0.01, "grad_norm": 0.50390625, "learning_rate": 1.2335054503729204e-05, "loss": 1.0564, "step": 430 }, { "epoch": 0.01, "grad_norm": 0.486328125, "learning_rate": 1.2478485370051635e-05, "loss": 0.9937, "step": 435 }, { "epoch": 0.01, "grad_norm": 0.494140625, "learning_rate": 1.2621916236374069e-05, "loss": 0.9515, "step": 440 }, { "epoch": 0.01, "grad_norm": 0.51953125, "learning_rate": 1.27653471026965e-05, "loss": 1.1943, "step": 445 }, { "epoch": 0.01, "grad_norm": 0.478515625, "learning_rate": 1.2908777969018934e-05, "loss": 1.0089, "step": 450 }, { "epoch": 0.01, "grad_norm": 0.578125, "learning_rate": 1.3052208835341367e-05, "loss": 0.9401, "step": 455 }, { "epoch": 0.01, "grad_norm": 0.5078125, "learning_rate": 1.3195639701663797e-05, "loss": 1.0221, "step": 460 }, { "epoch": 0.01, "grad_norm": 0.51953125, "learning_rate": 1.333907056798623e-05, "loss": 0.9598, "step": 465 }, { "epoch": 0.01, "grad_norm": 0.5390625, "learning_rate": 1.3482501434308664e-05, "loss": 1.02, "step": 470 }, { "epoch": 0.01, "grad_norm": 0.498046875, "learning_rate": 1.3625932300631097e-05, "loss": 1.0364, "step": 475 }, { "epoch": 0.01, "grad_norm": 0.482421875, "learning_rate": 1.3769363166953527e-05, "loss": 1.068, "step": 480 }, { "epoch": 0.01, "grad_norm": 0.50390625, "learning_rate": 1.391279403327596e-05, "loss": 1.1302, "step": 485 }, { "epoch": 0.01, "grad_norm": 0.44921875, "learning_rate": 1.4056224899598394e-05, "loss": 0.9552, "step": 490 }, { "epoch": 0.01, "grad_norm": 0.54296875, "learning_rate": 1.4199655765920827e-05, "loss": 1.1418, "step": 495 }, { "epoch": 0.01, "grad_norm": 0.474609375, "learning_rate": 1.434308663224326e-05, "loss": 0.9391, "step": 500 }, { "epoch": 0.01, "grad_norm": 0.5625, "learning_rate": 1.448651749856569e-05, "loss": 1.0627, "step": 505 }, { "epoch": 0.01, "grad_norm": 0.50390625, "learning_rate": 1.4629948364888124e-05, "loss": 1.0318, "step": 510 }, { "epoch": 0.01, "grad_norm": 0.4921875, "learning_rate": 1.4773379231210558e-05, "loss": 0.9085, "step": 515 }, { "epoch": 0.01, "grad_norm": 0.5, "learning_rate": 1.4916810097532991e-05, "loss": 0.9348, "step": 520 }, { "epoch": 0.01, "grad_norm": 0.5078125, "learning_rate": 1.5060240963855424e-05, "loss": 1.0198, "step": 525 }, { "epoch": 0.01, "grad_norm": 0.546875, "learning_rate": 1.5203671830177854e-05, "loss": 0.9923, "step": 530 }, { "epoch": 0.01, "grad_norm": 0.462890625, "learning_rate": 1.5347102696500288e-05, "loss": 1.1061, "step": 535 }, { "epoch": 0.01, "grad_norm": 0.52734375, "learning_rate": 1.549053356282272e-05, "loss": 0.9106, "step": 540 }, { "epoch": 0.01, "grad_norm": 0.51171875, "learning_rate": 1.5633964429145155e-05, "loss": 0.9399, "step": 545 }, { "epoch": 0.01, "grad_norm": 0.50390625, "learning_rate": 1.5777395295467585e-05, "loss": 0.9962, "step": 550 }, { "epoch": 0.01, "grad_norm": 0.47265625, "learning_rate": 1.5920826161790018e-05, "loss": 1.0109, "step": 555 }, { "epoch": 0.01, "grad_norm": 0.5, "learning_rate": 1.606425702811245e-05, "loss": 0.9186, "step": 560 }, { "epoch": 0.01, "grad_norm": 0.474609375, "learning_rate": 1.6207687894434885e-05, "loss": 1.1396, "step": 565 }, { "epoch": 0.01, "grad_norm": 0.470703125, "learning_rate": 1.6351118760757318e-05, "loss": 1.006, "step": 570 }, { "epoch": 0.01, "grad_norm": 0.5078125, "learning_rate": 1.6494549627079748e-05, "loss": 0.9358, "step": 575 }, { "epoch": 0.01, "grad_norm": 0.4609375, "learning_rate": 1.663798049340218e-05, "loss": 1.0365, "step": 580 }, { "epoch": 0.01, "grad_norm": 0.474609375, "learning_rate": 1.6781411359724615e-05, "loss": 0.976, "step": 585 }, { "epoch": 0.01, "grad_norm": 0.490234375, "learning_rate": 1.6924842226047048e-05, "loss": 0.9621, "step": 590 }, { "epoch": 0.01, "grad_norm": 0.546875, "learning_rate": 1.706827309236948e-05, "loss": 1.0107, "step": 595 }, { "epoch": 0.01, "grad_norm": 0.4921875, "learning_rate": 1.721170395869191e-05, "loss": 0.972, "step": 600 }, { "epoch": 0.01, "grad_norm": 0.76171875, "learning_rate": 1.7355134825014345e-05, "loss": 1.0638, "step": 605 }, { "epoch": 0.01, "grad_norm": 0.4921875, "learning_rate": 1.749856569133678e-05, "loss": 0.985, "step": 610 }, { "epoch": 0.01, "grad_norm": 0.48046875, "learning_rate": 1.764199655765921e-05, "loss": 0.9469, "step": 615 }, { "epoch": 0.01, "grad_norm": 0.57421875, "learning_rate": 1.7785427423981642e-05, "loss": 1.1185, "step": 620 }, { "epoch": 0.01, "grad_norm": 0.54296875, "learning_rate": 1.7928858290304075e-05, "loss": 1.0834, "step": 625 }, { "epoch": 0.01, "grad_norm": 0.5546875, "learning_rate": 1.8072289156626505e-05, "loss": 0.9151, "step": 630 }, { "epoch": 0.01, "grad_norm": 0.609375, "learning_rate": 1.821572002294894e-05, "loss": 0.8918, "step": 635 }, { "epoch": 0.01, "grad_norm": 0.478515625, "learning_rate": 1.8359150889271372e-05, "loss": 1.0947, "step": 640 }, { "epoch": 0.01, "grad_norm": 0.52734375, "learning_rate": 1.8502581755593802e-05, "loss": 0.9554, "step": 645 }, { "epoch": 0.01, "grad_norm": 0.50390625, "learning_rate": 1.8646012621916235e-05, "loss": 1.0884, "step": 650 }, { "epoch": 0.01, "grad_norm": 0.546875, "learning_rate": 1.878944348823867e-05, "loss": 0.9896, "step": 655 }, { "epoch": 0.01, "grad_norm": 0.51171875, "learning_rate": 1.8932874354561102e-05, "loss": 1.026, "step": 660 }, { "epoch": 0.01, "grad_norm": 0.5546875, "learning_rate": 1.9076305220883535e-05, "loss": 1.047, "step": 665 }, { "epoch": 0.01, "grad_norm": 0.55078125, "learning_rate": 1.9219736087205965e-05, "loss": 0.9945, "step": 670 }, { "epoch": 0.01, "grad_norm": 0.6171875, "learning_rate": 1.93631669535284e-05, "loss": 1.0214, "step": 675 }, { "epoch": 0.01, "grad_norm": 0.52734375, "learning_rate": 1.9506597819850832e-05, "loss": 0.9594, "step": 680 }, { "epoch": 0.01, "grad_norm": 0.50390625, "learning_rate": 1.9650028686173266e-05, "loss": 0.9142, "step": 685 }, { "epoch": 0.01, "grad_norm": 0.54296875, "learning_rate": 1.9793459552495696e-05, "loss": 1.0362, "step": 690 }, { "epoch": 0.01, "grad_norm": 0.50390625, "learning_rate": 1.993689041881813e-05, "loss": 0.9183, "step": 695 }, { "epoch": 0.01, "grad_norm": 0.52734375, "learning_rate": 2.0080321285140562e-05, "loss": 1.0075, "step": 700 }, { "epoch": 0.01, "grad_norm": 0.458984375, "learning_rate": 2.0223752151462996e-05, "loss": 0.9477, "step": 705 }, { "epoch": 0.01, "grad_norm": 0.640625, "learning_rate": 2.036718301778543e-05, "loss": 1.0359, "step": 710 }, { "epoch": 0.01, "grad_norm": 0.49609375, "learning_rate": 2.051061388410786e-05, "loss": 1.0742, "step": 715 }, { "epoch": 0.01, "grad_norm": 0.61328125, "learning_rate": 2.0654044750430293e-05, "loss": 0.8609, "step": 720 }, { "epoch": 0.01, "grad_norm": 0.4921875, "learning_rate": 2.0797475616752726e-05, "loss": 1.1525, "step": 725 }, { "epoch": 0.01, "grad_norm": 0.5078125, "learning_rate": 2.094090648307516e-05, "loss": 0.9171, "step": 730 }, { "epoch": 0.01, "grad_norm": 0.5078125, "learning_rate": 2.1084337349397593e-05, "loss": 1.0555, "step": 735 }, { "epoch": 0.01, "grad_norm": 0.470703125, "learning_rate": 2.1227768215720023e-05, "loss": 1.0368, "step": 740 }, { "epoch": 0.01, "grad_norm": 0.51171875, "learning_rate": 2.1371199082042456e-05, "loss": 0.9625, "step": 745 }, { "epoch": 0.01, "grad_norm": 0.5234375, "learning_rate": 2.151462994836489e-05, "loss": 1.0213, "step": 750 }, { "epoch": 0.01, "grad_norm": 0.58203125, "learning_rate": 2.1658060814687323e-05, "loss": 1.1338, "step": 755 }, { "epoch": 0.01, "grad_norm": 0.53515625, "learning_rate": 2.1801491681009753e-05, "loss": 0.9872, "step": 760 }, { "epoch": 0.01, "grad_norm": 0.5390625, "learning_rate": 2.1944922547332186e-05, "loss": 1.0546, "step": 765 }, { "epoch": 0.01, "grad_norm": 0.57421875, "learning_rate": 2.208835341365462e-05, "loss": 1.1086, "step": 770 }, { "epoch": 0.01, "grad_norm": 0.515625, "learning_rate": 2.2231784279977053e-05, "loss": 0.936, "step": 775 }, { "epoch": 0.01, "grad_norm": 0.55859375, "learning_rate": 2.2375215146299486e-05, "loss": 1.058, "step": 780 }, { "epoch": 0.01, "grad_norm": 0.546875, "learning_rate": 2.2518646012621916e-05, "loss": 0.88, "step": 785 }, { "epoch": 0.01, "grad_norm": 0.52734375, "learning_rate": 2.266207687894435e-05, "loss": 1.0216, "step": 790 }, { "epoch": 0.01, "grad_norm": 0.56640625, "learning_rate": 2.2805507745266783e-05, "loss": 0.8847, "step": 795 }, { "epoch": 0.01, "grad_norm": 3.5, "learning_rate": 2.2948938611589217e-05, "loss": 0.9893, "step": 800 }, { "epoch": 0.01, "grad_norm": 0.52734375, "learning_rate": 2.309236947791165e-05, "loss": 0.9958, "step": 805 }, { "epoch": 0.01, "grad_norm": 0.56640625, "learning_rate": 2.323580034423408e-05, "loss": 0.9778, "step": 810 }, { "epoch": 0.01, "grad_norm": 0.51953125, "learning_rate": 2.3379231210556513e-05, "loss": 1.105, "step": 815 }, { "epoch": 0.01, "grad_norm": 0.494140625, "learning_rate": 2.3522662076878947e-05, "loss": 1.0109, "step": 820 }, { "epoch": 0.01, "grad_norm": 0.53515625, "learning_rate": 2.366609294320138e-05, "loss": 1.1851, "step": 825 }, { "epoch": 0.01, "grad_norm": 0.51171875, "learning_rate": 2.380952380952381e-05, "loss": 0.9586, "step": 830 }, { "epoch": 0.01, "grad_norm": 0.56640625, "learning_rate": 2.3952954675846244e-05, "loss": 1.0967, "step": 835 }, { "epoch": 0.01, "grad_norm": 0.53515625, "learning_rate": 2.4096385542168677e-05, "loss": 1.1236, "step": 840 }, { "epoch": 0.01, "grad_norm": 0.53515625, "learning_rate": 2.423981640849111e-05, "loss": 0.8882, "step": 845 }, { "epoch": 0.01, "grad_norm": 0.478515625, "learning_rate": 2.4383247274813544e-05, "loss": 1.0636, "step": 850 }, { "epoch": 0.01, "grad_norm": 0.62890625, "learning_rate": 2.4526678141135974e-05, "loss": 0.9651, "step": 855 }, { "epoch": 0.01, "grad_norm": 0.52734375, "learning_rate": 2.4670109007458407e-05, "loss": 1.023, "step": 860 }, { "epoch": 0.01, "grad_norm": 0.5390625, "learning_rate": 2.481353987378084e-05, "loss": 0.9432, "step": 865 }, { "epoch": 0.01, "grad_norm": 0.55859375, "learning_rate": 2.495697074010327e-05, "loss": 1.2359, "step": 870 }, { "epoch": 0.01, "grad_norm": 0.58984375, "learning_rate": 2.5100401606425704e-05, "loss": 1.031, "step": 875 }, { "epoch": 0.01, "grad_norm": 0.5703125, "learning_rate": 2.5243832472748137e-05, "loss": 1.0033, "step": 880 }, { "epoch": 0.01, "grad_norm": 0.53515625, "learning_rate": 2.538726333907057e-05, "loss": 0.9732, "step": 885 }, { "epoch": 0.01, "grad_norm": 0.494140625, "learning_rate": 2.5530694205393e-05, "loss": 1.0595, "step": 890 }, { "epoch": 0.01, "grad_norm": 0.4453125, "learning_rate": 2.5674125071715434e-05, "loss": 0.9085, "step": 895 }, { "epoch": 0.01, "grad_norm": 0.5078125, "learning_rate": 2.5817555938037867e-05, "loss": 1.0979, "step": 900 }, { "epoch": 0.01, "grad_norm": 0.55859375, "learning_rate": 2.5960986804360297e-05, "loss": 1.088, "step": 905 }, { "epoch": 0.01, "grad_norm": 0.5, "learning_rate": 2.6104417670682734e-05, "loss": 1.0669, "step": 910 }, { "epoch": 0.01, "grad_norm": 0.5859375, "learning_rate": 2.6247848537005164e-05, "loss": 0.9043, "step": 915 }, { "epoch": 0.01, "grad_norm": 0.4765625, "learning_rate": 2.6391279403327594e-05, "loss": 0.9435, "step": 920 }, { "epoch": 0.01, "grad_norm": 0.46875, "learning_rate": 2.653471026965003e-05, "loss": 0.9912, "step": 925 }, { "epoch": 0.01, "grad_norm": 0.56640625, "learning_rate": 2.667814113597246e-05, "loss": 0.9251, "step": 930 }, { "epoch": 0.01, "grad_norm": 0.4921875, "learning_rate": 2.682157200229489e-05, "loss": 0.9888, "step": 935 }, { "epoch": 0.01, "grad_norm": 0.56640625, "learning_rate": 2.6965002868617328e-05, "loss": 0.9168, "step": 940 }, { "epoch": 0.01, "grad_norm": 0.51953125, "learning_rate": 2.7108433734939758e-05, "loss": 0.9026, "step": 945 }, { "epoch": 0.01, "grad_norm": 0.578125, "learning_rate": 2.7251864601262195e-05, "loss": 1.0875, "step": 950 }, { "epoch": 0.01, "grad_norm": 0.55078125, "learning_rate": 2.7395295467584625e-05, "loss": 0.8695, "step": 955 }, { "epoch": 0.01, "grad_norm": 0.4921875, "learning_rate": 2.7538726333907055e-05, "loss": 0.922, "step": 960 }, { "epoch": 0.01, "grad_norm": 0.470703125, "learning_rate": 2.768215720022949e-05, "loss": 1.0311, "step": 965 }, { "epoch": 0.01, "grad_norm": 0.5625, "learning_rate": 2.782558806655192e-05, "loss": 1.0643, "step": 970 }, { "epoch": 0.01, "grad_norm": 0.5078125, "learning_rate": 2.7969018932874358e-05, "loss": 1.128, "step": 975 }, { "epoch": 0.01, "grad_norm": 0.56640625, "learning_rate": 2.8112449799196788e-05, "loss": 1.0122, "step": 980 }, { "epoch": 0.01, "grad_norm": 0.466796875, "learning_rate": 2.8255880665519218e-05, "loss": 1.1364, "step": 985 }, { "epoch": 0.01, "grad_norm": 0.5078125, "learning_rate": 2.8399311531841655e-05, "loss": 1.0668, "step": 990 }, { "epoch": 0.01, "grad_norm": 0.6015625, "learning_rate": 2.8542742398164085e-05, "loss": 0.9467, "step": 995 }, { "epoch": 0.01, "grad_norm": 0.5, "learning_rate": 2.868617326448652e-05, "loss": 1.0225, "step": 1000 }, { "epoch": 0.01, "grad_norm": 0.54296875, "learning_rate": 2.882960413080895e-05, "loss": 0.9799, "step": 1005 }, { "epoch": 0.01, "grad_norm": 0.55859375, "learning_rate": 2.897303499713138e-05, "loss": 1.1023, "step": 1010 }, { "epoch": 0.01, "grad_norm": 0.546875, "learning_rate": 2.911646586345382e-05, "loss": 0.9273, "step": 1015 }, { "epoch": 0.01, "grad_norm": 0.50390625, "learning_rate": 2.925989672977625e-05, "loss": 1.0199, "step": 1020 }, { "epoch": 0.01, "grad_norm": 0.48828125, "learning_rate": 2.9403327596098685e-05, "loss": 1.0596, "step": 1025 }, { "epoch": 0.01, "grad_norm": 0.51953125, "learning_rate": 2.9546758462421115e-05, "loss": 0.8337, "step": 1030 }, { "epoch": 0.01, "grad_norm": 0.57421875, "learning_rate": 2.9690189328743545e-05, "loss": 0.9971, "step": 1035 }, { "epoch": 0.01, "grad_norm": 0.52734375, "learning_rate": 2.9833620195065982e-05, "loss": 0.9867, "step": 1040 }, { "epoch": 0.01, "grad_norm": 0.5, "learning_rate": 2.9977051061388412e-05, "loss": 0.8743, "step": 1045 }, { "epoch": 0.02, "grad_norm": 0.86328125, "learning_rate": 3.012048192771085e-05, "loss": 1.0844, "step": 1050 }, { "epoch": 0.02, "grad_norm": 0.490234375, "learning_rate": 3.026391279403328e-05, "loss": 0.9911, "step": 1055 }, { "epoch": 0.02, "grad_norm": 0.51171875, "learning_rate": 3.040734366035571e-05, "loss": 0.965, "step": 1060 }, { "epoch": 0.02, "grad_norm": 0.56640625, "learning_rate": 3.055077452667814e-05, "loss": 0.9537, "step": 1065 }, { "epoch": 0.02, "grad_norm": 0.53125, "learning_rate": 3.0694205393000576e-05, "loss": 0.9508, "step": 1070 }, { "epoch": 0.02, "grad_norm": 0.55859375, "learning_rate": 3.0837636259323e-05, "loss": 1.0471, "step": 1075 }, { "epoch": 0.02, "grad_norm": 0.51953125, "learning_rate": 3.098106712564544e-05, "loss": 1.1665, "step": 1080 }, { "epoch": 0.02, "grad_norm": 0.54296875, "learning_rate": 3.112449799196787e-05, "loss": 0.8957, "step": 1085 }, { "epoch": 0.02, "grad_norm": 0.53125, "learning_rate": 3.126792885829031e-05, "loss": 1.0339, "step": 1090 }, { "epoch": 0.02, "grad_norm": 0.5234375, "learning_rate": 3.1411359724612736e-05, "loss": 0.9343, "step": 1095 }, { "epoch": 0.02, "grad_norm": 0.65625, "learning_rate": 3.155479059093517e-05, "loss": 1.0931, "step": 1100 }, { "epoch": 0.02, "grad_norm": 0.5546875, "learning_rate": 3.16982214572576e-05, "loss": 0.932, "step": 1105 }, { "epoch": 0.02, "grad_norm": 0.5, "learning_rate": 3.1841652323580036e-05, "loss": 0.97, "step": 1110 }, { "epoch": 0.02, "grad_norm": 0.51171875, "learning_rate": 3.198508318990247e-05, "loss": 1.0856, "step": 1115 }, { "epoch": 0.02, "grad_norm": 0.59765625, "learning_rate": 3.21285140562249e-05, "loss": 0.8973, "step": 1120 }, { "epoch": 0.02, "grad_norm": 0.53125, "learning_rate": 3.227194492254733e-05, "loss": 1.018, "step": 1125 }, { "epoch": 0.02, "grad_norm": 0.58203125, "learning_rate": 3.241537578886977e-05, "loss": 1.0977, "step": 1130 }, { "epoch": 0.02, "grad_norm": 0.53515625, "learning_rate": 3.2558806655192196e-05, "loss": 0.8744, "step": 1135 }, { "epoch": 0.02, "grad_norm": 0.478515625, "learning_rate": 3.2702237521514636e-05, "loss": 0.8979, "step": 1140 }, { "epoch": 0.02, "grad_norm": 0.578125, "learning_rate": 3.284566838783706e-05, "loss": 0.9762, "step": 1145 }, { "epoch": 0.02, "grad_norm": 0.5234375, "learning_rate": 3.2989099254159496e-05, "loss": 0.9018, "step": 1150 }, { "epoch": 0.02, "grad_norm": 0.55859375, "learning_rate": 3.313253012048193e-05, "loss": 1.0398, "step": 1155 }, { "epoch": 0.02, "grad_norm": 0.51171875, "learning_rate": 3.327596098680436e-05, "loss": 1.0457, "step": 1160 }, { "epoch": 0.02, "grad_norm": 0.5390625, "learning_rate": 3.3419391853126796e-05, "loss": 0.984, "step": 1165 }, { "epoch": 0.02, "grad_norm": 0.478515625, "learning_rate": 3.356282271944923e-05, "loss": 0.8453, "step": 1170 }, { "epoch": 0.02, "grad_norm": 0.4921875, "learning_rate": 3.3706253585771656e-05, "loss": 1.1258, "step": 1175 }, { "epoch": 0.02, "grad_norm": 0.5078125, "learning_rate": 3.3849684452094096e-05, "loss": 0.9574, "step": 1180 }, { "epoch": 0.02, "grad_norm": 0.53125, "learning_rate": 3.399311531841652e-05, "loss": 0.9937, "step": 1185 }, { "epoch": 0.02, "grad_norm": 0.5234375, "learning_rate": 3.413654618473896e-05, "loss": 1.0114, "step": 1190 }, { "epoch": 0.02, "grad_norm": 0.4609375, "learning_rate": 3.427997705106139e-05, "loss": 0.9564, "step": 1195 }, { "epoch": 0.02, "grad_norm": 0.515625, "learning_rate": 3.442340791738382e-05, "loss": 1.1259, "step": 1200 }, { "epoch": 0.02, "grad_norm": 0.466796875, "learning_rate": 3.456683878370626e-05, "loss": 0.9919, "step": 1205 }, { "epoch": 0.02, "grad_norm": 0.58203125, "learning_rate": 3.471026965002869e-05, "loss": 1.0561, "step": 1210 }, { "epoch": 0.02, "grad_norm": 0.474609375, "learning_rate": 3.485370051635112e-05, "loss": 0.9138, "step": 1215 }, { "epoch": 0.02, "grad_norm": 0.51171875, "learning_rate": 3.499713138267356e-05, "loss": 0.9386, "step": 1220 }, { "epoch": 0.02, "grad_norm": 0.48828125, "learning_rate": 3.5140562248995983e-05, "loss": 0.9422, "step": 1225 }, { "epoch": 0.02, "grad_norm": 0.482421875, "learning_rate": 3.528399311531842e-05, "loss": 0.9768, "step": 1230 }, { "epoch": 0.02, "grad_norm": 0.54296875, "learning_rate": 3.542742398164085e-05, "loss": 1.1266, "step": 1235 }, { "epoch": 0.02, "grad_norm": 0.5234375, "learning_rate": 3.5570854847963284e-05, "loss": 1.0733, "step": 1240 }, { "epoch": 0.02, "grad_norm": 0.5078125, "learning_rate": 3.571428571428572e-05, "loss": 1.001, "step": 1245 }, { "epoch": 0.02, "grad_norm": 0.52734375, "learning_rate": 3.585771658060815e-05, "loss": 0.8879, "step": 1250 }, { "epoch": 0.02, "grad_norm": 0.51953125, "learning_rate": 3.6001147446930584e-05, "loss": 1.0183, "step": 1255 }, { "epoch": 0.02, "grad_norm": 0.435546875, "learning_rate": 3.614457831325301e-05, "loss": 0.9491, "step": 1260 }, { "epoch": 0.02, "grad_norm": 0.53125, "learning_rate": 3.6288009179575444e-05, "loss": 0.8943, "step": 1265 }, { "epoch": 0.02, "grad_norm": 0.50390625, "learning_rate": 3.643144004589788e-05, "loss": 1.1007, "step": 1270 }, { "epoch": 0.02, "grad_norm": 0.48828125, "learning_rate": 3.657487091222031e-05, "loss": 0.9368, "step": 1275 }, { "epoch": 0.02, "grad_norm": 0.5078125, "learning_rate": 3.6718301778542744e-05, "loss": 0.9788, "step": 1280 }, { "epoch": 0.02, "grad_norm": 0.404296875, "learning_rate": 3.686173264486518e-05, "loss": 0.9342, "step": 1285 }, { "epoch": 0.02, "grad_norm": 0.5, "learning_rate": 3.7005163511187604e-05, "loss": 1.0328, "step": 1290 }, { "epoch": 0.02, "grad_norm": 0.380859375, "learning_rate": 3.7148594377510044e-05, "loss": 0.8514, "step": 1295 }, { "epoch": 0.02, "grad_norm": 0.54296875, "learning_rate": 3.729202524383247e-05, "loss": 1.0245, "step": 1300 }, { "epoch": 0.02, "grad_norm": 0.51171875, "learning_rate": 3.743545611015491e-05, "loss": 0.9722, "step": 1305 }, { "epoch": 0.02, "grad_norm": 0.5546875, "learning_rate": 3.757888697647734e-05, "loss": 1.0261, "step": 1310 }, { "epoch": 0.02, "grad_norm": 0.5703125, "learning_rate": 3.772231784279977e-05, "loss": 0.9276, "step": 1315 }, { "epoch": 0.02, "grad_norm": 0.5, "learning_rate": 3.7865748709122204e-05, "loss": 1.0778, "step": 1320 }, { "epoch": 0.02, "grad_norm": 0.482421875, "learning_rate": 3.800917957544464e-05, "loss": 1.0856, "step": 1325 }, { "epoch": 0.02, "grad_norm": 0.4921875, "learning_rate": 3.815261044176707e-05, "loss": 0.928, "step": 1330 }, { "epoch": 0.02, "grad_norm": 0.53125, "learning_rate": 3.8296041308089504e-05, "loss": 0.9267, "step": 1335 }, { "epoch": 0.02, "grad_norm": 0.453125, "learning_rate": 3.843947217441193e-05, "loss": 0.9876, "step": 1340 }, { "epoch": 0.02, "grad_norm": 0.6015625, "learning_rate": 3.858290304073437e-05, "loss": 1.0872, "step": 1345 }, { "epoch": 0.02, "grad_norm": 0.4921875, "learning_rate": 3.87263339070568e-05, "loss": 0.8564, "step": 1350 }, { "epoch": 0.02, "grad_norm": 0.51953125, "learning_rate": 3.886976477337924e-05, "loss": 1.1186, "step": 1355 }, { "epoch": 0.02, "grad_norm": 0.498046875, "learning_rate": 3.9013195639701665e-05, "loss": 0.9905, "step": 1360 }, { "epoch": 0.02, "grad_norm": 0.6015625, "learning_rate": 3.91566265060241e-05, "loss": 1.0624, "step": 1365 }, { "epoch": 0.02, "grad_norm": 0.5, "learning_rate": 3.930005737234653e-05, "loss": 0.9626, "step": 1370 }, { "epoch": 0.02, "grad_norm": 0.5234375, "learning_rate": 3.9443488238668965e-05, "loss": 0.9534, "step": 1375 }, { "epoch": 0.02, "grad_norm": 0.515625, "learning_rate": 3.958691910499139e-05, "loss": 1.0367, "step": 1380 }, { "epoch": 0.02, "grad_norm": 0.51953125, "learning_rate": 3.973034997131383e-05, "loss": 0.9886, "step": 1385 }, { "epoch": 0.02, "grad_norm": 0.4765625, "learning_rate": 3.987378083763626e-05, "loss": 1.1053, "step": 1390 }, { "epoch": 0.02, "grad_norm": 0.5, "learning_rate": 4.00172117039587e-05, "loss": 0.9696, "step": 1395 }, { "epoch": 0.02, "grad_norm": 0.62890625, "learning_rate": 4.0160642570281125e-05, "loss": 1.128, "step": 1400 }, { "epoch": 0.02, "grad_norm": 0.486328125, "learning_rate": 4.030407343660356e-05, "loss": 0.9621, "step": 1405 }, { "epoch": 0.02, "grad_norm": 0.49609375, "learning_rate": 4.044750430292599e-05, "loss": 0.9351, "step": 1410 }, { "epoch": 0.02, "grad_norm": 0.4140625, "learning_rate": 4.0590935169248425e-05, "loss": 0.9294, "step": 1415 }, { "epoch": 0.02, "grad_norm": 0.51171875, "learning_rate": 4.073436603557086e-05, "loss": 1.0204, "step": 1420 }, { "epoch": 0.02, "grad_norm": 0.5703125, "learning_rate": 4.087779690189329e-05, "loss": 1.0157, "step": 1425 }, { "epoch": 0.02, "grad_norm": 0.5, "learning_rate": 4.102122776821572e-05, "loss": 0.9373, "step": 1430 }, { "epoch": 0.02, "grad_norm": 0.546875, "learning_rate": 4.116465863453816e-05, "loss": 1.138, "step": 1435 }, { "epoch": 0.02, "grad_norm": 0.5, "learning_rate": 4.1308089500860585e-05, "loss": 0.8953, "step": 1440 }, { "epoch": 0.02, "grad_norm": 0.484375, "learning_rate": 4.1451520367183025e-05, "loss": 1.0174, "step": 1445 }, { "epoch": 0.02, "grad_norm": 0.55078125, "learning_rate": 4.159495123350545e-05, "loss": 0.9657, "step": 1450 }, { "epoch": 0.02, "grad_norm": 0.45703125, "learning_rate": 4.1738382099827885e-05, "loss": 1.029, "step": 1455 }, { "epoch": 0.02, "grad_norm": 0.515625, "learning_rate": 4.188181296615032e-05, "loss": 1.0178, "step": 1460 }, { "epoch": 0.02, "grad_norm": 0.5390625, "learning_rate": 4.202524383247275e-05, "loss": 0.9021, "step": 1465 }, { "epoch": 0.02, "grad_norm": 0.515625, "learning_rate": 4.2168674698795186e-05, "loss": 0.8547, "step": 1470 }, { "epoch": 0.02, "grad_norm": 0.490234375, "learning_rate": 4.231210556511762e-05, "loss": 0.9427, "step": 1475 }, { "epoch": 0.02, "grad_norm": 0.4765625, "learning_rate": 4.2455536431440046e-05, "loss": 0.9815, "step": 1480 }, { "epoch": 0.02, "grad_norm": 0.49609375, "learning_rate": 4.259896729776248e-05, "loss": 0.9458, "step": 1485 }, { "epoch": 0.02, "grad_norm": 0.48046875, "learning_rate": 4.274239816408491e-05, "loss": 1.0863, "step": 1490 }, { "epoch": 0.02, "grad_norm": 0.58203125, "learning_rate": 4.2885829030407346e-05, "loss": 0.9778, "step": 1495 }, { "epoch": 0.02, "grad_norm": 0.51953125, "learning_rate": 4.302925989672978e-05, "loss": 1.0188, "step": 1500 }, { "epoch": 0.02, "grad_norm": 0.486328125, "learning_rate": 4.317269076305221e-05, "loss": 0.8671, "step": 1505 }, { "epoch": 0.02, "grad_norm": 0.46484375, "learning_rate": 4.3316121629374646e-05, "loss": 0.9179, "step": 1510 }, { "epoch": 0.02, "grad_norm": 0.47265625, "learning_rate": 4.345955249569707e-05, "loss": 1.0784, "step": 1515 }, { "epoch": 0.02, "grad_norm": 0.435546875, "learning_rate": 4.3602983362019506e-05, "loss": 0.9223, "step": 1520 }, { "epoch": 0.02, "grad_norm": 0.515625, "learning_rate": 4.374641422834194e-05, "loss": 1.0345, "step": 1525 }, { "epoch": 0.02, "grad_norm": 0.482421875, "learning_rate": 4.388984509466437e-05, "loss": 1.042, "step": 1530 }, { "epoch": 0.02, "grad_norm": 0.51953125, "learning_rate": 4.4033275960986806e-05, "loss": 0.9997, "step": 1535 }, { "epoch": 0.02, "grad_norm": 0.52734375, "learning_rate": 4.417670682730924e-05, "loss": 1.0368, "step": 1540 }, { "epoch": 0.02, "grad_norm": 0.51953125, "learning_rate": 4.4320137693631666e-05, "loss": 0.9121, "step": 1545 }, { "epoch": 0.02, "grad_norm": 0.484375, "learning_rate": 4.4463568559954106e-05, "loss": 0.9296, "step": 1550 }, { "epoch": 0.02, "grad_norm": 0.4296875, "learning_rate": 4.460699942627653e-05, "loss": 0.9765, "step": 1555 }, { "epoch": 0.02, "grad_norm": 0.48046875, "learning_rate": 4.475043029259897e-05, "loss": 1.0278, "step": 1560 }, { "epoch": 0.02, "grad_norm": 0.53125, "learning_rate": 4.48938611589214e-05, "loss": 1.0368, "step": 1565 }, { "epoch": 0.02, "grad_norm": 0.4921875, "learning_rate": 4.503729202524383e-05, "loss": 0.9063, "step": 1570 }, { "epoch": 0.02, "grad_norm": 0.53515625, "learning_rate": 4.5180722891566266e-05, "loss": 0.9922, "step": 1575 }, { "epoch": 0.02, "grad_norm": 0.45703125, "learning_rate": 4.53241537578887e-05, "loss": 0.9436, "step": 1580 }, { "epoch": 0.02, "grad_norm": 0.5078125, "learning_rate": 4.546758462421113e-05, "loss": 1.1216, "step": 1585 }, { "epoch": 0.02, "grad_norm": 0.50390625, "learning_rate": 4.5611015490533566e-05, "loss": 1.095, "step": 1590 }, { "epoch": 0.02, "grad_norm": 0.474609375, "learning_rate": 4.575444635685599e-05, "loss": 1.0015, "step": 1595 }, { "epoch": 0.02, "grad_norm": 0.466796875, "learning_rate": 4.589787722317843e-05, "loss": 0.9483, "step": 1600 }, { "epoch": 0.02, "grad_norm": 0.490234375, "learning_rate": 4.604130808950086e-05, "loss": 0.9021, "step": 1605 }, { "epoch": 0.02, "grad_norm": 0.494140625, "learning_rate": 4.61847389558233e-05, "loss": 0.9578, "step": 1610 }, { "epoch": 0.02, "grad_norm": 0.484375, "learning_rate": 4.632816982214573e-05, "loss": 1.0509, "step": 1615 }, { "epoch": 0.02, "grad_norm": 0.51171875, "learning_rate": 4.647160068846816e-05, "loss": 0.9595, "step": 1620 }, { "epoch": 0.02, "grad_norm": 0.5078125, "learning_rate": 4.6615031554790593e-05, "loss": 1.0539, "step": 1625 }, { "epoch": 0.02, "grad_norm": 0.427734375, "learning_rate": 4.675846242111303e-05, "loss": 1.0104, "step": 1630 }, { "epoch": 0.02, "grad_norm": 0.54296875, "learning_rate": 4.690189328743546e-05, "loss": 1.0165, "step": 1635 }, { "epoch": 0.02, "grad_norm": 0.478515625, "learning_rate": 4.7045324153757894e-05, "loss": 0.9361, "step": 1640 }, { "epoch": 0.02, "grad_norm": 0.458984375, "learning_rate": 4.718875502008032e-05, "loss": 0.928, "step": 1645 }, { "epoch": 0.02, "grad_norm": 0.470703125, "learning_rate": 4.733218588640276e-05, "loss": 0.9363, "step": 1650 }, { "epoch": 0.02, "grad_norm": 0.52734375, "learning_rate": 4.747561675272519e-05, "loss": 0.9143, "step": 1655 }, { "epoch": 0.02, "grad_norm": 0.48828125, "learning_rate": 4.761904761904762e-05, "loss": 0.9955, "step": 1660 }, { "epoch": 0.02, "grad_norm": 0.4921875, "learning_rate": 4.7762478485370054e-05, "loss": 0.9988, "step": 1665 }, { "epoch": 0.02, "grad_norm": 0.458984375, "learning_rate": 4.790590935169249e-05, "loss": 0.9563, "step": 1670 }, { "epoch": 0.02, "grad_norm": 0.5703125, "learning_rate": 4.804934021801492e-05, "loss": 1.0039, "step": 1675 }, { "epoch": 0.02, "grad_norm": 0.462890625, "learning_rate": 4.8192771084337354e-05, "loss": 1.1524, "step": 1680 }, { "epoch": 0.02, "grad_norm": 0.50390625, "learning_rate": 4.833620195065978e-05, "loss": 1.0741, "step": 1685 }, { "epoch": 0.02, "grad_norm": 0.546875, "learning_rate": 4.847963281698222e-05, "loss": 0.9556, "step": 1690 }, { "epoch": 0.02, "grad_norm": 0.4375, "learning_rate": 4.862306368330465e-05, "loss": 0.8395, "step": 1695 }, { "epoch": 0.02, "grad_norm": 0.515625, "learning_rate": 4.876649454962709e-05, "loss": 1.0093, "step": 1700 }, { "epoch": 0.02, "grad_norm": 0.4375, "learning_rate": 4.8909925415949514e-05, "loss": 0.9998, "step": 1705 }, { "epoch": 0.02, "grad_norm": 0.5234375, "learning_rate": 4.905335628227195e-05, "loss": 1.0079, "step": 1710 }, { "epoch": 0.02, "grad_norm": 0.50390625, "learning_rate": 4.919678714859438e-05, "loss": 0.9187, "step": 1715 }, { "epoch": 0.02, "grad_norm": 0.4375, "learning_rate": 4.9340218014916814e-05, "loss": 1.0315, "step": 1720 }, { "epoch": 0.02, "grad_norm": 0.474609375, "learning_rate": 4.948364888123925e-05, "loss": 0.833, "step": 1725 }, { "epoch": 0.02, "grad_norm": 0.50390625, "learning_rate": 4.962707974756168e-05, "loss": 1.0263, "step": 1730 }, { "epoch": 0.02, "grad_norm": 0.51953125, "learning_rate": 4.977051061388411e-05, "loss": 0.9684, "step": 1735 }, { "epoch": 0.02, "grad_norm": 0.4921875, "learning_rate": 4.991394148020654e-05, "loss": 0.969, "step": 1740 }, { "epoch": 0.03, "grad_norm": 0.474609375, "learning_rate": 5.0057372346528974e-05, "loss": 1.0403, "step": 1745 }, { "epoch": 0.03, "grad_norm": 0.462890625, "learning_rate": 5.020080321285141e-05, "loss": 0.9688, "step": 1750 }, { "epoch": 0.03, "grad_norm": 0.47265625, "learning_rate": 5.0344234079173834e-05, "loss": 0.928, "step": 1755 }, { "epoch": 0.03, "grad_norm": 0.478515625, "learning_rate": 5.0487664945496275e-05, "loss": 0.8397, "step": 1760 }, { "epoch": 0.03, "grad_norm": 0.494140625, "learning_rate": 5.063109581181871e-05, "loss": 1.1001, "step": 1765 }, { "epoch": 0.03, "grad_norm": 0.53515625, "learning_rate": 5.077452667814114e-05, "loss": 0.9803, "step": 1770 }, { "epoch": 0.03, "grad_norm": 0.4453125, "learning_rate": 5.091795754446357e-05, "loss": 1.1502, "step": 1775 }, { "epoch": 0.03, "grad_norm": 0.48046875, "learning_rate": 5.1061388410786e-05, "loss": 0.9695, "step": 1780 }, { "epoch": 0.03, "grad_norm": 0.5078125, "learning_rate": 5.120481927710844e-05, "loss": 0.969, "step": 1785 }, { "epoch": 0.03, "grad_norm": 0.48828125, "learning_rate": 5.134825014343087e-05, "loss": 1.1124, "step": 1790 }, { "epoch": 0.03, "grad_norm": 0.515625, "learning_rate": 5.14916810097533e-05, "loss": 0.9795, "step": 1795 }, { "epoch": 0.03, "grad_norm": 0.53515625, "learning_rate": 5.1635111876075735e-05, "loss": 1.0894, "step": 1800 }, { "epoch": 0.03, "grad_norm": 0.48828125, "learning_rate": 5.177854274239816e-05, "loss": 0.9983, "step": 1805 }, { "epoch": 0.03, "grad_norm": 0.462890625, "learning_rate": 5.1921973608720595e-05, "loss": 0.9976, "step": 1810 }, { "epoch": 0.03, "grad_norm": 0.474609375, "learning_rate": 5.2065404475043035e-05, "loss": 1.0442, "step": 1815 }, { "epoch": 0.03, "grad_norm": 0.5, "learning_rate": 5.220883534136547e-05, "loss": 1.1065, "step": 1820 }, { "epoch": 0.03, "grad_norm": 0.458984375, "learning_rate": 5.2352266207687895e-05, "loss": 0.9084, "step": 1825 }, { "epoch": 0.03, "grad_norm": 0.45703125, "learning_rate": 5.249569707401033e-05, "loss": 1.0342, "step": 1830 }, { "epoch": 0.03, "grad_norm": 0.58203125, "learning_rate": 5.263912794033277e-05, "loss": 1.0483, "step": 1835 }, { "epoch": 0.03, "grad_norm": 0.439453125, "learning_rate": 5.278255880665519e-05, "loss": 0.9213, "step": 1840 }, { "epoch": 0.03, "grad_norm": 0.451171875, "learning_rate": 5.292598967297763e-05, "loss": 1.0645, "step": 1845 }, { "epoch": 0.03, "grad_norm": 0.6171875, "learning_rate": 5.306942053930006e-05, "loss": 1.0508, "step": 1850 }, { "epoch": 0.03, "grad_norm": 0.427734375, "learning_rate": 5.321285140562249e-05, "loss": 0.8046, "step": 1855 }, { "epoch": 0.03, "grad_norm": 0.50390625, "learning_rate": 5.335628227194492e-05, "loss": 0.8508, "step": 1860 }, { "epoch": 0.03, "grad_norm": 0.4765625, "learning_rate": 5.349971313826736e-05, "loss": 1.0346, "step": 1865 }, { "epoch": 0.03, "grad_norm": 0.4375, "learning_rate": 5.364314400458978e-05, "loss": 0.8905, "step": 1870 }, { "epoch": 0.03, "grad_norm": 0.4921875, "learning_rate": 5.378657487091222e-05, "loss": 1.133, "step": 1875 }, { "epoch": 0.03, "grad_norm": 0.435546875, "learning_rate": 5.3930005737234656e-05, "loss": 1.1266, "step": 1880 }, { "epoch": 0.03, "grad_norm": 0.4140625, "learning_rate": 5.4073436603557096e-05, "loss": 1.0006, "step": 1885 }, { "epoch": 0.03, "grad_norm": 0.451171875, "learning_rate": 5.4216867469879516e-05, "loss": 0.8921, "step": 1890 }, { "epoch": 0.03, "grad_norm": 0.5078125, "learning_rate": 5.4360298336201956e-05, "loss": 1.0013, "step": 1895 }, { "epoch": 0.03, "grad_norm": 0.49609375, "learning_rate": 5.450372920252439e-05, "loss": 0.9136, "step": 1900 }, { "epoch": 0.03, "grad_norm": 0.44921875, "learning_rate": 5.4647160068846816e-05, "loss": 0.8111, "step": 1905 }, { "epoch": 0.03, "grad_norm": 0.4765625, "learning_rate": 5.479059093516925e-05, "loss": 0.9691, "step": 1910 }, { "epoch": 0.03, "grad_norm": 0.359375, "learning_rate": 5.493402180149169e-05, "loss": 0.8102, "step": 1915 }, { "epoch": 0.03, "grad_norm": 0.47265625, "learning_rate": 5.507745266781411e-05, "loss": 0.9446, "step": 1920 }, { "epoch": 0.03, "grad_norm": 0.5234375, "learning_rate": 5.522088353413655e-05, "loss": 0.9894, "step": 1925 }, { "epoch": 0.03, "grad_norm": 0.48046875, "learning_rate": 5.536431440045898e-05, "loss": 0.9952, "step": 1930 }, { "epoch": 0.03, "grad_norm": 0.482421875, "learning_rate": 5.5507745266781416e-05, "loss": 0.8921, "step": 1935 }, { "epoch": 0.03, "grad_norm": 0.50390625, "learning_rate": 5.565117613310384e-05, "loss": 1.0289, "step": 1940 }, { "epoch": 0.03, "grad_norm": 0.5078125, "learning_rate": 5.579460699942628e-05, "loss": 1.0987, "step": 1945 }, { "epoch": 0.03, "grad_norm": 0.455078125, "learning_rate": 5.5938037865748716e-05, "loss": 0.8982, "step": 1950 }, { "epoch": 0.03, "grad_norm": 0.50390625, "learning_rate": 5.608146873207114e-05, "loss": 0.9589, "step": 1955 }, { "epoch": 0.03, "grad_norm": 0.427734375, "learning_rate": 5.6224899598393576e-05, "loss": 1.0076, "step": 1960 }, { "epoch": 0.03, "grad_norm": 0.4765625, "learning_rate": 5.636833046471601e-05, "loss": 1.0098, "step": 1965 }, { "epoch": 0.03, "grad_norm": 0.4140625, "learning_rate": 5.6511761331038436e-05, "loss": 0.9846, "step": 1970 }, { "epoch": 0.03, "grad_norm": 0.5078125, "learning_rate": 5.6655192197360876e-05, "loss": 1.0777, "step": 1975 }, { "epoch": 0.03, "grad_norm": 0.421875, "learning_rate": 5.679862306368331e-05, "loss": 0.9375, "step": 1980 }, { "epoch": 0.03, "grad_norm": 0.48046875, "learning_rate": 5.694205393000574e-05, "loss": 1.0631, "step": 1985 }, { "epoch": 0.03, "grad_norm": 0.4609375, "learning_rate": 5.708548479632817e-05, "loss": 0.9297, "step": 1990 }, { "epoch": 0.03, "grad_norm": 0.4140625, "learning_rate": 5.72289156626506e-05, "loss": 0.7953, "step": 1995 }, { "epoch": 0.03, "grad_norm": 0.46875, "learning_rate": 5.737234652897304e-05, "loss": 1.0055, "step": 2000 }, { "epoch": 0.03, "grad_norm": 0.435546875, "learning_rate": 5.751577739529547e-05, "loss": 1.0526, "step": 2005 }, { "epoch": 0.03, "grad_norm": 0.515625, "learning_rate": 5.76592082616179e-05, "loss": 0.9813, "step": 2010 }, { "epoch": 0.03, "grad_norm": 0.47265625, "learning_rate": 5.780263912794034e-05, "loss": 1.0185, "step": 2015 }, { "epoch": 0.03, "grad_norm": 0.474609375, "learning_rate": 5.794606999426276e-05, "loss": 0.9883, "step": 2020 }, { "epoch": 0.03, "grad_norm": 0.412109375, "learning_rate": 5.80895008605852e-05, "loss": 0.9408, "step": 2025 }, { "epoch": 0.03, "grad_norm": 0.462890625, "learning_rate": 5.823293172690764e-05, "loss": 1.0352, "step": 2030 }, { "epoch": 0.03, "grad_norm": 0.462890625, "learning_rate": 5.8376362593230063e-05, "loss": 0.8128, "step": 2035 }, { "epoch": 0.03, "grad_norm": 0.45703125, "learning_rate": 5.85197934595525e-05, "loss": 1.0551, "step": 2040 }, { "epoch": 0.03, "grad_norm": 0.44921875, "learning_rate": 5.866322432587493e-05, "loss": 0.9347, "step": 2045 }, { "epoch": 0.03, "grad_norm": 0.462890625, "learning_rate": 5.880665519219737e-05, "loss": 0.8477, "step": 2050 }, { "epoch": 0.03, "grad_norm": 0.57421875, "learning_rate": 5.895008605851979e-05, "loss": 0.9384, "step": 2055 }, { "epoch": 0.03, "grad_norm": 0.478515625, "learning_rate": 5.909351692484223e-05, "loss": 1.0149, "step": 2060 }, { "epoch": 0.03, "grad_norm": 0.439453125, "learning_rate": 5.9236947791164664e-05, "loss": 1.0872, "step": 2065 }, { "epoch": 0.03, "grad_norm": 0.443359375, "learning_rate": 5.938037865748709e-05, "loss": 0.8893, "step": 2070 }, { "epoch": 0.03, "grad_norm": 0.40625, "learning_rate": 5.9523809523809524e-05, "loss": 0.9899, "step": 2075 }, { "epoch": 0.03, "grad_norm": 0.447265625, "learning_rate": 5.9667240390131964e-05, "loss": 0.9359, "step": 2080 }, { "epoch": 0.03, "grad_norm": 0.4296875, "learning_rate": 5.9810671256454384e-05, "loss": 0.9945, "step": 2085 }, { "epoch": 0.03, "grad_norm": 0.486328125, "learning_rate": 5.9954102122776824e-05, "loss": 0.8843, "step": 2090 }, { "epoch": 0.03, "grad_norm": 0.423828125, "learning_rate": 6.009753298909926e-05, "loss": 0.9516, "step": 2095 }, { "epoch": 0.03, "grad_norm": 0.484375, "learning_rate": 6.02409638554217e-05, "loss": 0.9624, "step": 2100 }, { "epoch": 0.03, "grad_norm": 0.43359375, "learning_rate": 6.038439472174412e-05, "loss": 0.8447, "step": 2105 }, { "epoch": 0.03, "grad_norm": 0.53515625, "learning_rate": 6.052782558806656e-05, "loss": 0.979, "step": 2110 }, { "epoch": 0.03, "grad_norm": 0.46875, "learning_rate": 6.067125645438899e-05, "loss": 0.9816, "step": 2115 }, { "epoch": 0.03, "grad_norm": 0.453125, "learning_rate": 6.081468732071142e-05, "loss": 1.0292, "step": 2120 }, { "epoch": 0.03, "grad_norm": 0.408203125, "learning_rate": 6.095811818703385e-05, "loss": 1.0244, "step": 2125 }, { "epoch": 0.03, "grad_norm": 0.484375, "learning_rate": 6.110154905335628e-05, "loss": 1.1965, "step": 2130 }, { "epoch": 0.03, "grad_norm": 0.4296875, "learning_rate": 6.124497991967871e-05, "loss": 0.9508, "step": 2135 }, { "epoch": 0.03, "grad_norm": 0.421875, "learning_rate": 6.138841078600115e-05, "loss": 1.2246, "step": 2140 }, { "epoch": 0.03, "grad_norm": 0.3984375, "learning_rate": 6.153184165232359e-05, "loss": 0.9361, "step": 2145 }, { "epoch": 0.03, "grad_norm": 0.46484375, "learning_rate": 6.1675272518646e-05, "loss": 1.0158, "step": 2150 }, { "epoch": 0.03, "grad_norm": 0.47265625, "learning_rate": 6.181870338496844e-05, "loss": 0.885, "step": 2155 }, { "epoch": 0.03, "grad_norm": 0.458984375, "learning_rate": 6.196213425129088e-05, "loss": 0.8979, "step": 2160 }, { "epoch": 0.03, "grad_norm": 0.54296875, "learning_rate": 6.210556511761332e-05, "loss": 1.0512, "step": 2165 }, { "epoch": 0.03, "grad_norm": 0.404296875, "learning_rate": 6.224899598393574e-05, "loss": 0.9623, "step": 2170 }, { "epoch": 0.03, "grad_norm": 0.486328125, "learning_rate": 6.239242685025818e-05, "loss": 0.8594, "step": 2175 }, { "epoch": 0.03, "grad_norm": 0.4453125, "learning_rate": 6.253585771658062e-05, "loss": 0.9633, "step": 2180 }, { "epoch": 0.03, "grad_norm": 0.470703125, "learning_rate": 6.267928858290304e-05, "loss": 0.9947, "step": 2185 }, { "epoch": 0.03, "grad_norm": 0.4609375, "learning_rate": 6.282271944922547e-05, "loss": 0.8975, "step": 2190 }, { "epoch": 0.03, "grad_norm": 0.41796875, "learning_rate": 6.296615031554791e-05, "loss": 0.9396, "step": 2195 }, { "epoch": 0.03, "grad_norm": 0.490234375, "learning_rate": 6.310958118187034e-05, "loss": 1.024, "step": 2200 }, { "epoch": 0.03, "grad_norm": 0.39453125, "learning_rate": 6.325301204819278e-05, "loss": 1.0322, "step": 2205 }, { "epoch": 0.03, "grad_norm": 0.5, "learning_rate": 6.33964429145152e-05, "loss": 0.9946, "step": 2210 }, { "epoch": 0.03, "grad_norm": 0.421875, "learning_rate": 6.353987378083765e-05, "loss": 1.0883, "step": 2215 }, { "epoch": 0.03, "grad_norm": 0.455078125, "learning_rate": 6.368330464716007e-05, "loss": 1.0424, "step": 2220 }, { "epoch": 0.03, "grad_norm": 0.44140625, "learning_rate": 6.382673551348251e-05, "loss": 0.8524, "step": 2225 }, { "epoch": 0.03, "grad_norm": 0.40234375, "learning_rate": 6.397016637980494e-05, "loss": 0.9997, "step": 2230 }, { "epoch": 0.03, "grad_norm": 0.5546875, "learning_rate": 6.411359724612737e-05, "loss": 0.824, "step": 2235 }, { "epoch": 0.03, "grad_norm": 0.435546875, "learning_rate": 6.42570281124498e-05, "loss": 1.0312, "step": 2240 }, { "epoch": 0.03, "grad_norm": 0.451171875, "learning_rate": 6.440045897877223e-05, "loss": 0.92, "step": 2245 }, { "epoch": 0.03, "grad_norm": 0.423828125, "learning_rate": 6.454388984509466e-05, "loss": 0.879, "step": 2250 }, { "epoch": 0.03, "grad_norm": 0.451171875, "learning_rate": 6.46873207114171e-05, "loss": 1.0229, "step": 2255 }, { "epoch": 0.03, "grad_norm": 0.3828125, "learning_rate": 6.483075157773954e-05, "loss": 1.0514, "step": 2260 }, { "epoch": 0.03, "grad_norm": 0.408203125, "learning_rate": 6.497418244406197e-05, "loss": 0.9588, "step": 2265 }, { "epoch": 0.03, "grad_norm": 0.470703125, "learning_rate": 6.511761331038439e-05, "loss": 1.0641, "step": 2270 }, { "epoch": 0.03, "grad_norm": 0.486328125, "learning_rate": 6.526104417670683e-05, "loss": 0.9099, "step": 2275 }, { "epoch": 0.03, "grad_norm": 0.462890625, "learning_rate": 6.540447504302927e-05, "loss": 1.0107, "step": 2280 }, { "epoch": 0.03, "grad_norm": 0.48046875, "learning_rate": 6.55479059093517e-05, "loss": 1.0007, "step": 2285 }, { "epoch": 0.03, "grad_norm": 0.462890625, "learning_rate": 6.569133677567413e-05, "loss": 1.0989, "step": 2290 }, { "epoch": 0.03, "grad_norm": 0.45703125, "learning_rate": 6.583476764199657e-05, "loss": 1.0797, "step": 2295 }, { "epoch": 0.03, "grad_norm": 0.466796875, "learning_rate": 6.597819850831899e-05, "loss": 0.8466, "step": 2300 }, { "epoch": 0.03, "grad_norm": 0.447265625, "learning_rate": 6.612162937464142e-05, "loss": 1.0248, "step": 2305 }, { "epoch": 0.03, "grad_norm": 0.447265625, "learning_rate": 6.626506024096386e-05, "loss": 0.9221, "step": 2310 }, { "epoch": 0.03, "grad_norm": 0.494140625, "learning_rate": 6.640849110728629e-05, "loss": 0.8596, "step": 2315 }, { "epoch": 0.03, "grad_norm": 0.4296875, "learning_rate": 6.655192197360873e-05, "loss": 1.0407, "step": 2320 }, { "epoch": 0.03, "grad_norm": 0.494140625, "learning_rate": 6.669535283993115e-05, "loss": 1.082, "step": 2325 }, { "epoch": 0.03, "grad_norm": 0.4453125, "learning_rate": 6.683878370625359e-05, "loss": 1.0036, "step": 2330 }, { "epoch": 0.03, "grad_norm": 0.4296875, "learning_rate": 6.698221457257602e-05, "loss": 0.9212, "step": 2335 }, { "epoch": 0.03, "grad_norm": 0.44921875, "learning_rate": 6.712564543889846e-05, "loss": 1.2177, "step": 2340 }, { "epoch": 0.03, "grad_norm": 0.419921875, "learning_rate": 6.726907630522089e-05, "loss": 0.9034, "step": 2345 }, { "epoch": 0.03, "grad_norm": 0.40625, "learning_rate": 6.741250717154331e-05, "loss": 0.8041, "step": 2350 }, { "epoch": 0.03, "grad_norm": 0.46484375, "learning_rate": 6.755593803786575e-05, "loss": 1.0537, "step": 2355 }, { "epoch": 0.03, "grad_norm": 0.4375, "learning_rate": 6.769936890418819e-05, "loss": 1.0058, "step": 2360 }, { "epoch": 0.03, "grad_norm": 0.671875, "learning_rate": 6.78427997705106e-05, "loss": 0.8552, "step": 2365 }, { "epoch": 0.03, "grad_norm": 0.443359375, "learning_rate": 6.798623063683305e-05, "loss": 0.9354, "step": 2370 }, { "epoch": 0.03, "grad_norm": 0.466796875, "learning_rate": 6.812966150315549e-05, "loss": 0.9095, "step": 2375 }, { "epoch": 0.03, "grad_norm": 0.41015625, "learning_rate": 6.827309236947793e-05, "loss": 0.9488, "step": 2380 }, { "epoch": 0.03, "grad_norm": 0.515625, "learning_rate": 6.841652323580034e-05, "loss": 0.8641, "step": 2385 }, { "epoch": 0.03, "grad_norm": 0.5078125, "learning_rate": 6.855995410212278e-05, "loss": 1.0436, "step": 2390 }, { "epoch": 0.03, "grad_norm": 0.474609375, "learning_rate": 6.870338496844522e-05, "loss": 0.9638, "step": 2395 }, { "epoch": 0.03, "grad_norm": 0.43359375, "learning_rate": 6.884681583476765e-05, "loss": 0.9359, "step": 2400 }, { "epoch": 0.03, "grad_norm": 0.43359375, "learning_rate": 6.899024670109007e-05, "loss": 1.011, "step": 2405 }, { "epoch": 0.03, "grad_norm": 0.470703125, "learning_rate": 6.913367756741251e-05, "loss": 1.0113, "step": 2410 }, { "epoch": 0.03, "grad_norm": 0.4296875, "learning_rate": 6.927710843373494e-05, "loss": 1.056, "step": 2415 }, { "epoch": 0.03, "grad_norm": 0.474609375, "learning_rate": 6.942053930005738e-05, "loss": 0.9155, "step": 2420 }, { "epoch": 0.03, "grad_norm": 0.51171875, "learning_rate": 6.95639701663798e-05, "loss": 0.9205, "step": 2425 }, { "epoch": 0.03, "grad_norm": 0.423828125, "learning_rate": 6.970740103270223e-05, "loss": 0.9412, "step": 2430 }, { "epoch": 0.03, "grad_norm": 0.455078125, "learning_rate": 6.985083189902467e-05, "loss": 1.0245, "step": 2435 }, { "epoch": 0.04, "grad_norm": 0.421875, "learning_rate": 6.999426276534711e-05, "loss": 0.9259, "step": 2440 }, { "epoch": 0.04, "grad_norm": 0.392578125, "learning_rate": 7.013769363166954e-05, "loss": 0.8526, "step": 2445 }, { "epoch": 0.04, "grad_norm": 0.447265625, "learning_rate": 7.028112449799197e-05, "loss": 0.8853, "step": 2450 }, { "epoch": 0.04, "grad_norm": 0.435546875, "learning_rate": 7.042455536431441e-05, "loss": 0.9233, "step": 2455 }, { "epoch": 0.04, "grad_norm": 0.421875, "learning_rate": 7.056798623063683e-05, "loss": 0.8997, "step": 2460 }, { "epoch": 0.04, "grad_norm": 0.486328125, "learning_rate": 7.071141709695926e-05, "loss": 1.0118, "step": 2465 }, { "epoch": 0.04, "grad_norm": 0.40234375, "learning_rate": 7.08548479632817e-05, "loss": 1.0767, "step": 2470 }, { "epoch": 0.04, "grad_norm": 0.5234375, "learning_rate": 7.099827882960414e-05, "loss": 0.9788, "step": 2475 }, { "epoch": 0.04, "grad_norm": 0.46484375, "learning_rate": 7.114170969592657e-05, "loss": 0.9593, "step": 2480 }, { "epoch": 0.04, "grad_norm": 0.466796875, "learning_rate": 7.1285140562249e-05, "loss": 0.9277, "step": 2485 }, { "epoch": 0.04, "grad_norm": 0.4453125, "learning_rate": 7.142857142857143e-05, "loss": 1.0196, "step": 2490 }, { "epoch": 0.04, "grad_norm": 0.486328125, "learning_rate": 7.157200229489387e-05, "loss": 0.9658, "step": 2495 }, { "epoch": 0.04, "grad_norm": 0.44921875, "learning_rate": 7.17154331612163e-05, "loss": 0.986, "step": 2500 }, { "epoch": 0.04, "grad_norm": 0.412109375, "learning_rate": 7.185886402753873e-05, "loss": 1.0481, "step": 2505 }, { "epoch": 0.04, "grad_norm": 0.470703125, "learning_rate": 7.200229489386117e-05, "loss": 1.1599, "step": 2510 }, { "epoch": 0.04, "grad_norm": 0.408203125, "learning_rate": 7.21457257601836e-05, "loss": 0.8659, "step": 2515 }, { "epoch": 0.04, "grad_norm": 0.4765625, "learning_rate": 7.228915662650602e-05, "loss": 1.0566, "step": 2520 }, { "epoch": 0.04, "grad_norm": 0.41796875, "learning_rate": 7.243258749282846e-05, "loss": 0.8413, "step": 2525 }, { "epoch": 0.04, "grad_norm": 0.4296875, "learning_rate": 7.257601835915089e-05, "loss": 0.9329, "step": 2530 }, { "epoch": 0.04, "grad_norm": 0.484375, "learning_rate": 7.271944922547333e-05, "loss": 0.9888, "step": 2535 }, { "epoch": 0.04, "grad_norm": 0.408203125, "learning_rate": 7.286288009179575e-05, "loss": 0.8587, "step": 2540 }, { "epoch": 0.04, "grad_norm": 0.453125, "learning_rate": 7.30063109581182e-05, "loss": 0.962, "step": 2545 }, { "epoch": 0.04, "grad_norm": 0.447265625, "learning_rate": 7.314974182444062e-05, "loss": 0.9628, "step": 2550 }, { "epoch": 0.04, "grad_norm": 0.392578125, "learning_rate": 7.329317269076306e-05, "loss": 1.0271, "step": 2555 }, { "epoch": 0.04, "grad_norm": 0.40234375, "learning_rate": 7.343660355708549e-05, "loss": 0.8804, "step": 2560 }, { "epoch": 0.04, "grad_norm": 0.380859375, "learning_rate": 7.358003442340791e-05, "loss": 0.8014, "step": 2565 }, { "epoch": 0.04, "grad_norm": 0.416015625, "learning_rate": 7.372346528973035e-05, "loss": 0.9217, "step": 2570 }, { "epoch": 0.04, "grad_norm": 0.451171875, "learning_rate": 7.38668961560528e-05, "loss": 0.7977, "step": 2575 }, { "epoch": 0.04, "grad_norm": 0.484375, "learning_rate": 7.401032702237521e-05, "loss": 1.0367, "step": 2580 }, { "epoch": 0.04, "grad_norm": 0.4921875, "learning_rate": 7.415375788869765e-05, "loss": 0.9485, "step": 2585 }, { "epoch": 0.04, "grad_norm": 0.439453125, "learning_rate": 7.429718875502009e-05, "loss": 1.1512, "step": 2590 }, { "epoch": 0.04, "grad_norm": 0.41015625, "learning_rate": 7.444061962134251e-05, "loss": 0.7874, "step": 2595 }, { "epoch": 0.04, "grad_norm": 0.43359375, "learning_rate": 7.458405048766494e-05, "loss": 1.0102, "step": 2600 }, { "epoch": 0.04, "grad_norm": 0.404296875, "learning_rate": 7.472748135398738e-05, "loss": 1.0031, "step": 2605 }, { "epoch": 0.04, "grad_norm": 0.447265625, "learning_rate": 7.487091222030982e-05, "loss": 0.9819, "step": 2610 }, { "epoch": 0.04, "grad_norm": 0.46875, "learning_rate": 7.501434308663225e-05, "loss": 1.0839, "step": 2615 }, { "epoch": 0.04, "grad_norm": 0.59375, "learning_rate": 7.515777395295467e-05, "loss": 1.0095, "step": 2620 }, { "epoch": 0.04, "grad_norm": 0.44140625, "learning_rate": 7.530120481927712e-05, "loss": 0.87, "step": 2625 }, { "epoch": 0.04, "grad_norm": 0.4609375, "learning_rate": 7.544463568559954e-05, "loss": 0.9675, "step": 2630 }, { "epoch": 0.04, "grad_norm": 0.423828125, "learning_rate": 7.558806655192198e-05, "loss": 0.9699, "step": 2635 }, { "epoch": 0.04, "grad_norm": 0.439453125, "learning_rate": 7.573149741824441e-05, "loss": 1.1126, "step": 2640 }, { "epoch": 0.04, "grad_norm": 0.400390625, "learning_rate": 7.587492828456684e-05, "loss": 0.8104, "step": 2645 }, { "epoch": 0.04, "grad_norm": 0.443359375, "learning_rate": 7.601835915088928e-05, "loss": 0.951, "step": 2650 }, { "epoch": 0.04, "grad_norm": 0.39453125, "learning_rate": 7.616179001721172e-05, "loss": 1.0582, "step": 2655 }, { "epoch": 0.04, "grad_norm": 0.4765625, "learning_rate": 7.630522088353414e-05, "loss": 1.0965, "step": 2660 }, { "epoch": 0.04, "grad_norm": 0.439453125, "learning_rate": 7.644865174985657e-05, "loss": 0.9515, "step": 2665 }, { "epoch": 0.04, "grad_norm": 0.45703125, "learning_rate": 7.659208261617901e-05, "loss": 1.0292, "step": 2670 }, { "epoch": 0.04, "grad_norm": 0.443359375, "learning_rate": 7.673551348250144e-05, "loss": 1.0915, "step": 2675 }, { "epoch": 0.04, "grad_norm": 0.4140625, "learning_rate": 7.687894434882386e-05, "loss": 1.055, "step": 2680 }, { "epoch": 0.04, "grad_norm": 0.52734375, "learning_rate": 7.70223752151463e-05, "loss": 1.0428, "step": 2685 }, { "epoch": 0.04, "grad_norm": 0.4375, "learning_rate": 7.716580608146874e-05, "loss": 0.9814, "step": 2690 }, { "epoch": 0.04, "grad_norm": 0.470703125, "learning_rate": 7.730923694779117e-05, "loss": 0.8716, "step": 2695 }, { "epoch": 0.04, "grad_norm": 0.4375, "learning_rate": 7.74526678141136e-05, "loss": 0.8861, "step": 2700 }, { "epoch": 0.04, "grad_norm": 0.466796875, "learning_rate": 7.759609868043604e-05, "loss": 1.0297, "step": 2705 }, { "epoch": 0.04, "grad_norm": 0.435546875, "learning_rate": 7.773952954675848e-05, "loss": 0.9579, "step": 2710 }, { "epoch": 0.04, "grad_norm": 0.482421875, "learning_rate": 7.78829604130809e-05, "loss": 0.9681, "step": 2715 }, { "epoch": 0.04, "grad_norm": 0.400390625, "learning_rate": 7.802639127940333e-05, "loss": 0.829, "step": 2720 }, { "epoch": 0.04, "grad_norm": 0.388671875, "learning_rate": 7.816982214572577e-05, "loss": 0.9794, "step": 2725 }, { "epoch": 0.04, "grad_norm": 0.46484375, "learning_rate": 7.83132530120482e-05, "loss": 1.0554, "step": 2730 }, { "epoch": 0.04, "grad_norm": 0.41796875, "learning_rate": 7.845668387837064e-05, "loss": 0.8973, "step": 2735 }, { "epoch": 0.04, "grad_norm": 0.42578125, "learning_rate": 7.860011474469306e-05, "loss": 1.0593, "step": 2740 }, { "epoch": 0.04, "grad_norm": 0.4453125, "learning_rate": 7.874354561101549e-05, "loss": 1.0436, "step": 2745 }, { "epoch": 0.04, "grad_norm": 0.462890625, "learning_rate": 7.888697647733793e-05, "loss": 1.091, "step": 2750 }, { "epoch": 0.04, "grad_norm": 0.443359375, "learning_rate": 7.903040734366036e-05, "loss": 1.1119, "step": 2755 }, { "epoch": 0.04, "grad_norm": 0.43359375, "learning_rate": 7.917383820998278e-05, "loss": 1.0251, "step": 2760 }, { "epoch": 0.04, "grad_norm": 0.4453125, "learning_rate": 7.931726907630522e-05, "loss": 0.888, "step": 2765 }, { "epoch": 0.04, "grad_norm": 0.458984375, "learning_rate": 7.946069994262766e-05, "loss": 0.9257, "step": 2770 }, { "epoch": 0.04, "grad_norm": 0.48046875, "learning_rate": 7.960413080895009e-05, "loss": 0.997, "step": 2775 }, { "epoch": 0.04, "grad_norm": 0.376953125, "learning_rate": 7.974756167527252e-05, "loss": 0.9598, "step": 2780 }, { "epoch": 0.04, "grad_norm": 0.443359375, "learning_rate": 7.989099254159496e-05, "loss": 0.8065, "step": 2785 }, { "epoch": 0.04, "grad_norm": 0.43359375, "learning_rate": 8.00344234079174e-05, "loss": 0.9678, "step": 2790 }, { "epoch": 0.04, "grad_norm": 0.38671875, "learning_rate": 8.017785427423982e-05, "loss": 0.9295, "step": 2795 }, { "epoch": 0.04, "grad_norm": 0.45703125, "learning_rate": 8.032128514056225e-05, "loss": 0.8496, "step": 2800 }, { "epoch": 0.04, "grad_norm": 0.453125, "learning_rate": 8.046471600688469e-05, "loss": 0.9453, "step": 2805 }, { "epoch": 0.04, "grad_norm": 0.5625, "learning_rate": 8.060814687320712e-05, "loss": 0.9759, "step": 2810 }, { "epoch": 0.04, "grad_norm": 0.44140625, "learning_rate": 8.075157773952954e-05, "loss": 0.8887, "step": 2815 }, { "epoch": 0.04, "grad_norm": 0.408203125, "learning_rate": 8.089500860585198e-05, "loss": 0.9728, "step": 2820 }, { "epoch": 0.04, "grad_norm": 0.40625, "learning_rate": 8.103843947217442e-05, "loss": 0.8913, "step": 2825 }, { "epoch": 0.04, "grad_norm": 0.365234375, "learning_rate": 8.118187033849685e-05, "loss": 0.9973, "step": 2830 }, { "epoch": 0.04, "grad_norm": 0.412109375, "learning_rate": 8.132530120481928e-05, "loss": 1.0904, "step": 2835 }, { "epoch": 0.04, "grad_norm": 0.43359375, "learning_rate": 8.146873207114172e-05, "loss": 0.8934, "step": 2840 }, { "epoch": 0.04, "grad_norm": 0.458984375, "learning_rate": 8.161216293746414e-05, "loss": 1.0938, "step": 2845 }, { "epoch": 0.04, "grad_norm": 0.41796875, "learning_rate": 8.175559380378658e-05, "loss": 0.9017, "step": 2850 }, { "epoch": 0.04, "grad_norm": 0.470703125, "learning_rate": 8.189902467010901e-05, "loss": 1.0773, "step": 2855 }, { "epoch": 0.04, "grad_norm": 0.39453125, "learning_rate": 8.204245553643144e-05, "loss": 0.9509, "step": 2860 }, { "epoch": 0.04, "grad_norm": 0.458984375, "learning_rate": 8.218588640275388e-05, "loss": 0.8704, "step": 2865 }, { "epoch": 0.04, "grad_norm": 0.416015625, "learning_rate": 8.232931726907632e-05, "loss": 0.9692, "step": 2870 }, { "epoch": 0.04, "grad_norm": 0.5390625, "learning_rate": 8.247274813539873e-05, "loss": 1.0793, "step": 2875 }, { "epoch": 0.04, "grad_norm": 0.46875, "learning_rate": 8.261617900172117e-05, "loss": 0.8818, "step": 2880 }, { "epoch": 0.04, "grad_norm": 0.392578125, "learning_rate": 8.275960986804361e-05, "loss": 1.0164, "step": 2885 }, { "epoch": 0.04, "grad_norm": 0.439453125, "learning_rate": 8.290304073436605e-05, "loss": 0.9334, "step": 2890 }, { "epoch": 0.04, "grad_norm": 0.380859375, "learning_rate": 8.304647160068846e-05, "loss": 1.1717, "step": 2895 }, { "epoch": 0.04, "grad_norm": 0.451171875, "learning_rate": 8.31899024670109e-05, "loss": 1.071, "step": 2900 }, { "epoch": 0.04, "grad_norm": 0.482421875, "learning_rate": 8.333333333333334e-05, "loss": 0.92, "step": 2905 }, { "epoch": 0.04, "grad_norm": 0.458984375, "learning_rate": 8.347676419965577e-05, "loss": 1.0631, "step": 2910 }, { "epoch": 0.04, "grad_norm": 0.412109375, "learning_rate": 8.36201950659782e-05, "loss": 0.9019, "step": 2915 }, { "epoch": 0.04, "grad_norm": 0.42578125, "learning_rate": 8.376362593230064e-05, "loss": 0.8708, "step": 2920 }, { "epoch": 0.04, "grad_norm": 0.42578125, "learning_rate": 8.390705679862306e-05, "loss": 0.9173, "step": 2925 }, { "epoch": 0.04, "grad_norm": 0.49609375, "learning_rate": 8.40504876649455e-05, "loss": 1.0932, "step": 2930 }, { "epoch": 0.04, "grad_norm": 0.46875, "learning_rate": 8.419391853126793e-05, "loss": 0.9486, "step": 2935 }, { "epoch": 0.04, "grad_norm": 0.4140625, "learning_rate": 8.433734939759037e-05, "loss": 1.0511, "step": 2940 }, { "epoch": 0.04, "grad_norm": 0.451171875, "learning_rate": 8.44807802639128e-05, "loss": 0.9129, "step": 2945 }, { "epoch": 0.04, "grad_norm": 0.3828125, "learning_rate": 8.462421113023524e-05, "loss": 1.0939, "step": 2950 }, { "epoch": 0.04, "grad_norm": 0.431640625, "learning_rate": 8.476764199655766e-05, "loss": 0.8591, "step": 2955 }, { "epoch": 0.04, "grad_norm": 0.384765625, "learning_rate": 8.491107286288009e-05, "loss": 1.0119, "step": 2960 }, { "epoch": 0.04, "grad_norm": 0.451171875, "learning_rate": 8.505450372920253e-05, "loss": 1.0331, "step": 2965 }, { "epoch": 0.04, "grad_norm": 0.462890625, "learning_rate": 8.519793459552496e-05, "loss": 1.0119, "step": 2970 }, { "epoch": 0.04, "grad_norm": 0.43359375, "learning_rate": 8.534136546184738e-05, "loss": 0.9088, "step": 2975 }, { "epoch": 0.04, "grad_norm": 0.423828125, "learning_rate": 8.548479632816982e-05, "loss": 0.9238, "step": 2980 }, { "epoch": 0.04, "grad_norm": 0.48828125, "learning_rate": 8.562822719449226e-05, "loss": 0.9416, "step": 2985 }, { "epoch": 0.04, "grad_norm": 0.41796875, "learning_rate": 8.577165806081469e-05, "loss": 1.0492, "step": 2990 }, { "epoch": 0.04, "grad_norm": 0.427734375, "learning_rate": 8.591508892713712e-05, "loss": 1.008, "step": 2995 }, { "epoch": 0.04, "grad_norm": 0.44921875, "learning_rate": 8.605851979345956e-05, "loss": 1.0736, "step": 3000 }, { "epoch": 0.04, "grad_norm": 0.423828125, "learning_rate": 8.6201950659782e-05, "loss": 0.8645, "step": 3005 }, { "epoch": 0.04, "grad_norm": 0.404296875, "learning_rate": 8.634538152610442e-05, "loss": 1.0537, "step": 3010 }, { "epoch": 0.04, "grad_norm": 0.41796875, "learning_rate": 8.648881239242685e-05, "loss": 1.0242, "step": 3015 }, { "epoch": 0.04, "grad_norm": 0.42578125, "learning_rate": 8.663224325874929e-05, "loss": 1.052, "step": 3020 }, { "epoch": 0.04, "grad_norm": 0.484375, "learning_rate": 8.677567412507172e-05, "loss": 1.1053, "step": 3025 }, { "epoch": 0.04, "grad_norm": 0.4609375, "learning_rate": 8.691910499139414e-05, "loss": 1.0429, "step": 3030 }, { "epoch": 0.04, "grad_norm": 0.439453125, "learning_rate": 8.706253585771659e-05, "loss": 0.9297, "step": 3035 }, { "epoch": 0.04, "grad_norm": 0.42578125, "learning_rate": 8.720596672403901e-05, "loss": 0.9675, "step": 3040 }, { "epoch": 0.04, "grad_norm": 0.44921875, "learning_rate": 8.734939759036145e-05, "loss": 1.048, "step": 3045 }, { "epoch": 0.04, "grad_norm": 0.453125, "learning_rate": 8.749282845668388e-05, "loss": 0.8463, "step": 3050 }, { "epoch": 0.04, "grad_norm": 0.369140625, "learning_rate": 8.763625932300632e-05, "loss": 0.8302, "step": 3055 }, { "epoch": 0.04, "grad_norm": 0.484375, "learning_rate": 8.777969018932875e-05, "loss": 0.9625, "step": 3060 }, { "epoch": 0.04, "grad_norm": 0.431640625, "learning_rate": 8.792312105565119e-05, "loss": 1.0129, "step": 3065 }, { "epoch": 0.04, "grad_norm": 0.42578125, "learning_rate": 8.806655192197361e-05, "loss": 0.8039, "step": 3070 }, { "epoch": 0.04, "grad_norm": 0.41015625, "learning_rate": 8.820998278829604e-05, "loss": 0.9156, "step": 3075 }, { "epoch": 0.04, "grad_norm": 0.421875, "learning_rate": 8.835341365461848e-05, "loss": 1.033, "step": 3080 }, { "epoch": 0.04, "grad_norm": 0.44140625, "learning_rate": 8.849684452094092e-05, "loss": 1.03, "step": 3085 }, { "epoch": 0.04, "grad_norm": 0.5390625, "learning_rate": 8.864027538726333e-05, "loss": 1.0026, "step": 3090 }, { "epoch": 0.04, "grad_norm": 0.458984375, "learning_rate": 8.878370625358577e-05, "loss": 1.0639, "step": 3095 }, { "epoch": 0.04, "grad_norm": 0.4609375, "learning_rate": 8.892713711990821e-05, "loss": 0.9634, "step": 3100 }, { "epoch": 0.04, "grad_norm": 0.490234375, "learning_rate": 8.907056798623065e-05, "loss": 0.9578, "step": 3105 }, { "epoch": 0.04, "grad_norm": 0.48828125, "learning_rate": 8.921399885255307e-05, "loss": 0.9326, "step": 3110 }, { "epoch": 0.04, "grad_norm": 0.421875, "learning_rate": 8.93574297188755e-05, "loss": 0.9253, "step": 3115 }, { "epoch": 0.04, "grad_norm": 0.3984375, "learning_rate": 8.950086058519795e-05, "loss": 0.918, "step": 3120 }, { "epoch": 0.04, "grad_norm": 0.546875, "learning_rate": 8.964429145152037e-05, "loss": 1.0224, "step": 3125 }, { "epoch": 0.04, "grad_norm": 0.388671875, "learning_rate": 8.97877223178428e-05, "loss": 0.898, "step": 3130 }, { "epoch": 0.04, "grad_norm": 0.404296875, "learning_rate": 8.993115318416524e-05, "loss": 1.1698, "step": 3135 }, { "epoch": 0.05, "grad_norm": 0.404296875, "learning_rate": 9.007458405048767e-05, "loss": 0.9055, "step": 3140 }, { "epoch": 0.05, "grad_norm": 0.4609375, "learning_rate": 9.02180149168101e-05, "loss": 1.0444, "step": 3145 }, { "epoch": 0.05, "grad_norm": 0.439453125, "learning_rate": 9.036144578313253e-05, "loss": 1.0148, "step": 3150 }, { "epoch": 0.05, "grad_norm": 0.423828125, "learning_rate": 9.050487664945496e-05, "loss": 0.8328, "step": 3155 }, { "epoch": 0.05, "grad_norm": 0.39453125, "learning_rate": 9.06483075157774e-05, "loss": 0.9466, "step": 3160 }, { "epoch": 0.05, "grad_norm": 0.40625, "learning_rate": 9.079173838209984e-05, "loss": 0.9638, "step": 3165 }, { "epoch": 0.05, "grad_norm": 0.421875, "learning_rate": 9.093516924842227e-05, "loss": 0.9799, "step": 3170 }, { "epoch": 0.05, "grad_norm": 0.423828125, "learning_rate": 9.107860011474469e-05, "loss": 1.0191, "step": 3175 }, { "epoch": 0.05, "grad_norm": 0.45703125, "learning_rate": 9.122203098106713e-05, "loss": 1.0255, "step": 3180 }, { "epoch": 0.05, "grad_norm": 0.404296875, "learning_rate": 9.136546184738956e-05, "loss": 0.9925, "step": 3185 }, { "epoch": 0.05, "grad_norm": 0.41796875, "learning_rate": 9.150889271371199e-05, "loss": 0.8852, "step": 3190 }, { "epoch": 0.05, "grad_norm": 0.45703125, "learning_rate": 9.165232358003443e-05, "loss": 1.1207, "step": 3195 }, { "epoch": 0.05, "grad_norm": 0.41015625, "learning_rate": 9.179575444635687e-05, "loss": 1.052, "step": 3200 }, { "epoch": 0.05, "grad_norm": 0.390625, "learning_rate": 9.193918531267929e-05, "loss": 0.918, "step": 3205 }, { "epoch": 0.05, "grad_norm": 0.4375, "learning_rate": 9.208261617900172e-05, "loss": 0.9535, "step": 3210 }, { "epoch": 0.05, "grad_norm": 0.447265625, "learning_rate": 9.222604704532416e-05, "loss": 1.1085, "step": 3215 }, { "epoch": 0.05, "grad_norm": 0.388671875, "learning_rate": 9.23694779116466e-05, "loss": 1.0227, "step": 3220 }, { "epoch": 0.05, "grad_norm": 0.41796875, "learning_rate": 9.251290877796903e-05, "loss": 0.9191, "step": 3225 }, { "epoch": 0.05, "grad_norm": 0.490234375, "learning_rate": 9.265633964429145e-05, "loss": 1.0532, "step": 3230 }, { "epoch": 0.05, "grad_norm": 0.416015625, "learning_rate": 9.27997705106139e-05, "loss": 0.9121, "step": 3235 }, { "epoch": 0.05, "grad_norm": 0.478515625, "learning_rate": 9.294320137693632e-05, "loss": 1.0063, "step": 3240 }, { "epoch": 0.05, "grad_norm": 0.408203125, "learning_rate": 9.308663224325875e-05, "loss": 1.1222, "step": 3245 }, { "epoch": 0.05, "grad_norm": 0.390625, "learning_rate": 9.323006310958119e-05, "loss": 0.8751, "step": 3250 }, { "epoch": 0.05, "grad_norm": 0.3984375, "learning_rate": 9.337349397590361e-05, "loss": 0.9919, "step": 3255 }, { "epoch": 0.05, "grad_norm": 0.462890625, "learning_rate": 9.351692484222605e-05, "loss": 0.965, "step": 3260 }, { "epoch": 0.05, "grad_norm": 0.421875, "learning_rate": 9.366035570854848e-05, "loss": 0.9367, "step": 3265 }, { "epoch": 0.05, "grad_norm": 0.439453125, "learning_rate": 9.380378657487092e-05, "loss": 0.9339, "step": 3270 }, { "epoch": 0.05, "grad_norm": 0.44921875, "learning_rate": 9.394721744119335e-05, "loss": 1.1373, "step": 3275 }, { "epoch": 0.05, "grad_norm": 0.431640625, "learning_rate": 9.409064830751579e-05, "loss": 0.9003, "step": 3280 }, { "epoch": 0.05, "grad_norm": 0.48046875, "learning_rate": 9.423407917383821e-05, "loss": 1.0328, "step": 3285 }, { "epoch": 0.05, "grad_norm": 0.455078125, "learning_rate": 9.437751004016064e-05, "loss": 0.9807, "step": 3290 }, { "epoch": 0.05, "grad_norm": 0.45703125, "learning_rate": 9.452094090648308e-05, "loss": 1.1077, "step": 3295 }, { "epoch": 0.05, "grad_norm": 0.490234375, "learning_rate": 9.466437177280552e-05, "loss": 1.0933, "step": 3300 }, { "epoch": 0.05, "grad_norm": 0.44921875, "learning_rate": 9.480780263912795e-05, "loss": 0.9658, "step": 3305 }, { "epoch": 0.05, "grad_norm": 0.4296875, "learning_rate": 9.495123350545037e-05, "loss": 0.9943, "step": 3310 }, { "epoch": 0.05, "grad_norm": 0.408203125, "learning_rate": 9.509466437177281e-05, "loss": 1.0197, "step": 3315 }, { "epoch": 0.05, "grad_norm": 0.423828125, "learning_rate": 9.523809523809524e-05, "loss": 1.0082, "step": 3320 }, { "epoch": 0.05, "grad_norm": 0.412109375, "learning_rate": 9.538152610441767e-05, "loss": 0.9112, "step": 3325 }, { "epoch": 0.05, "grad_norm": 0.46484375, "learning_rate": 9.552495697074011e-05, "loss": 0.9961, "step": 3330 }, { "epoch": 0.05, "grad_norm": 0.416015625, "learning_rate": 9.566838783706255e-05, "loss": 1.0198, "step": 3335 }, { "epoch": 0.05, "grad_norm": 0.474609375, "learning_rate": 9.581181870338497e-05, "loss": 0.9685, "step": 3340 }, { "epoch": 0.05, "grad_norm": 0.474609375, "learning_rate": 9.59552495697074e-05, "loss": 1.0192, "step": 3345 }, { "epoch": 0.05, "grad_norm": 0.408203125, "learning_rate": 9.609868043602984e-05, "loss": 1.1994, "step": 3350 }, { "epoch": 0.05, "grad_norm": 0.4375, "learning_rate": 9.624211130235227e-05, "loss": 0.954, "step": 3355 }, { "epoch": 0.05, "grad_norm": 0.44921875, "learning_rate": 9.638554216867471e-05, "loss": 0.8963, "step": 3360 }, { "epoch": 0.05, "grad_norm": 0.42578125, "learning_rate": 9.652897303499713e-05, "loss": 0.9157, "step": 3365 }, { "epoch": 0.05, "grad_norm": 0.4296875, "learning_rate": 9.667240390131956e-05, "loss": 0.8979, "step": 3370 }, { "epoch": 0.05, "grad_norm": 0.42578125, "learning_rate": 9.6815834767642e-05, "loss": 1.1164, "step": 3375 }, { "epoch": 0.05, "grad_norm": 0.447265625, "learning_rate": 9.695926563396444e-05, "loss": 1.0539, "step": 3380 }, { "epoch": 0.05, "grad_norm": 0.427734375, "learning_rate": 9.710269650028687e-05, "loss": 0.9551, "step": 3385 }, { "epoch": 0.05, "grad_norm": 0.4140625, "learning_rate": 9.72461273666093e-05, "loss": 1.0195, "step": 3390 }, { "epoch": 0.05, "grad_norm": 0.421875, "learning_rate": 9.738955823293173e-05, "loss": 0.9593, "step": 3395 }, { "epoch": 0.05, "grad_norm": 0.431640625, "learning_rate": 9.753298909925417e-05, "loss": 0.8717, "step": 3400 }, { "epoch": 0.05, "grad_norm": 0.4296875, "learning_rate": 9.767641996557659e-05, "loss": 0.8758, "step": 3405 }, { "epoch": 0.05, "grad_norm": 0.427734375, "learning_rate": 9.781985083189903e-05, "loss": 0.9862, "step": 3410 }, { "epoch": 0.05, "grad_norm": 0.388671875, "learning_rate": 9.796328169822147e-05, "loss": 0.957, "step": 3415 }, { "epoch": 0.05, "grad_norm": 0.42578125, "learning_rate": 9.81067125645439e-05, "loss": 0.894, "step": 3420 }, { "epoch": 0.05, "grad_norm": 0.412109375, "learning_rate": 9.825014343086632e-05, "loss": 0.9824, "step": 3425 }, { "epoch": 0.05, "grad_norm": 0.388671875, "learning_rate": 9.839357429718876e-05, "loss": 1.0242, "step": 3430 }, { "epoch": 0.05, "grad_norm": 0.4375, "learning_rate": 9.853700516351119e-05, "loss": 0.8554, "step": 3435 }, { "epoch": 0.05, "grad_norm": 0.466796875, "learning_rate": 9.868043602983363e-05, "loss": 1.0288, "step": 3440 }, { "epoch": 0.05, "grad_norm": 0.5, "learning_rate": 9.882386689615606e-05, "loss": 1.0798, "step": 3445 }, { "epoch": 0.05, "grad_norm": 0.412109375, "learning_rate": 9.89672977624785e-05, "loss": 0.8537, "step": 3450 }, { "epoch": 0.05, "grad_norm": 0.443359375, "learning_rate": 9.911072862880092e-05, "loss": 0.9823, "step": 3455 }, { "epoch": 0.05, "grad_norm": 0.38671875, "learning_rate": 9.925415949512336e-05, "loss": 0.9169, "step": 3460 }, { "epoch": 0.05, "grad_norm": 0.4296875, "learning_rate": 9.939759036144579e-05, "loss": 0.8519, "step": 3465 }, { "epoch": 0.05, "grad_norm": 0.396484375, "learning_rate": 9.954102122776822e-05, "loss": 0.9385, "step": 3470 }, { "epoch": 0.05, "grad_norm": 0.376953125, "learning_rate": 9.968445209409066e-05, "loss": 0.936, "step": 3475 }, { "epoch": 0.05, "grad_norm": 0.380859375, "learning_rate": 9.982788296041308e-05, "loss": 0.982, "step": 3480 }, { "epoch": 0.05, "grad_norm": 0.453125, "learning_rate": 9.997131382673551e-05, "loss": 0.8798, "step": 3485 }, { "epoch": 0.05, "grad_norm": 0.421875, "learning_rate": 0.00010011474469305795, "loss": 1.1016, "step": 3490 }, { "epoch": 0.05, "grad_norm": 0.439453125, "learning_rate": 0.00010025817555938038, "loss": 1.2804, "step": 3495 }, { "epoch": 0.05, "grad_norm": 0.41796875, "learning_rate": 0.00010040160642570282, "loss": 1.0197, "step": 3500 }, { "epoch": 0.05, "grad_norm": 0.419921875, "learning_rate": 0.00010054503729202524, "loss": 1.0637, "step": 3505 }, { "epoch": 0.05, "grad_norm": 0.41015625, "learning_rate": 0.00010068846815834767, "loss": 0.9281, "step": 3510 }, { "epoch": 0.05, "grad_norm": 0.412109375, "learning_rate": 0.00010083189902467012, "loss": 0.9208, "step": 3515 }, { "epoch": 0.05, "grad_norm": 0.50390625, "learning_rate": 0.00010097532989099255, "loss": 0.9596, "step": 3520 }, { "epoch": 0.05, "grad_norm": 0.47265625, "learning_rate": 0.00010111876075731499, "loss": 0.917, "step": 3525 }, { "epoch": 0.05, "grad_norm": 0.37109375, "learning_rate": 0.00010126219162363742, "loss": 0.9712, "step": 3530 }, { "epoch": 0.05, "grad_norm": 0.44921875, "learning_rate": 0.00010140562248995984, "loss": 1.081, "step": 3535 }, { "epoch": 0.05, "grad_norm": 0.400390625, "learning_rate": 0.00010154905335628228, "loss": 1.0385, "step": 3540 }, { "epoch": 0.05, "grad_norm": 0.41796875, "learning_rate": 0.00010169248422260471, "loss": 0.9869, "step": 3545 }, { "epoch": 0.05, "grad_norm": 0.4140625, "learning_rate": 0.00010183591508892714, "loss": 0.8981, "step": 3550 }, { "epoch": 0.05, "grad_norm": 0.47265625, "learning_rate": 0.00010197934595524959, "loss": 0.8882, "step": 3555 }, { "epoch": 0.05, "grad_norm": 0.400390625, "learning_rate": 0.000102122776821572, "loss": 0.8624, "step": 3560 }, { "epoch": 0.05, "grad_norm": 0.388671875, "learning_rate": 0.00010226620768789443, "loss": 0.9483, "step": 3565 }, { "epoch": 0.05, "grad_norm": 0.51171875, "learning_rate": 0.00010240963855421688, "loss": 0.9354, "step": 3570 }, { "epoch": 0.05, "grad_norm": 0.44921875, "learning_rate": 0.00010255306942053931, "loss": 0.9673, "step": 3575 }, { "epoch": 0.05, "grad_norm": 0.421875, "learning_rate": 0.00010269650028686174, "loss": 0.9585, "step": 3580 }, { "epoch": 0.05, "grad_norm": 0.423828125, "learning_rate": 0.00010283993115318418, "loss": 1.0179, "step": 3585 }, { "epoch": 0.05, "grad_norm": 0.396484375, "learning_rate": 0.0001029833620195066, "loss": 0.9306, "step": 3590 }, { "epoch": 0.05, "grad_norm": 0.546875, "learning_rate": 0.00010312679288582903, "loss": 1.0562, "step": 3595 }, { "epoch": 0.05, "grad_norm": 0.466796875, "learning_rate": 0.00010327022375215147, "loss": 0.8476, "step": 3600 }, { "epoch": 0.05, "grad_norm": 0.4140625, "learning_rate": 0.0001034136546184739, "loss": 1.0539, "step": 3605 }, { "epoch": 0.05, "grad_norm": 0.392578125, "learning_rate": 0.00010355708548479632, "loss": 0.9696, "step": 3610 }, { "epoch": 0.05, "grad_norm": 0.416015625, "learning_rate": 0.00010370051635111878, "loss": 1.0788, "step": 3615 }, { "epoch": 0.05, "grad_norm": 0.4375, "learning_rate": 0.00010384394721744119, "loss": 1.1724, "step": 3620 }, { "epoch": 0.05, "grad_norm": 0.4140625, "learning_rate": 0.00010398737808376362, "loss": 1.0326, "step": 3625 }, { "epoch": 0.05, "grad_norm": 0.416015625, "learning_rate": 0.00010413080895008607, "loss": 0.9767, "step": 3630 }, { "epoch": 0.05, "grad_norm": 0.49609375, "learning_rate": 0.0001042742398164085, "loss": 0.9976, "step": 3635 }, { "epoch": 0.05, "grad_norm": 0.376953125, "learning_rate": 0.00010441767068273094, "loss": 0.898, "step": 3640 }, { "epoch": 0.05, "grad_norm": 0.40625, "learning_rate": 0.00010456110154905336, "loss": 0.8513, "step": 3645 }, { "epoch": 0.05, "grad_norm": 0.439453125, "learning_rate": 0.00010470453241537579, "loss": 1.0365, "step": 3650 }, { "epoch": 0.05, "grad_norm": 0.46875, "learning_rate": 0.00010484796328169823, "loss": 1.0731, "step": 3655 }, { "epoch": 0.05, "grad_norm": 0.478515625, "learning_rate": 0.00010499139414802066, "loss": 0.9298, "step": 3660 }, { "epoch": 0.05, "grad_norm": 0.65625, "learning_rate": 0.00010513482501434308, "loss": 1.0778, "step": 3665 }, { "epoch": 0.05, "grad_norm": 0.44921875, "learning_rate": 0.00010527825588066554, "loss": 0.9948, "step": 3670 }, { "epoch": 0.05, "grad_norm": 0.53515625, "learning_rate": 0.00010542168674698796, "loss": 0.9008, "step": 3675 }, { "epoch": 0.05, "grad_norm": 0.408203125, "learning_rate": 0.00010556511761331038, "loss": 0.9447, "step": 3680 }, { "epoch": 0.05, "grad_norm": 0.51171875, "learning_rate": 0.00010570854847963283, "loss": 0.816, "step": 3685 }, { "epoch": 0.05, "grad_norm": 0.4765625, "learning_rate": 0.00010585197934595526, "loss": 0.9181, "step": 3690 }, { "epoch": 0.05, "grad_norm": 0.453125, "learning_rate": 0.00010599541021227768, "loss": 0.9687, "step": 3695 }, { "epoch": 0.05, "grad_norm": 0.419921875, "learning_rate": 0.00010613884107860012, "loss": 0.8236, "step": 3700 }, { "epoch": 0.05, "grad_norm": 0.447265625, "learning_rate": 0.00010628227194492255, "loss": 0.862, "step": 3705 }, { "epoch": 0.05, "grad_norm": 0.443359375, "learning_rate": 0.00010642570281124498, "loss": 1.0777, "step": 3710 }, { "epoch": 0.05, "grad_norm": 0.44140625, "learning_rate": 0.00010656913367756742, "loss": 0.8843, "step": 3715 }, { "epoch": 0.05, "grad_norm": 0.44921875, "learning_rate": 0.00010671256454388984, "loss": 0.8764, "step": 3720 }, { "epoch": 0.05, "grad_norm": 0.443359375, "learning_rate": 0.00010685599541021227, "loss": 0.9723, "step": 3725 }, { "epoch": 0.05, "grad_norm": 0.431640625, "learning_rate": 0.00010699942627653472, "loss": 0.9971, "step": 3730 }, { "epoch": 0.05, "grad_norm": 0.435546875, "learning_rate": 0.00010714285714285715, "loss": 1.03, "step": 3735 }, { "epoch": 0.05, "grad_norm": 0.44921875, "learning_rate": 0.00010728628800917956, "loss": 1.0012, "step": 3740 }, { "epoch": 0.05, "grad_norm": 0.435546875, "learning_rate": 0.00010742971887550202, "loss": 0.9739, "step": 3745 }, { "epoch": 0.05, "grad_norm": 0.41796875, "learning_rate": 0.00010757314974182444, "loss": 1.0166, "step": 3750 }, { "epoch": 0.05, "grad_norm": 0.44921875, "learning_rate": 0.00010771658060814688, "loss": 1.0096, "step": 3755 }, { "epoch": 0.05, "grad_norm": 0.44921875, "learning_rate": 0.00010786001147446931, "loss": 0.9461, "step": 3760 }, { "epoch": 0.05, "grad_norm": 0.478515625, "learning_rate": 0.00010800344234079174, "loss": 0.802, "step": 3765 }, { "epoch": 0.05, "grad_norm": 0.515625, "learning_rate": 0.00010814687320711419, "loss": 1.0664, "step": 3770 }, { "epoch": 0.05, "grad_norm": 0.408203125, "learning_rate": 0.0001082903040734366, "loss": 0.9477, "step": 3775 }, { "epoch": 0.05, "grad_norm": 0.439453125, "learning_rate": 0.00010843373493975903, "loss": 1.0207, "step": 3780 }, { "epoch": 0.05, "grad_norm": 0.43359375, "learning_rate": 0.00010857716580608148, "loss": 0.979, "step": 3785 }, { "epoch": 0.05, "grad_norm": 0.392578125, "learning_rate": 0.00010872059667240391, "loss": 0.896, "step": 3790 }, { "epoch": 0.05, "grad_norm": 0.423828125, "learning_rate": 0.00010886402753872634, "loss": 1.0602, "step": 3795 }, { "epoch": 0.05, "grad_norm": 0.439453125, "learning_rate": 0.00010900745840504878, "loss": 0.8848, "step": 3800 }, { "epoch": 0.05, "grad_norm": 0.43359375, "learning_rate": 0.0001091508892713712, "loss": 0.9814, "step": 3805 }, { "epoch": 0.05, "grad_norm": 0.4140625, "learning_rate": 0.00010929432013769363, "loss": 0.8746, "step": 3810 }, { "epoch": 0.05, "grad_norm": 0.482421875, "learning_rate": 0.00010943775100401607, "loss": 0.9627, "step": 3815 }, { "epoch": 0.05, "grad_norm": 0.412109375, "learning_rate": 0.0001095811818703385, "loss": 0.943, "step": 3820 }, { "epoch": 0.05, "grad_norm": 0.423828125, "learning_rate": 0.00010972461273666092, "loss": 0.9082, "step": 3825 }, { "epoch": 0.05, "grad_norm": 0.41796875, "learning_rate": 0.00010986804360298338, "loss": 1.1245, "step": 3830 }, { "epoch": 0.06, "grad_norm": 0.4453125, "learning_rate": 0.00011001147446930579, "loss": 1.0683, "step": 3835 }, { "epoch": 0.06, "grad_norm": 0.427734375, "learning_rate": 0.00011015490533562822, "loss": 0.9948, "step": 3840 }, { "epoch": 0.06, "grad_norm": 0.421875, "learning_rate": 0.00011029833620195067, "loss": 0.933, "step": 3845 }, { "epoch": 0.06, "grad_norm": 0.337890625, "learning_rate": 0.0001104417670682731, "loss": 1.0216, "step": 3850 }, { "epoch": 0.06, "grad_norm": 0.41015625, "learning_rate": 0.00011058519793459554, "loss": 0.999, "step": 3855 }, { "epoch": 0.06, "grad_norm": 0.443359375, "learning_rate": 0.00011072862880091797, "loss": 1.0413, "step": 3860 }, { "epoch": 0.06, "grad_norm": 0.4765625, "learning_rate": 0.00011087205966724039, "loss": 1.0061, "step": 3865 }, { "epoch": 0.06, "grad_norm": 0.408203125, "learning_rate": 0.00011101549053356283, "loss": 0.8976, "step": 3870 }, { "epoch": 0.06, "grad_norm": 0.423828125, "learning_rate": 0.00011115892139988526, "loss": 0.9141, "step": 3875 }, { "epoch": 0.06, "grad_norm": 0.447265625, "learning_rate": 0.00011130235226620769, "loss": 1.0766, "step": 3880 }, { "epoch": 0.06, "grad_norm": 0.375, "learning_rate": 0.00011144578313253014, "loss": 0.9397, "step": 3885 }, { "epoch": 0.06, "grad_norm": 0.55078125, "learning_rate": 0.00011158921399885257, "loss": 1.0154, "step": 3890 }, { "epoch": 0.06, "grad_norm": 0.408203125, "learning_rate": 0.00011173264486517498, "loss": 0.9654, "step": 3895 }, { "epoch": 0.06, "grad_norm": 0.4453125, "learning_rate": 0.00011187607573149743, "loss": 1.0002, "step": 3900 }, { "epoch": 0.06, "grad_norm": 0.3828125, "learning_rate": 0.00011201950659781986, "loss": 0.9276, "step": 3905 }, { "epoch": 0.06, "grad_norm": 0.41015625, "learning_rate": 0.00011216293746414229, "loss": 0.9197, "step": 3910 }, { "epoch": 0.06, "grad_norm": 0.41015625, "learning_rate": 0.00011230636833046473, "loss": 1.0522, "step": 3915 }, { "epoch": 0.06, "grad_norm": 0.451171875, "learning_rate": 0.00011244979919678715, "loss": 0.9447, "step": 3920 }, { "epoch": 0.06, "grad_norm": 0.4765625, "learning_rate": 0.00011259323006310958, "loss": 0.9358, "step": 3925 }, { "epoch": 0.06, "grad_norm": 0.46484375, "learning_rate": 0.00011273666092943202, "loss": 1.0351, "step": 3930 }, { "epoch": 0.06, "grad_norm": 0.37109375, "learning_rate": 0.00011288009179575445, "loss": 0.9158, "step": 3935 }, { "epoch": 0.06, "grad_norm": 0.59375, "learning_rate": 0.00011302352266207687, "loss": 0.9975, "step": 3940 }, { "epoch": 0.06, "grad_norm": 0.50390625, "learning_rate": 0.00011316695352839933, "loss": 0.9493, "step": 3945 }, { "epoch": 0.06, "grad_norm": 0.42578125, "learning_rate": 0.00011331038439472175, "loss": 1.0005, "step": 3950 }, { "epoch": 0.06, "grad_norm": 0.4609375, "learning_rate": 0.00011345381526104417, "loss": 1.1862, "step": 3955 }, { "epoch": 0.06, "grad_norm": 0.408203125, "learning_rate": 0.00011359724612736662, "loss": 1.122, "step": 3960 }, { "epoch": 0.06, "grad_norm": 0.447265625, "learning_rate": 0.00011374067699368905, "loss": 1.0702, "step": 3965 }, { "epoch": 0.06, "grad_norm": 0.392578125, "learning_rate": 0.00011388410786001149, "loss": 0.8827, "step": 3970 }, { "epoch": 0.06, "grad_norm": 0.408203125, "learning_rate": 0.00011402753872633391, "loss": 0.8097, "step": 3975 }, { "epoch": 0.06, "grad_norm": 0.412109375, "learning_rate": 0.00011417096959265634, "loss": 0.935, "step": 3980 }, { "epoch": 0.06, "grad_norm": 0.46875, "learning_rate": 0.00011431440045897879, "loss": 0.9727, "step": 3985 }, { "epoch": 0.06, "grad_norm": 0.46484375, "learning_rate": 0.0001144578313253012, "loss": 0.9867, "step": 3990 }, { "epoch": 0.06, "grad_norm": 0.43359375, "learning_rate": 0.00011460126219162363, "loss": 1.0205, "step": 3995 }, { "epoch": 0.06, "grad_norm": 0.4375, "learning_rate": 0.00011474469305794609, "loss": 0.9801, "step": 4000 }, { "epoch": 0.06, "grad_norm": 0.427734375, "learning_rate": 0.00011488812392426851, "loss": 0.9266, "step": 4005 }, { "epoch": 0.06, "grad_norm": 0.50390625, "learning_rate": 0.00011503155479059094, "loss": 0.9448, "step": 4010 }, { "epoch": 0.06, "grad_norm": 0.455078125, "learning_rate": 0.00011517498565691338, "loss": 0.9925, "step": 4015 }, { "epoch": 0.06, "grad_norm": 0.396484375, "learning_rate": 0.0001153184165232358, "loss": 1.0278, "step": 4020 }, { "epoch": 0.06, "grad_norm": 0.4296875, "learning_rate": 0.00011546184738955823, "loss": 0.942, "step": 4025 }, { "epoch": 0.06, "grad_norm": 0.412109375, "learning_rate": 0.00011560527825588067, "loss": 0.9767, "step": 4030 }, { "epoch": 0.06, "grad_norm": 0.416015625, "learning_rate": 0.0001157487091222031, "loss": 0.9365, "step": 4035 }, { "epoch": 0.06, "grad_norm": 0.423828125, "learning_rate": 0.00011589213998852553, "loss": 0.9618, "step": 4040 }, { "epoch": 0.06, "grad_norm": 0.474609375, "learning_rate": 0.00011603557085484798, "loss": 0.8656, "step": 4045 }, { "epoch": 0.06, "grad_norm": 0.44140625, "learning_rate": 0.0001161790017211704, "loss": 0.8396, "step": 4050 }, { "epoch": 0.06, "grad_norm": 0.416015625, "learning_rate": 0.00011632243258749282, "loss": 0.9391, "step": 4055 }, { "epoch": 0.06, "grad_norm": 0.4609375, "learning_rate": 0.00011646586345381527, "loss": 0.9302, "step": 4060 }, { "epoch": 0.06, "grad_norm": 0.455078125, "learning_rate": 0.0001166092943201377, "loss": 1.0018, "step": 4065 }, { "epoch": 0.06, "grad_norm": 0.37109375, "learning_rate": 0.00011675272518646013, "loss": 0.9767, "step": 4070 }, { "epoch": 0.06, "grad_norm": 0.45703125, "learning_rate": 0.00011689615605278257, "loss": 0.9765, "step": 4075 }, { "epoch": 0.06, "grad_norm": 0.42578125, "learning_rate": 0.000117039586919105, "loss": 1.0398, "step": 4080 }, { "epoch": 0.06, "grad_norm": 0.41015625, "learning_rate": 0.00011718301778542743, "loss": 0.8486, "step": 4085 }, { "epoch": 0.06, "grad_norm": 0.439453125, "learning_rate": 0.00011732644865174986, "loss": 0.8443, "step": 4090 }, { "epoch": 0.06, "grad_norm": 0.443359375, "learning_rate": 0.00011746987951807229, "loss": 0.9544, "step": 4095 }, { "epoch": 0.06, "grad_norm": 0.4609375, "learning_rate": 0.00011761331038439474, "loss": 1.1874, "step": 4100 }, { "epoch": 0.06, "grad_norm": 0.41796875, "learning_rate": 0.00011775674125071717, "loss": 1.1332, "step": 4105 }, { "epoch": 0.06, "grad_norm": 0.431640625, "learning_rate": 0.00011790017211703958, "loss": 0.9719, "step": 4110 }, { "epoch": 0.06, "grad_norm": 0.44140625, "learning_rate": 0.00011804360298336203, "loss": 0.909, "step": 4115 }, { "epoch": 0.06, "grad_norm": 0.40234375, "learning_rate": 0.00011818703384968446, "loss": 1.0488, "step": 4120 }, { "epoch": 0.06, "grad_norm": 0.462890625, "learning_rate": 0.00011833046471600689, "loss": 1.1675, "step": 4125 }, { "epoch": 0.06, "grad_norm": 0.396484375, "learning_rate": 0.00011847389558232933, "loss": 0.9763, "step": 4130 }, { "epoch": 0.06, "grad_norm": 0.451171875, "learning_rate": 0.00011861732644865175, "loss": 0.9416, "step": 4135 }, { "epoch": 0.06, "grad_norm": 0.47265625, "learning_rate": 0.00011876075731497418, "loss": 0.9931, "step": 4140 }, { "epoch": 0.06, "grad_norm": 0.439453125, "learning_rate": 0.00011890418818129662, "loss": 0.8678, "step": 4145 }, { "epoch": 0.06, "grad_norm": 0.482421875, "learning_rate": 0.00011904761904761905, "loss": 1.0077, "step": 4150 }, { "epoch": 0.06, "grad_norm": 0.4453125, "learning_rate": 0.00011919104991394147, "loss": 0.8943, "step": 4155 }, { "epoch": 0.06, "grad_norm": 0.44921875, "learning_rate": 0.00011933448078026393, "loss": 1.0197, "step": 4160 }, { "epoch": 0.06, "grad_norm": 0.43359375, "learning_rate": 0.00011947791164658635, "loss": 0.9257, "step": 4165 }, { "epoch": 0.06, "grad_norm": 0.42578125, "learning_rate": 0.00011962134251290877, "loss": 0.9073, "step": 4170 }, { "epoch": 0.06, "grad_norm": 0.412109375, "learning_rate": 0.00011976477337923122, "loss": 0.9657, "step": 4175 }, { "epoch": 0.06, "grad_norm": 0.427734375, "learning_rate": 0.00011990820424555365, "loss": 0.9148, "step": 4180 }, { "epoch": 0.06, "grad_norm": 0.4609375, "learning_rate": 0.00012005163511187607, "loss": 0.9801, "step": 4185 }, { "epoch": 0.06, "grad_norm": 0.578125, "learning_rate": 0.00012019506597819851, "loss": 1.0169, "step": 4190 }, { "epoch": 0.06, "grad_norm": 0.453125, "learning_rate": 0.00012033849684452094, "loss": 0.9711, "step": 4195 }, { "epoch": 0.06, "grad_norm": 0.431640625, "learning_rate": 0.0001204819277108434, "loss": 1.0298, "step": 4200 }, { "epoch": 0.06, "grad_norm": 0.427734375, "learning_rate": 0.00012062535857716581, "loss": 0.9817, "step": 4205 }, { "epoch": 0.06, "grad_norm": 0.361328125, "learning_rate": 0.00012076878944348823, "loss": 0.8817, "step": 4210 }, { "epoch": 0.06, "grad_norm": 0.427734375, "learning_rate": 0.00012091222030981069, "loss": 0.8855, "step": 4215 }, { "epoch": 0.06, "grad_norm": 0.416015625, "learning_rate": 0.00012105565117613311, "loss": 1.0517, "step": 4220 }, { "epoch": 0.06, "grad_norm": 0.37890625, "learning_rate": 0.00012119908204245554, "loss": 0.9723, "step": 4225 }, { "epoch": 0.06, "grad_norm": 0.4140625, "learning_rate": 0.00012134251290877798, "loss": 0.8449, "step": 4230 }, { "epoch": 0.06, "grad_norm": 0.384765625, "learning_rate": 0.00012148594377510041, "loss": 0.9528, "step": 4235 }, { "epoch": 0.06, "grad_norm": 0.451171875, "learning_rate": 0.00012162937464142283, "loss": 1.0405, "step": 4240 }, { "epoch": 0.06, "grad_norm": 0.419921875, "learning_rate": 0.00012177280550774528, "loss": 0.9225, "step": 4245 }, { "epoch": 0.06, "grad_norm": 0.427734375, "learning_rate": 0.0001219162363740677, "loss": 0.8293, "step": 4250 }, { "epoch": 0.06, "grad_norm": 0.41796875, "learning_rate": 0.00012205966724039013, "loss": 0.8751, "step": 4255 }, { "epoch": 0.06, "grad_norm": 0.494140625, "learning_rate": 0.00012220309810671257, "loss": 1.0213, "step": 4260 }, { "epoch": 0.06, "grad_norm": 0.423828125, "learning_rate": 0.000122346528973035, "loss": 0.9466, "step": 4265 }, { "epoch": 0.06, "grad_norm": 0.404296875, "learning_rate": 0.00012248995983935742, "loss": 0.8652, "step": 4270 }, { "epoch": 0.06, "grad_norm": 0.423828125, "learning_rate": 0.00012263339070567986, "loss": 0.9067, "step": 4275 }, { "epoch": 0.06, "grad_norm": 0.375, "learning_rate": 0.0001227768215720023, "loss": 0.8576, "step": 4280 }, { "epoch": 0.06, "grad_norm": 0.53125, "learning_rate": 0.00012292025243832472, "loss": 1.1626, "step": 4285 }, { "epoch": 0.06, "grad_norm": 0.427734375, "learning_rate": 0.00012306368330464718, "loss": 0.9502, "step": 4290 }, { "epoch": 0.06, "grad_norm": 0.435546875, "learning_rate": 0.0001232071141709696, "loss": 1.006, "step": 4295 }, { "epoch": 0.06, "grad_norm": 0.375, "learning_rate": 0.000123350545037292, "loss": 0.9994, "step": 4300 }, { "epoch": 0.06, "grad_norm": 0.43359375, "learning_rate": 0.00012349397590361448, "loss": 1.0563, "step": 4305 }, { "epoch": 0.06, "grad_norm": 0.4375, "learning_rate": 0.0001236374067699369, "loss": 0.9123, "step": 4310 }, { "epoch": 0.06, "grad_norm": 0.43359375, "learning_rate": 0.00012378083763625933, "loss": 0.9732, "step": 4315 }, { "epoch": 0.06, "grad_norm": 0.43359375, "learning_rate": 0.00012392426850258177, "loss": 0.9571, "step": 4320 }, { "epoch": 0.06, "grad_norm": 0.412109375, "learning_rate": 0.00012406769936890418, "loss": 0.9954, "step": 4325 }, { "epoch": 0.06, "grad_norm": 0.4140625, "learning_rate": 0.00012421113023522665, "loss": 1.0072, "step": 4330 }, { "epoch": 0.06, "grad_norm": 0.453125, "learning_rate": 0.00012435456110154906, "loss": 0.9763, "step": 4335 }, { "epoch": 0.06, "grad_norm": 0.42578125, "learning_rate": 0.00012449799196787148, "loss": 1.0008, "step": 4340 }, { "epoch": 0.06, "grad_norm": 0.482421875, "learning_rate": 0.00012464142283419394, "loss": 1.0088, "step": 4345 }, { "epoch": 0.06, "grad_norm": 0.388671875, "learning_rate": 0.00012478485370051636, "loss": 1.1205, "step": 4350 }, { "epoch": 0.06, "grad_norm": 0.421875, "learning_rate": 0.00012492828456683877, "loss": 0.991, "step": 4355 }, { "epoch": 0.06, "grad_norm": 0.48828125, "learning_rate": 0.00012507171543316124, "loss": 0.8901, "step": 4360 }, { "epoch": 0.06, "grad_norm": 0.46484375, "learning_rate": 0.00012521514629948365, "loss": 1.1043, "step": 4365 }, { "epoch": 0.06, "grad_norm": 0.42578125, "learning_rate": 0.0001253585771658061, "loss": 1.0119, "step": 4370 }, { "epoch": 0.06, "grad_norm": 0.4609375, "learning_rate": 0.00012550200803212853, "loss": 1.0429, "step": 4375 }, { "epoch": 0.06, "grad_norm": 1.015625, "learning_rate": 0.00012564543889845094, "loss": 1.0057, "step": 4380 }, { "epoch": 0.06, "grad_norm": 0.43359375, "learning_rate": 0.00012578886976477338, "loss": 0.9982, "step": 4385 }, { "epoch": 0.06, "grad_norm": 0.482421875, "learning_rate": 0.00012593230063109582, "loss": 0.9193, "step": 4390 }, { "epoch": 0.06, "grad_norm": 0.421875, "learning_rate": 0.00012607573149741824, "loss": 0.9164, "step": 4395 }, { "epoch": 0.06, "grad_norm": 0.44921875, "learning_rate": 0.00012621916236374068, "loss": 1.0736, "step": 4400 }, { "epoch": 0.06, "grad_norm": 0.482421875, "learning_rate": 0.00012636259323006312, "loss": 1.0426, "step": 4405 }, { "epoch": 0.06, "grad_norm": 0.419921875, "learning_rate": 0.00012650602409638556, "loss": 0.9148, "step": 4410 }, { "epoch": 0.06, "grad_norm": 0.453125, "learning_rate": 0.000126649454962708, "loss": 1.1345, "step": 4415 }, { "epoch": 0.06, "grad_norm": 0.4453125, "learning_rate": 0.0001267928858290304, "loss": 0.9112, "step": 4420 }, { "epoch": 0.06, "grad_norm": 0.482421875, "learning_rate": 0.00012693631669535285, "loss": 0.9311, "step": 4425 }, { "epoch": 0.06, "grad_norm": 0.455078125, "learning_rate": 0.0001270797475616753, "loss": 1.0584, "step": 4430 }, { "epoch": 0.06, "grad_norm": 0.421875, "learning_rate": 0.0001272231784279977, "loss": 1.0231, "step": 4435 }, { "epoch": 0.06, "grad_norm": 0.48046875, "learning_rate": 0.00012736660929432014, "loss": 1.1119, "step": 4440 }, { "epoch": 0.06, "grad_norm": 0.431640625, "learning_rate": 0.00012751004016064258, "loss": 0.9418, "step": 4445 }, { "epoch": 0.06, "grad_norm": 0.57421875, "learning_rate": 0.00012765347102696502, "loss": 0.9222, "step": 4450 }, { "epoch": 0.06, "grad_norm": 0.4765625, "learning_rate": 0.00012779690189328744, "loss": 1.0592, "step": 4455 }, { "epoch": 0.06, "grad_norm": 0.45703125, "learning_rate": 0.00012794033275960988, "loss": 1.0891, "step": 4460 }, { "epoch": 0.06, "grad_norm": 0.431640625, "learning_rate": 0.00012808376362593232, "loss": 1.0498, "step": 4465 }, { "epoch": 0.06, "grad_norm": 0.59765625, "learning_rate": 0.00012822719449225473, "loss": 1.0035, "step": 4470 }, { "epoch": 0.06, "grad_norm": 0.439453125, "learning_rate": 0.00012837062535857717, "loss": 0.8811, "step": 4475 }, { "epoch": 0.06, "grad_norm": 0.44921875, "learning_rate": 0.0001285140562248996, "loss": 1.0189, "step": 4480 }, { "epoch": 0.06, "grad_norm": 0.439453125, "learning_rate": 0.00012865748709122202, "loss": 1.0349, "step": 4485 }, { "epoch": 0.06, "grad_norm": 0.423828125, "learning_rate": 0.00012880091795754446, "loss": 0.954, "step": 4490 }, { "epoch": 0.06, "grad_norm": 0.5, "learning_rate": 0.0001289443488238669, "loss": 0.8879, "step": 4495 }, { "epoch": 0.06, "grad_norm": 0.453125, "learning_rate": 0.00012908777969018932, "loss": 0.9027, "step": 4500 }, { "epoch": 0.06, "grad_norm": 0.4296875, "learning_rate": 0.00012923121055651178, "loss": 0.8992, "step": 4505 }, { "epoch": 0.06, "grad_norm": 0.48828125, "learning_rate": 0.0001293746414228342, "loss": 1.3137, "step": 4510 }, { "epoch": 0.06, "grad_norm": 0.42578125, "learning_rate": 0.0001295180722891566, "loss": 1.0226, "step": 4515 }, { "epoch": 0.06, "grad_norm": 0.4765625, "learning_rate": 0.00012966150315547908, "loss": 0.9629, "step": 4520 }, { "epoch": 0.06, "grad_norm": 0.443359375, "learning_rate": 0.0001298049340218015, "loss": 0.9313, "step": 4525 }, { "epoch": 0.06, "grad_norm": 0.44921875, "learning_rate": 0.00012994836488812393, "loss": 0.9058, "step": 4530 }, { "epoch": 0.07, "grad_norm": 0.427734375, "learning_rate": 0.00013009179575444637, "loss": 1.0217, "step": 4535 }, { "epoch": 0.07, "grad_norm": 0.4609375, "learning_rate": 0.00013023522662076878, "loss": 0.9383, "step": 4540 }, { "epoch": 0.07, "grad_norm": 0.41015625, "learning_rate": 0.00013037865748709125, "loss": 1.0084, "step": 4545 }, { "epoch": 0.07, "grad_norm": 0.478515625, "learning_rate": 0.00013052208835341366, "loss": 1.0465, "step": 4550 }, { "epoch": 0.07, "grad_norm": 0.333984375, "learning_rate": 0.00013066551921973608, "loss": 0.8307, "step": 4555 }, { "epoch": 0.07, "grad_norm": 0.4453125, "learning_rate": 0.00013080895008605854, "loss": 0.9803, "step": 4560 }, { "epoch": 0.07, "grad_norm": 0.4921875, "learning_rate": 0.00013095238095238096, "loss": 1.0079, "step": 4565 }, { "epoch": 0.07, "grad_norm": 0.4453125, "learning_rate": 0.0001310958118187034, "loss": 1.0062, "step": 4570 }, { "epoch": 0.07, "grad_norm": 0.423828125, "learning_rate": 0.00013123924268502584, "loss": 1.0667, "step": 4575 }, { "epoch": 0.07, "grad_norm": 0.4375, "learning_rate": 0.00013138267355134825, "loss": 0.9888, "step": 4580 }, { "epoch": 0.07, "grad_norm": 0.49609375, "learning_rate": 0.0001315261044176707, "loss": 1.1235, "step": 4585 }, { "epoch": 0.07, "grad_norm": 0.44921875, "learning_rate": 0.00013166953528399313, "loss": 0.998, "step": 4590 }, { "epoch": 0.07, "grad_norm": 0.447265625, "learning_rate": 0.00013181296615031554, "loss": 1.0042, "step": 4595 }, { "epoch": 0.07, "grad_norm": 0.46484375, "learning_rate": 0.00013195639701663798, "loss": 0.9648, "step": 4600 }, { "epoch": 0.07, "grad_norm": 0.4296875, "learning_rate": 0.00013209982788296042, "loss": 0.8943, "step": 4605 }, { "epoch": 0.07, "grad_norm": 0.40234375, "learning_rate": 0.00013224325874928284, "loss": 0.9704, "step": 4610 }, { "epoch": 0.07, "grad_norm": 0.423828125, "learning_rate": 0.00013238668961560528, "loss": 1.0426, "step": 4615 }, { "epoch": 0.07, "grad_norm": 0.41015625, "learning_rate": 0.00013253012048192772, "loss": 0.9591, "step": 4620 }, { "epoch": 0.07, "grad_norm": 0.48828125, "learning_rate": 0.00013267355134825016, "loss": 0.991, "step": 4625 }, { "epoch": 0.07, "grad_norm": 0.455078125, "learning_rate": 0.00013281698221457257, "loss": 0.971, "step": 4630 }, { "epoch": 0.07, "grad_norm": 0.490234375, "learning_rate": 0.000132960413080895, "loss": 1.1203, "step": 4635 }, { "epoch": 0.07, "grad_norm": 0.52734375, "learning_rate": 0.00013310384394721745, "loss": 1.0527, "step": 4640 }, { "epoch": 0.07, "grad_norm": 0.484375, "learning_rate": 0.0001332472748135399, "loss": 1.057, "step": 4645 }, { "epoch": 0.07, "grad_norm": 0.4296875, "learning_rate": 0.0001333907056798623, "loss": 0.889, "step": 4650 }, { "epoch": 0.07, "grad_norm": 0.46484375, "learning_rate": 0.00013353413654618475, "loss": 0.952, "step": 4655 }, { "epoch": 0.07, "grad_norm": 0.41796875, "learning_rate": 0.00013367756741250719, "loss": 0.9844, "step": 4660 }, { "epoch": 0.07, "grad_norm": 0.5234375, "learning_rate": 0.00013382099827882963, "loss": 0.899, "step": 4665 }, { "epoch": 0.07, "grad_norm": 0.427734375, "learning_rate": 0.00013396442914515204, "loss": 1.0083, "step": 4670 }, { "epoch": 0.07, "grad_norm": 0.494140625, "learning_rate": 0.00013410786001147448, "loss": 1.1103, "step": 4675 }, { "epoch": 0.07, "grad_norm": 0.439453125, "learning_rate": 0.00013425129087779692, "loss": 1.0983, "step": 4680 }, { "epoch": 0.07, "grad_norm": 0.42578125, "learning_rate": 0.00013439472174411933, "loss": 0.9257, "step": 4685 }, { "epoch": 0.07, "grad_norm": 0.439453125, "learning_rate": 0.00013453815261044177, "loss": 0.9193, "step": 4690 }, { "epoch": 0.07, "grad_norm": 0.453125, "learning_rate": 0.0001346815834767642, "loss": 0.9014, "step": 4695 }, { "epoch": 0.07, "grad_norm": 0.4296875, "learning_rate": 0.00013482501434308663, "loss": 0.9115, "step": 4700 }, { "epoch": 0.07, "grad_norm": 0.44140625, "learning_rate": 0.00013496844520940907, "loss": 0.9923, "step": 4705 }, { "epoch": 0.07, "grad_norm": 0.423828125, "learning_rate": 0.0001351118760757315, "loss": 0.9589, "step": 4710 }, { "epoch": 0.07, "grad_norm": 0.423828125, "learning_rate": 0.00013525530694205392, "loss": 0.8925, "step": 4715 }, { "epoch": 0.07, "grad_norm": 0.46875, "learning_rate": 0.00013539873780837639, "loss": 1.1727, "step": 4720 }, { "epoch": 0.07, "grad_norm": 0.4296875, "learning_rate": 0.0001355421686746988, "loss": 1.0245, "step": 4725 }, { "epoch": 0.07, "grad_norm": 0.396484375, "learning_rate": 0.0001356855995410212, "loss": 0.9471, "step": 4730 }, { "epoch": 0.07, "grad_norm": 0.388671875, "learning_rate": 0.00013582903040734368, "loss": 0.8213, "step": 4735 }, { "epoch": 0.07, "grad_norm": 0.43359375, "learning_rate": 0.0001359724612736661, "loss": 0.9894, "step": 4740 }, { "epoch": 0.07, "grad_norm": 0.451171875, "learning_rate": 0.00013611589213998853, "loss": 0.9985, "step": 4745 }, { "epoch": 0.07, "grad_norm": 0.443359375, "learning_rate": 0.00013625932300631097, "loss": 0.9631, "step": 4750 }, { "epoch": 0.07, "grad_norm": 0.408203125, "learning_rate": 0.00013640275387263339, "loss": 0.9722, "step": 4755 }, { "epoch": 0.07, "grad_norm": 0.392578125, "learning_rate": 0.00013654618473895585, "loss": 0.9535, "step": 4760 }, { "epoch": 0.07, "grad_norm": 0.41015625, "learning_rate": 0.00013668961560527827, "loss": 0.8753, "step": 4765 }, { "epoch": 0.07, "grad_norm": 0.443359375, "learning_rate": 0.00013683304647160068, "loss": 0.9648, "step": 4770 }, { "epoch": 0.07, "grad_norm": 0.451171875, "learning_rate": 0.00013697647733792315, "loss": 0.8829, "step": 4775 }, { "epoch": 0.07, "grad_norm": 0.48046875, "learning_rate": 0.00013711990820424556, "loss": 1.0206, "step": 4780 }, { "epoch": 0.07, "grad_norm": 0.408203125, "learning_rate": 0.000137263339070568, "loss": 0.9263, "step": 4785 }, { "epoch": 0.07, "grad_norm": 0.453125, "learning_rate": 0.00013740676993689044, "loss": 0.9042, "step": 4790 }, { "epoch": 0.07, "grad_norm": 0.44921875, "learning_rate": 0.00013755020080321285, "loss": 0.9451, "step": 4795 }, { "epoch": 0.07, "grad_norm": 0.51171875, "learning_rate": 0.0001376936316695353, "loss": 1.0014, "step": 4800 }, { "epoch": 0.07, "grad_norm": 0.45703125, "learning_rate": 0.00013783706253585773, "loss": 0.9152, "step": 4805 }, { "epoch": 0.07, "grad_norm": 0.380859375, "learning_rate": 0.00013798049340218015, "loss": 1.0523, "step": 4810 }, { "epoch": 0.07, "grad_norm": 0.51171875, "learning_rate": 0.00013812392426850259, "loss": 1.0063, "step": 4815 }, { "epoch": 0.07, "grad_norm": 0.439453125, "learning_rate": 0.00013826735513482503, "loss": 0.9768, "step": 4820 }, { "epoch": 0.07, "grad_norm": 0.419921875, "learning_rate": 0.00013841078600114744, "loss": 1.2043, "step": 4825 }, { "epoch": 0.07, "grad_norm": 0.458984375, "learning_rate": 0.00013855421686746988, "loss": 0.9456, "step": 4830 }, { "epoch": 0.07, "grad_norm": 0.4375, "learning_rate": 0.00013869764773379232, "loss": 0.8694, "step": 4835 }, { "epoch": 0.07, "grad_norm": 0.447265625, "learning_rate": 0.00013884107860011476, "loss": 0.89, "step": 4840 }, { "epoch": 0.07, "grad_norm": 0.4140625, "learning_rate": 0.00013898450946643717, "loss": 0.9385, "step": 4845 }, { "epoch": 0.07, "grad_norm": 0.423828125, "learning_rate": 0.0001391279403327596, "loss": 0.8744, "step": 4850 }, { "epoch": 0.07, "grad_norm": 0.470703125, "learning_rate": 0.00013927137119908205, "loss": 0.9686, "step": 4855 }, { "epoch": 0.07, "grad_norm": 0.431640625, "learning_rate": 0.00013941480206540447, "loss": 0.9721, "step": 4860 }, { "epoch": 0.07, "grad_norm": 0.44140625, "learning_rate": 0.0001395582329317269, "loss": 0.9944, "step": 4865 }, { "epoch": 0.07, "grad_norm": 0.47265625, "learning_rate": 0.00013970166379804935, "loss": 1.0323, "step": 4870 }, { "epoch": 0.07, "grad_norm": 0.4765625, "learning_rate": 0.0001398450946643718, "loss": 1.0255, "step": 4875 }, { "epoch": 0.07, "grad_norm": 0.4453125, "learning_rate": 0.00013998852553069423, "loss": 0.9475, "step": 4880 }, { "epoch": 0.07, "grad_norm": 0.443359375, "learning_rate": 0.00014013195639701664, "loss": 1.0105, "step": 4885 }, { "epoch": 0.07, "grad_norm": 0.431640625, "learning_rate": 0.00014027538726333908, "loss": 0.9109, "step": 4890 }, { "epoch": 0.07, "grad_norm": 0.439453125, "learning_rate": 0.00014041881812966152, "loss": 0.9437, "step": 4895 }, { "epoch": 0.07, "grad_norm": 0.4453125, "learning_rate": 0.00014056224899598393, "loss": 1.0219, "step": 4900 }, { "epoch": 0.07, "grad_norm": 0.447265625, "learning_rate": 0.00014070567986230637, "loss": 0.9312, "step": 4905 }, { "epoch": 0.07, "grad_norm": 0.4375, "learning_rate": 0.00014084911072862881, "loss": 1.0337, "step": 4910 }, { "epoch": 0.07, "grad_norm": 0.38671875, "learning_rate": 0.00014099254159495123, "loss": 1.1155, "step": 4915 }, { "epoch": 0.07, "grad_norm": 0.51171875, "learning_rate": 0.00014113597246127367, "loss": 1.0321, "step": 4920 }, { "epoch": 0.07, "grad_norm": 0.4375, "learning_rate": 0.0001412794033275961, "loss": 0.9464, "step": 4925 }, { "epoch": 0.07, "grad_norm": 0.55078125, "learning_rate": 0.00014142283419391852, "loss": 1.0167, "step": 4930 }, { "epoch": 0.07, "grad_norm": 0.5546875, "learning_rate": 0.000141566265060241, "loss": 1.1417, "step": 4935 }, { "epoch": 0.07, "grad_norm": 0.44140625, "learning_rate": 0.0001417096959265634, "loss": 1.0258, "step": 4940 }, { "epoch": 0.07, "grad_norm": 0.443359375, "learning_rate": 0.00014185312679288581, "loss": 0.9996, "step": 4945 }, { "epoch": 0.07, "grad_norm": 0.44921875, "learning_rate": 0.00014199655765920828, "loss": 1.102, "step": 4950 }, { "epoch": 0.07, "grad_norm": 0.43359375, "learning_rate": 0.0001421399885255307, "loss": 0.9078, "step": 4955 }, { "epoch": 0.07, "grad_norm": 0.4921875, "learning_rate": 0.00014228341939185313, "loss": 0.9108, "step": 4960 }, { "epoch": 0.07, "grad_norm": 0.427734375, "learning_rate": 0.00014242685025817557, "loss": 0.9866, "step": 4965 }, { "epoch": 0.07, "grad_norm": 0.54296875, "learning_rate": 0.000142570281124498, "loss": 1.0493, "step": 4970 }, { "epoch": 0.07, "grad_norm": 0.4375, "learning_rate": 0.00014271371199082045, "loss": 0.8932, "step": 4975 }, { "epoch": 0.07, "grad_norm": 0.439453125, "learning_rate": 0.00014285714285714287, "loss": 1.0283, "step": 4980 }, { "epoch": 0.07, "grad_norm": 0.384765625, "learning_rate": 0.00014300057372346528, "loss": 0.9218, "step": 4985 }, { "epoch": 0.07, "grad_norm": 0.451171875, "learning_rate": 0.00014314400458978775, "loss": 0.8918, "step": 4990 }, { "epoch": 0.07, "grad_norm": 0.443359375, "learning_rate": 0.00014328743545611016, "loss": 0.9241, "step": 4995 }, { "epoch": 0.07, "grad_norm": 0.5390625, "learning_rate": 0.0001434308663224326, "loss": 1.0046, "step": 5000 }, { "epoch": 0.07, "grad_norm": 0.423828125, "learning_rate": 0.00014357429718875504, "loss": 0.9486, "step": 5005 }, { "epoch": 0.07, "grad_norm": 0.4453125, "learning_rate": 0.00014371772805507745, "loss": 0.8616, "step": 5010 }, { "epoch": 0.07, "grad_norm": 0.451171875, "learning_rate": 0.0001438611589213999, "loss": 0.952, "step": 5015 }, { "epoch": 0.07, "grad_norm": 0.455078125, "learning_rate": 0.00014400458978772233, "loss": 0.867, "step": 5020 }, { "epoch": 0.07, "grad_norm": 0.462890625, "learning_rate": 0.00014414802065404475, "loss": 1.0847, "step": 5025 }, { "epoch": 0.07, "grad_norm": 0.435546875, "learning_rate": 0.0001442914515203672, "loss": 0.8939, "step": 5030 }, { "epoch": 0.07, "grad_norm": 0.453125, "learning_rate": 0.00014443488238668963, "loss": 0.9756, "step": 5035 }, { "epoch": 0.07, "grad_norm": 0.47265625, "learning_rate": 0.00014457831325301204, "loss": 1.0356, "step": 5040 }, { "epoch": 0.07, "grad_norm": 0.458984375, "learning_rate": 0.00014472174411933448, "loss": 0.8602, "step": 5045 }, { "epoch": 0.07, "grad_norm": 0.333984375, "learning_rate": 0.00014486517498565692, "loss": 0.8666, "step": 5050 }, { "epoch": 0.07, "grad_norm": 0.447265625, "learning_rate": 0.00014500860585197936, "loss": 0.9651, "step": 5055 }, { "epoch": 0.07, "grad_norm": 0.447265625, "learning_rate": 0.00014515203671830177, "loss": 1.02, "step": 5060 }, { "epoch": 0.07, "grad_norm": 0.49609375, "learning_rate": 0.00014529546758462422, "loss": 1.0457, "step": 5065 }, { "epoch": 0.07, "grad_norm": 0.4296875, "learning_rate": 0.00014543889845094666, "loss": 1.1339, "step": 5070 }, { "epoch": 0.07, "grad_norm": 0.376953125, "learning_rate": 0.00014558232931726907, "loss": 0.8031, "step": 5075 }, { "epoch": 0.07, "grad_norm": 0.474609375, "learning_rate": 0.0001457257601835915, "loss": 0.9769, "step": 5080 }, { "epoch": 0.07, "grad_norm": 0.43359375, "learning_rate": 0.00014586919104991395, "loss": 0.8861, "step": 5085 }, { "epoch": 0.07, "grad_norm": 0.5703125, "learning_rate": 0.0001460126219162364, "loss": 0.9878, "step": 5090 }, { "epoch": 0.07, "grad_norm": 0.44921875, "learning_rate": 0.00014615605278255883, "loss": 0.904, "step": 5095 }, { "epoch": 0.07, "grad_norm": 0.384765625, "learning_rate": 0.00014629948364888124, "loss": 0.8937, "step": 5100 }, { "epoch": 0.07, "grad_norm": 0.54296875, "learning_rate": 0.00014644291451520368, "loss": 1.0381, "step": 5105 }, { "epoch": 0.07, "grad_norm": 0.5390625, "learning_rate": 0.00014658634538152612, "loss": 1.0496, "step": 5110 }, { "epoch": 0.07, "grad_norm": 0.46875, "learning_rate": 0.00014672977624784854, "loss": 1.0499, "step": 5115 }, { "epoch": 0.07, "grad_norm": 0.4453125, "learning_rate": 0.00014687320711417098, "loss": 1.0203, "step": 5120 }, { "epoch": 0.07, "grad_norm": 0.48046875, "learning_rate": 0.00014701663798049342, "loss": 1.0739, "step": 5125 }, { "epoch": 0.07, "grad_norm": 0.46484375, "learning_rate": 0.00014716006884681583, "loss": 0.9814, "step": 5130 }, { "epoch": 0.07, "grad_norm": 0.462890625, "learning_rate": 0.00014730349971313827, "loss": 1.0477, "step": 5135 }, { "epoch": 0.07, "grad_norm": 0.50390625, "learning_rate": 0.0001474469305794607, "loss": 0.9457, "step": 5140 }, { "epoch": 0.07, "grad_norm": 0.470703125, "learning_rate": 0.00014759036144578312, "loss": 0.869, "step": 5145 }, { "epoch": 0.07, "grad_norm": 0.44921875, "learning_rate": 0.0001477337923121056, "loss": 0.9091, "step": 5150 }, { "epoch": 0.07, "grad_norm": 0.490234375, "learning_rate": 0.000147877223178428, "loss": 0.8955, "step": 5155 }, { "epoch": 0.07, "grad_norm": 0.40625, "learning_rate": 0.00014802065404475042, "loss": 0.9003, "step": 5160 }, { "epoch": 0.07, "grad_norm": 0.453125, "learning_rate": 0.00014816408491107288, "loss": 0.8217, "step": 5165 }, { "epoch": 0.07, "grad_norm": 0.388671875, "learning_rate": 0.0001483075157773953, "loss": 0.9767, "step": 5170 }, { "epoch": 0.07, "grad_norm": 0.46484375, "learning_rate": 0.00014845094664371774, "loss": 1.0299, "step": 5175 }, { "epoch": 0.07, "grad_norm": 0.474609375, "learning_rate": 0.00014859437751004018, "loss": 1.0436, "step": 5180 }, { "epoch": 0.07, "grad_norm": 0.45703125, "learning_rate": 0.0001487378083763626, "loss": 0.9742, "step": 5185 }, { "epoch": 0.07, "grad_norm": 0.470703125, "learning_rate": 0.00014888123924268503, "loss": 0.9181, "step": 5190 }, { "epoch": 0.07, "grad_norm": 0.54296875, "learning_rate": 0.00014902467010900747, "loss": 0.9368, "step": 5195 }, { "epoch": 0.07, "grad_norm": 0.453125, "learning_rate": 0.00014916810097532988, "loss": 0.9426, "step": 5200 }, { "epoch": 0.07, "grad_norm": 0.50390625, "learning_rate": 0.00014931153184165235, "loss": 1.0936, "step": 5205 }, { "epoch": 0.07, "grad_norm": 0.6484375, "learning_rate": 0.00014945496270797476, "loss": 1.0672, "step": 5210 }, { "epoch": 0.07, "grad_norm": 0.412109375, "learning_rate": 0.0001495983935742972, "loss": 0.9419, "step": 5215 }, { "epoch": 0.07, "grad_norm": 0.5859375, "learning_rate": 0.00014974182444061964, "loss": 0.9147, "step": 5220 }, { "epoch": 0.07, "grad_norm": 0.59375, "learning_rate": 0.00014988525530694206, "loss": 1.168, "step": 5225 }, { "epoch": 0.08, "grad_norm": 0.46875, "learning_rate": 0.0001500286861732645, "loss": 1.0776, "step": 5230 }, { "epoch": 0.08, "grad_norm": 0.451171875, "learning_rate": 0.00015017211703958694, "loss": 0.8433, "step": 5235 }, { "epoch": 0.08, "grad_norm": 0.478515625, "learning_rate": 0.00015031554790590935, "loss": 0.959, "step": 5240 }, { "epoch": 0.08, "grad_norm": 0.51171875, "learning_rate": 0.0001504589787722318, "loss": 0.9664, "step": 5245 }, { "epoch": 0.08, "grad_norm": 0.474609375, "learning_rate": 0.00015060240963855423, "loss": 0.9351, "step": 5250 }, { "epoch": 0.08, "grad_norm": 0.435546875, "learning_rate": 0.00015074584050487664, "loss": 1.0872, "step": 5255 }, { "epoch": 0.08, "grad_norm": 0.478515625, "learning_rate": 0.00015088927137119908, "loss": 1.0728, "step": 5260 }, { "epoch": 0.08, "grad_norm": 0.431640625, "learning_rate": 0.00015103270223752152, "loss": 1.07, "step": 5265 }, { "epoch": 0.08, "grad_norm": 0.5078125, "learning_rate": 0.00015117613310384396, "loss": 1.003, "step": 5270 }, { "epoch": 0.08, "grad_norm": 0.421875, "learning_rate": 0.00015131956397016638, "loss": 0.9776, "step": 5275 }, { "epoch": 0.08, "grad_norm": 0.4765625, "learning_rate": 0.00015146299483648882, "loss": 0.908, "step": 5280 }, { "epoch": 0.08, "grad_norm": 0.396484375, "learning_rate": 0.00015160642570281126, "loss": 0.8376, "step": 5285 }, { "epoch": 0.08, "grad_norm": 0.4453125, "learning_rate": 0.00015174985656913367, "loss": 0.9636, "step": 5290 }, { "epoch": 0.08, "grad_norm": 0.5078125, "learning_rate": 0.0001518932874354561, "loss": 0.9067, "step": 5295 }, { "epoch": 0.08, "grad_norm": 0.4765625, "learning_rate": 0.00015203671830177855, "loss": 1.0184, "step": 5300 }, { "epoch": 0.08, "grad_norm": 0.47265625, "learning_rate": 0.00015218014916810096, "loss": 0.9313, "step": 5305 }, { "epoch": 0.08, "grad_norm": 0.546875, "learning_rate": 0.00015232358003442343, "loss": 1.1231, "step": 5310 }, { "epoch": 0.08, "grad_norm": 0.390625, "learning_rate": 0.00015246701090074584, "loss": 1.0447, "step": 5315 }, { "epoch": 0.08, "grad_norm": 0.4296875, "learning_rate": 0.00015261044176706828, "loss": 1.0549, "step": 5320 }, { "epoch": 0.08, "grad_norm": 0.48046875, "learning_rate": 0.00015275387263339072, "loss": 0.9707, "step": 5325 }, { "epoch": 0.08, "grad_norm": 0.451171875, "learning_rate": 0.00015289730349971314, "loss": 0.9855, "step": 5330 }, { "epoch": 0.08, "grad_norm": 0.462890625, "learning_rate": 0.00015304073436603558, "loss": 1.1058, "step": 5335 }, { "epoch": 0.08, "grad_norm": 0.462890625, "learning_rate": 0.00015318416523235802, "loss": 0.8579, "step": 5340 }, { "epoch": 0.08, "grad_norm": 0.46484375, "learning_rate": 0.00015332759609868043, "loss": 1.1284, "step": 5345 }, { "epoch": 0.08, "grad_norm": 0.5546875, "learning_rate": 0.00015347102696500287, "loss": 1.0177, "step": 5350 }, { "epoch": 0.08, "grad_norm": 0.52734375, "learning_rate": 0.0001536144578313253, "loss": 0.9577, "step": 5355 }, { "epoch": 0.08, "grad_norm": 0.5, "learning_rate": 0.00015375788869764772, "loss": 1.0579, "step": 5360 }, { "epoch": 0.08, "grad_norm": 0.4296875, "learning_rate": 0.0001539013195639702, "loss": 0.9464, "step": 5365 }, { "epoch": 0.08, "grad_norm": 0.447265625, "learning_rate": 0.0001540447504302926, "loss": 0.9591, "step": 5370 }, { "epoch": 0.08, "grad_norm": 0.490234375, "learning_rate": 0.00015418818129661502, "loss": 0.9479, "step": 5375 }, { "epoch": 0.08, "grad_norm": 0.46484375, "learning_rate": 0.00015433161216293748, "loss": 0.9686, "step": 5380 }, { "epoch": 0.08, "grad_norm": 0.68359375, "learning_rate": 0.0001544750430292599, "loss": 0.9715, "step": 5385 }, { "epoch": 0.08, "grad_norm": 0.3984375, "learning_rate": 0.00015461847389558234, "loss": 0.9483, "step": 5390 }, { "epoch": 0.08, "grad_norm": 0.46484375, "learning_rate": 0.00015476190476190478, "loss": 0.8867, "step": 5395 }, { "epoch": 0.08, "grad_norm": 0.49609375, "learning_rate": 0.0001549053356282272, "loss": 0.9678, "step": 5400 }, { "epoch": 0.08, "grad_norm": 0.47265625, "learning_rate": 0.00015504876649454963, "loss": 1.0233, "step": 5405 }, { "epoch": 0.08, "grad_norm": 0.51171875, "learning_rate": 0.00015519219736087207, "loss": 1.0728, "step": 5410 }, { "epoch": 0.08, "grad_norm": 0.439453125, "learning_rate": 0.00015533562822719448, "loss": 0.9649, "step": 5415 }, { "epoch": 0.08, "grad_norm": 0.44921875, "learning_rate": 0.00015547905909351695, "loss": 1.0888, "step": 5420 }, { "epoch": 0.08, "grad_norm": 0.47265625, "learning_rate": 0.00015562248995983936, "loss": 0.9936, "step": 5425 }, { "epoch": 0.08, "grad_norm": 0.486328125, "learning_rate": 0.0001557659208261618, "loss": 0.9588, "step": 5430 }, { "epoch": 0.08, "grad_norm": 0.439453125, "learning_rate": 0.00015590935169248425, "loss": 0.9805, "step": 5435 }, { "epoch": 0.08, "grad_norm": 0.458984375, "learning_rate": 0.00015605278255880666, "loss": 1.0008, "step": 5440 }, { "epoch": 0.08, "grad_norm": 0.439453125, "learning_rate": 0.0001561962134251291, "loss": 1.0179, "step": 5445 }, { "epoch": 0.08, "grad_norm": 0.466796875, "learning_rate": 0.00015633964429145154, "loss": 0.8855, "step": 5450 }, { "epoch": 0.08, "grad_norm": 0.474609375, "learning_rate": 0.00015648307515777395, "loss": 0.958, "step": 5455 }, { "epoch": 0.08, "grad_norm": 0.451171875, "learning_rate": 0.0001566265060240964, "loss": 0.9418, "step": 5460 }, { "epoch": 0.08, "grad_norm": 0.5, "learning_rate": 0.00015676993689041883, "loss": 0.9573, "step": 5465 }, { "epoch": 0.08, "grad_norm": 0.431640625, "learning_rate": 0.00015691336775674127, "loss": 1.1149, "step": 5470 }, { "epoch": 0.08, "grad_norm": 0.42578125, "learning_rate": 0.00015705679862306369, "loss": 0.995, "step": 5475 }, { "epoch": 0.08, "grad_norm": 0.5234375, "learning_rate": 0.00015720022948938613, "loss": 0.9856, "step": 5480 }, { "epoch": 0.08, "grad_norm": 0.474609375, "learning_rate": 0.00015734366035570857, "loss": 0.9135, "step": 5485 }, { "epoch": 0.08, "grad_norm": 0.36328125, "learning_rate": 0.00015748709122203098, "loss": 0.8655, "step": 5490 }, { "epoch": 0.08, "grad_norm": 0.408203125, "learning_rate": 0.00015763052208835342, "loss": 1.0002, "step": 5495 }, { "epoch": 0.08, "grad_norm": 0.404296875, "learning_rate": 0.00015777395295467586, "loss": 0.9998, "step": 5500 }, { "epoch": 0.08, "grad_norm": 0.51171875, "learning_rate": 0.00015791738382099827, "loss": 0.9179, "step": 5505 }, { "epoch": 0.08, "grad_norm": 0.41015625, "learning_rate": 0.0001580608146873207, "loss": 0.7862, "step": 5510 }, { "epoch": 0.08, "grad_norm": 0.4453125, "learning_rate": 0.00015820424555364315, "loss": 1.0359, "step": 5515 }, { "epoch": 0.08, "grad_norm": 0.466796875, "learning_rate": 0.00015834767641996557, "loss": 0.9956, "step": 5520 }, { "epoch": 0.08, "grad_norm": 0.54296875, "learning_rate": 0.00015849110728628803, "loss": 0.8596, "step": 5525 }, { "epoch": 0.08, "grad_norm": 0.48046875, "learning_rate": 0.00015863453815261045, "loss": 1.1384, "step": 5530 }, { "epoch": 0.08, "grad_norm": 0.412109375, "learning_rate": 0.00015877796901893289, "loss": 1.0153, "step": 5535 }, { "epoch": 0.08, "grad_norm": 0.45703125, "learning_rate": 0.00015892139988525533, "loss": 1.0316, "step": 5540 }, { "epoch": 0.08, "grad_norm": 0.443359375, "learning_rate": 0.00015906483075157774, "loss": 1.0093, "step": 5545 }, { "epoch": 0.08, "grad_norm": 0.4609375, "learning_rate": 0.00015920826161790018, "loss": 1.047, "step": 5550 }, { "epoch": 0.08, "grad_norm": 0.515625, "learning_rate": 0.00015935169248422262, "loss": 0.9798, "step": 5555 }, { "epoch": 0.08, "grad_norm": 0.453125, "learning_rate": 0.00015949512335054503, "loss": 0.9495, "step": 5560 }, { "epoch": 0.08, "grad_norm": 0.44140625, "learning_rate": 0.0001596385542168675, "loss": 0.994, "step": 5565 }, { "epoch": 0.08, "grad_norm": 0.474609375, "learning_rate": 0.0001597819850831899, "loss": 0.97, "step": 5570 }, { "epoch": 0.08, "grad_norm": 0.478515625, "learning_rate": 0.00015992541594951233, "loss": 0.9392, "step": 5575 }, { "epoch": 0.08, "grad_norm": 0.43359375, "learning_rate": 0.0001600688468158348, "loss": 0.9793, "step": 5580 }, { "epoch": 0.08, "grad_norm": 0.453125, "learning_rate": 0.0001602122776821572, "loss": 1.0855, "step": 5585 }, { "epoch": 0.08, "grad_norm": 0.46875, "learning_rate": 0.00016035570854847965, "loss": 1.0543, "step": 5590 }, { "epoch": 0.08, "grad_norm": 0.458984375, "learning_rate": 0.00016049913941480209, "loss": 1.0799, "step": 5595 }, { "epoch": 0.08, "grad_norm": 0.47265625, "learning_rate": 0.0001606425702811245, "loss": 0.9207, "step": 5600 }, { "epoch": 0.08, "grad_norm": 0.423828125, "learning_rate": 0.00016078600114744694, "loss": 0.9722, "step": 5605 }, { "epoch": 0.08, "grad_norm": 0.4765625, "learning_rate": 0.00016092943201376938, "loss": 1.0413, "step": 5610 }, { "epoch": 0.08, "grad_norm": 0.50390625, "learning_rate": 0.0001610728628800918, "loss": 0.9253, "step": 5615 }, { "epoch": 0.08, "grad_norm": 0.6796875, "learning_rate": 0.00016121629374641423, "loss": 1.0691, "step": 5620 }, { "epoch": 0.08, "grad_norm": 0.46484375, "learning_rate": 0.00016135972461273667, "loss": 0.9382, "step": 5625 }, { "epoch": 0.08, "grad_norm": 0.494140625, "learning_rate": 0.00016150315547905909, "loss": 0.9178, "step": 5630 }, { "epoch": 0.08, "grad_norm": 0.515625, "learning_rate": 0.00016164658634538153, "loss": 1.0859, "step": 5635 }, { "epoch": 0.08, "grad_norm": 0.53125, "learning_rate": 0.00016179001721170397, "loss": 0.9187, "step": 5640 }, { "epoch": 0.08, "grad_norm": 0.470703125, "learning_rate": 0.0001619334480780264, "loss": 0.9841, "step": 5645 }, { "epoch": 0.08, "grad_norm": 0.419921875, "learning_rate": 0.00016207687894434885, "loss": 0.9436, "step": 5650 }, { "epoch": 0.08, "grad_norm": 0.40234375, "learning_rate": 0.00016222030981067126, "loss": 0.9063, "step": 5655 }, { "epoch": 0.08, "grad_norm": 0.546875, "learning_rate": 0.0001623637406769937, "loss": 1.0031, "step": 5660 }, { "epoch": 0.08, "grad_norm": 0.5390625, "learning_rate": 0.00016250717154331614, "loss": 0.968, "step": 5665 }, { "epoch": 0.08, "grad_norm": 0.482421875, "learning_rate": 0.00016265060240963855, "loss": 1.1923, "step": 5670 }, { "epoch": 0.08, "grad_norm": 0.447265625, "learning_rate": 0.000162794033275961, "loss": 0.85, "step": 5675 }, { "epoch": 0.08, "grad_norm": 0.4609375, "learning_rate": 0.00016293746414228343, "loss": 1.0102, "step": 5680 }, { "epoch": 0.08, "grad_norm": 0.41796875, "learning_rate": 0.00016308089500860587, "loss": 1.0054, "step": 5685 }, { "epoch": 0.08, "grad_norm": 0.486328125, "learning_rate": 0.0001632243258749283, "loss": 0.8971, "step": 5690 }, { "epoch": 0.08, "grad_norm": 0.48046875, "learning_rate": 0.00016336775674125073, "loss": 1.0443, "step": 5695 }, { "epoch": 0.08, "grad_norm": 0.4375, "learning_rate": 0.00016351118760757317, "loss": 1.0344, "step": 5700 }, { "epoch": 0.08, "grad_norm": 0.46875, "learning_rate": 0.00016365461847389558, "loss": 0.9438, "step": 5705 }, { "epoch": 0.08, "grad_norm": 0.453125, "learning_rate": 0.00016379804934021802, "loss": 1.0228, "step": 5710 }, { "epoch": 0.08, "grad_norm": 0.4296875, "learning_rate": 0.00016394148020654046, "loss": 0.9256, "step": 5715 }, { "epoch": 0.08, "grad_norm": 0.4375, "learning_rate": 0.00016408491107286287, "loss": 0.9056, "step": 5720 }, { "epoch": 0.08, "grad_norm": 0.474609375, "learning_rate": 0.00016422834193918531, "loss": 1.0632, "step": 5725 }, { "epoch": 0.08, "grad_norm": 0.5078125, "learning_rate": 0.00016437177280550775, "loss": 0.8901, "step": 5730 }, { "epoch": 0.08, "grad_norm": 0.46875, "learning_rate": 0.00016451520367183017, "loss": 0.9466, "step": 5735 }, { "epoch": 0.08, "grad_norm": 0.48046875, "learning_rate": 0.00016465863453815263, "loss": 1.1376, "step": 5740 }, { "epoch": 0.08, "grad_norm": 0.375, "learning_rate": 0.00016480206540447505, "loss": 0.8577, "step": 5745 }, { "epoch": 0.08, "grad_norm": 0.39453125, "learning_rate": 0.00016494549627079746, "loss": 0.9745, "step": 5750 }, { "epoch": 0.08, "grad_norm": 0.494140625, "learning_rate": 0.00016508892713711993, "loss": 1.0465, "step": 5755 }, { "epoch": 0.08, "grad_norm": 0.5078125, "learning_rate": 0.00016523235800344234, "loss": 1.1213, "step": 5760 }, { "epoch": 0.08, "grad_norm": 0.478515625, "learning_rate": 0.00016537578886976478, "loss": 0.9917, "step": 5765 }, { "epoch": 0.08, "grad_norm": 0.4921875, "learning_rate": 0.00016551921973608722, "loss": 0.8194, "step": 5770 }, { "epoch": 0.08, "grad_norm": 0.486328125, "learning_rate": 0.00016566265060240963, "loss": 0.9969, "step": 5775 }, { "epoch": 0.08, "grad_norm": 0.5078125, "learning_rate": 0.0001658060814687321, "loss": 0.8893, "step": 5780 }, { "epoch": 0.08, "grad_norm": 0.5234375, "learning_rate": 0.00016594951233505451, "loss": 0.9696, "step": 5785 }, { "epoch": 0.08, "grad_norm": 0.474609375, "learning_rate": 0.00016609294320137693, "loss": 1.1031, "step": 5790 }, { "epoch": 0.08, "grad_norm": 0.5078125, "learning_rate": 0.0001662363740676994, "loss": 1.0955, "step": 5795 }, { "epoch": 0.08, "grad_norm": 0.451171875, "learning_rate": 0.0001663798049340218, "loss": 1.0878, "step": 5800 }, { "epoch": 0.08, "grad_norm": 0.46484375, "learning_rate": 0.00016652323580034425, "loss": 0.9839, "step": 5805 }, { "epoch": 0.08, "grad_norm": 0.474609375, "learning_rate": 0.0001666666666666667, "loss": 1.1202, "step": 5810 }, { "epoch": 0.08, "grad_norm": 0.51953125, "learning_rate": 0.0001668100975329891, "loss": 0.9917, "step": 5815 }, { "epoch": 0.08, "grad_norm": 0.421875, "learning_rate": 0.00016695352839931154, "loss": 0.9115, "step": 5820 }, { "epoch": 0.08, "grad_norm": 0.451171875, "learning_rate": 0.00016709695926563398, "loss": 0.9164, "step": 5825 }, { "epoch": 0.08, "grad_norm": 0.43359375, "learning_rate": 0.0001672403901319564, "loss": 0.995, "step": 5830 }, { "epoch": 0.08, "grad_norm": 0.5390625, "learning_rate": 0.00016738382099827883, "loss": 1.091, "step": 5835 }, { "epoch": 0.08, "grad_norm": 0.48046875, "learning_rate": 0.00016752725186460127, "loss": 0.9019, "step": 5840 }, { "epoch": 0.08, "grad_norm": 0.515625, "learning_rate": 0.0001676706827309237, "loss": 0.8586, "step": 5845 }, { "epoch": 0.08, "grad_norm": 0.474609375, "learning_rate": 0.00016781411359724613, "loss": 0.9595, "step": 5850 }, { "epoch": 0.08, "grad_norm": 0.435546875, "learning_rate": 0.00016795754446356857, "loss": 1.1204, "step": 5855 }, { "epoch": 0.08, "grad_norm": 0.458984375, "learning_rate": 0.000168100975329891, "loss": 0.8435, "step": 5860 }, { "epoch": 0.08, "grad_norm": 0.4765625, "learning_rate": 0.00016824440619621342, "loss": 0.9602, "step": 5865 }, { "epoch": 0.08, "grad_norm": 0.5, "learning_rate": 0.00016838783706253586, "loss": 0.9821, "step": 5870 }, { "epoch": 0.08, "grad_norm": 0.478515625, "learning_rate": 0.0001685312679288583, "loss": 0.864, "step": 5875 }, { "epoch": 0.08, "grad_norm": 0.45703125, "learning_rate": 0.00016867469879518074, "loss": 0.979, "step": 5880 }, { "epoch": 0.08, "grad_norm": 0.4609375, "learning_rate": 0.00016881812966150316, "loss": 0.9652, "step": 5885 }, { "epoch": 0.08, "grad_norm": 0.458984375, "learning_rate": 0.0001689615605278256, "loss": 0.9546, "step": 5890 }, { "epoch": 0.08, "grad_norm": 0.474609375, "learning_rate": 0.00016910499139414804, "loss": 0.9793, "step": 5895 }, { "epoch": 0.08, "grad_norm": 0.4609375, "learning_rate": 0.00016924842226047048, "loss": 0.9838, "step": 5900 }, { "epoch": 0.08, "grad_norm": 0.421875, "learning_rate": 0.0001693918531267929, "loss": 0.9815, "step": 5905 }, { "epoch": 0.08, "grad_norm": 0.4765625, "learning_rate": 0.00016953528399311533, "loss": 0.9909, "step": 5910 }, { "epoch": 0.08, "grad_norm": 0.5546875, "learning_rate": 0.00016967871485943777, "loss": 1.0473, "step": 5915 }, { "epoch": 0.08, "grad_norm": 0.486328125, "learning_rate": 0.00016982214572576018, "loss": 1.0577, "step": 5920 }, { "epoch": 0.08, "grad_norm": 0.515625, "learning_rate": 0.00016996557659208262, "loss": 1.0259, "step": 5925 }, { "epoch": 0.09, "grad_norm": 0.51953125, "learning_rate": 0.00017010900745840506, "loss": 0.9825, "step": 5930 }, { "epoch": 0.09, "grad_norm": 0.5, "learning_rate": 0.00017025243832472748, "loss": 1.172, "step": 5935 }, { "epoch": 0.09, "grad_norm": 0.46484375, "learning_rate": 0.00017039586919104992, "loss": 0.8524, "step": 5940 }, { "epoch": 0.09, "grad_norm": 0.490234375, "learning_rate": 0.00017053930005737236, "loss": 0.8916, "step": 5945 }, { "epoch": 0.09, "grad_norm": 0.51953125, "learning_rate": 0.00017068273092369477, "loss": 0.9652, "step": 5950 }, { "epoch": 0.09, "grad_norm": 0.490234375, "learning_rate": 0.00017082616179001724, "loss": 0.9527, "step": 5955 }, { "epoch": 0.09, "grad_norm": 0.50390625, "learning_rate": 0.00017096959265633965, "loss": 0.8899, "step": 5960 }, { "epoch": 0.09, "grad_norm": 0.484375, "learning_rate": 0.00017111302352266206, "loss": 0.9364, "step": 5965 }, { "epoch": 0.09, "grad_norm": 0.490234375, "learning_rate": 0.00017125645438898453, "loss": 0.9621, "step": 5970 }, { "epoch": 0.09, "grad_norm": 0.45703125, "learning_rate": 0.00017139988525530694, "loss": 0.9841, "step": 5975 }, { "epoch": 0.09, "grad_norm": 0.451171875, "learning_rate": 0.00017154331612162938, "loss": 1.0999, "step": 5980 }, { "epoch": 0.09, "grad_norm": 0.447265625, "learning_rate": 0.00017168674698795182, "loss": 0.883, "step": 5985 }, { "epoch": 0.09, "grad_norm": 0.515625, "learning_rate": 0.00017183017785427424, "loss": 1.0271, "step": 5990 }, { "epoch": 0.09, "grad_norm": 0.48828125, "learning_rate": 0.0001719736087205967, "loss": 1.0657, "step": 5995 }, { "epoch": 0.09, "grad_norm": 0.494140625, "learning_rate": 0.00017211703958691912, "loss": 1.0207, "step": 6000 }, { "epoch": 0.09, "grad_norm": 0.47265625, "learning_rate": 0.00017226047045324153, "loss": 1.2019, "step": 6005 }, { "epoch": 0.09, "grad_norm": 0.51171875, "learning_rate": 0.000172403901319564, "loss": 1.1323, "step": 6010 }, { "epoch": 0.09, "grad_norm": 0.478515625, "learning_rate": 0.0001725473321858864, "loss": 0.8854, "step": 6015 }, { "epoch": 0.09, "grad_norm": 0.4609375, "learning_rate": 0.00017269076305220885, "loss": 1.0004, "step": 6020 }, { "epoch": 0.09, "grad_norm": 0.486328125, "learning_rate": 0.0001728341939185313, "loss": 0.9655, "step": 6025 }, { "epoch": 0.09, "grad_norm": 0.515625, "learning_rate": 0.0001729776247848537, "loss": 0.9061, "step": 6030 }, { "epoch": 0.09, "grad_norm": 0.453125, "learning_rate": 0.00017312105565117614, "loss": 0.9962, "step": 6035 }, { "epoch": 0.09, "grad_norm": 0.53515625, "learning_rate": 0.00017326448651749858, "loss": 0.9991, "step": 6040 }, { "epoch": 0.09, "grad_norm": 0.482421875, "learning_rate": 0.000173407917383821, "loss": 1.0361, "step": 6045 }, { "epoch": 0.09, "grad_norm": 0.5, "learning_rate": 0.00017355134825014344, "loss": 0.9285, "step": 6050 }, { "epoch": 0.09, "grad_norm": 0.4609375, "learning_rate": 0.00017369477911646588, "loss": 1.048, "step": 6055 }, { "epoch": 0.09, "grad_norm": 0.5078125, "learning_rate": 0.0001738382099827883, "loss": 0.8781, "step": 6060 }, { "epoch": 0.09, "grad_norm": 0.46875, "learning_rate": 0.00017398164084911073, "loss": 1.0609, "step": 6065 }, { "epoch": 0.09, "grad_norm": 0.50390625, "learning_rate": 0.00017412507171543317, "loss": 0.8504, "step": 6070 }, { "epoch": 0.09, "grad_norm": 0.53125, "learning_rate": 0.0001742685025817556, "loss": 1.1307, "step": 6075 }, { "epoch": 0.09, "grad_norm": 0.53125, "learning_rate": 0.00017441193344807802, "loss": 1.0649, "step": 6080 }, { "epoch": 0.09, "grad_norm": 0.494140625, "learning_rate": 0.00017455536431440046, "loss": 0.8316, "step": 6085 }, { "epoch": 0.09, "grad_norm": 0.46875, "learning_rate": 0.0001746987951807229, "loss": 0.9775, "step": 6090 }, { "epoch": 0.09, "grad_norm": 0.484375, "learning_rate": 0.00017484222604704534, "loss": 1.0049, "step": 6095 }, { "epoch": 0.09, "grad_norm": 0.462890625, "learning_rate": 0.00017498565691336776, "loss": 0.965, "step": 6100 }, { "epoch": 0.09, "grad_norm": 0.45703125, "learning_rate": 0.0001751290877796902, "loss": 1.0832, "step": 6105 }, { "epoch": 0.09, "grad_norm": 0.3984375, "learning_rate": 0.00017527251864601264, "loss": 0.8216, "step": 6110 }, { "epoch": 0.09, "grad_norm": 0.48046875, "learning_rate": 0.00017541594951233508, "loss": 0.9725, "step": 6115 }, { "epoch": 0.09, "grad_norm": 0.390625, "learning_rate": 0.0001755593803786575, "loss": 0.8486, "step": 6120 }, { "epoch": 0.09, "grad_norm": 0.470703125, "learning_rate": 0.00017570281124497993, "loss": 1.067, "step": 6125 }, { "epoch": 0.09, "grad_norm": 0.51953125, "learning_rate": 0.00017584624211130237, "loss": 0.9053, "step": 6130 }, { "epoch": 0.09, "grad_norm": 0.423828125, "learning_rate": 0.00017598967297762478, "loss": 0.8203, "step": 6135 }, { "epoch": 0.09, "grad_norm": 0.48046875, "learning_rate": 0.00017613310384394722, "loss": 1.0192, "step": 6140 }, { "epoch": 0.09, "grad_norm": 0.53125, "learning_rate": 0.00017627653471026966, "loss": 0.9056, "step": 6145 }, { "epoch": 0.09, "grad_norm": 0.42578125, "learning_rate": 0.00017641996557659208, "loss": 1.0876, "step": 6150 }, { "epoch": 0.09, "grad_norm": 0.390625, "learning_rate": 0.00017656339644291452, "loss": 1.0021, "step": 6155 }, { "epoch": 0.09, "grad_norm": 0.4765625, "learning_rate": 0.00017670682730923696, "loss": 1.0436, "step": 6160 }, { "epoch": 0.09, "grad_norm": 0.451171875, "learning_rate": 0.00017685025817555937, "loss": 0.874, "step": 6165 }, { "epoch": 0.09, "grad_norm": 0.4765625, "learning_rate": 0.00017699368904188184, "loss": 0.9586, "step": 6170 }, { "epoch": 0.09, "grad_norm": 0.466796875, "learning_rate": 0.00017713711990820425, "loss": 1.0107, "step": 6175 }, { "epoch": 0.09, "grad_norm": 0.515625, "learning_rate": 0.00017728055077452666, "loss": 0.9436, "step": 6180 }, { "epoch": 0.09, "grad_norm": 0.53125, "learning_rate": 0.00017742398164084913, "loss": 0.9741, "step": 6185 }, { "epoch": 0.09, "grad_norm": 0.46484375, "learning_rate": 0.00017756741250717154, "loss": 0.9436, "step": 6190 }, { "epoch": 0.09, "grad_norm": 0.5078125, "learning_rate": 0.00017771084337349398, "loss": 0.9143, "step": 6195 }, { "epoch": 0.09, "grad_norm": 0.546875, "learning_rate": 0.00017785427423981642, "loss": 1.0719, "step": 6200 }, { "epoch": 0.09, "grad_norm": 0.39453125, "learning_rate": 0.00017799770510613884, "loss": 0.795, "step": 6205 }, { "epoch": 0.09, "grad_norm": 0.5078125, "learning_rate": 0.0001781411359724613, "loss": 0.8589, "step": 6210 }, { "epoch": 0.09, "grad_norm": 0.54296875, "learning_rate": 0.00017828456683878372, "loss": 1.1208, "step": 6215 }, { "epoch": 0.09, "grad_norm": 0.498046875, "learning_rate": 0.00017842799770510613, "loss": 1.0982, "step": 6220 }, { "epoch": 0.09, "grad_norm": 0.443359375, "learning_rate": 0.0001785714285714286, "loss": 0.9124, "step": 6225 }, { "epoch": 0.09, "grad_norm": 0.486328125, "learning_rate": 0.000178714859437751, "loss": 1.1237, "step": 6230 }, { "epoch": 0.09, "grad_norm": 0.458984375, "learning_rate": 0.00017885829030407345, "loss": 0.8831, "step": 6235 }, { "epoch": 0.09, "grad_norm": 0.5, "learning_rate": 0.0001790017211703959, "loss": 0.9529, "step": 6240 }, { "epoch": 0.09, "grad_norm": 0.46875, "learning_rate": 0.0001791451520367183, "loss": 1.0183, "step": 6245 }, { "epoch": 0.09, "grad_norm": 0.47265625, "learning_rate": 0.00017928858290304074, "loss": 1.0009, "step": 6250 }, { "epoch": 0.09, "grad_norm": 0.451171875, "learning_rate": 0.00017943201376936319, "loss": 0.79, "step": 6255 }, { "epoch": 0.09, "grad_norm": 0.515625, "learning_rate": 0.0001795754446356856, "loss": 0.9302, "step": 6260 }, { "epoch": 0.09, "grad_norm": 0.48828125, "learning_rate": 0.00017971887550200804, "loss": 1.0073, "step": 6265 }, { "epoch": 0.09, "grad_norm": 0.50390625, "learning_rate": 0.00017986230636833048, "loss": 0.9439, "step": 6270 }, { "epoch": 0.09, "grad_norm": 0.484375, "learning_rate": 0.0001800057372346529, "loss": 0.9953, "step": 6275 }, { "epoch": 0.09, "grad_norm": 0.484375, "learning_rate": 0.00018014916810097533, "loss": 0.9232, "step": 6280 }, { "epoch": 0.09, "grad_norm": 0.5078125, "learning_rate": 0.00018029259896729777, "loss": 0.9308, "step": 6285 }, { "epoch": 0.09, "grad_norm": 0.47265625, "learning_rate": 0.0001804360298336202, "loss": 0.9297, "step": 6290 }, { "epoch": 0.09, "grad_norm": 0.470703125, "learning_rate": 0.00018057946069994263, "loss": 0.9201, "step": 6295 }, { "epoch": 0.09, "grad_norm": 0.44921875, "learning_rate": 0.00018072289156626507, "loss": 1.0003, "step": 6300 }, { "epoch": 0.09, "grad_norm": 0.515625, "learning_rate": 0.0001808663224325875, "loss": 1.0814, "step": 6305 }, { "epoch": 0.09, "grad_norm": 0.44921875, "learning_rate": 0.00018100975329890992, "loss": 1.1829, "step": 6310 }, { "epoch": 0.09, "grad_norm": 0.478515625, "learning_rate": 0.00018115318416523236, "loss": 0.7843, "step": 6315 }, { "epoch": 0.09, "grad_norm": 0.48828125, "learning_rate": 0.0001812966150315548, "loss": 1.071, "step": 6320 }, { "epoch": 0.09, "grad_norm": 0.4609375, "learning_rate": 0.00018144004589787724, "loss": 0.766, "step": 6325 }, { "epoch": 0.09, "grad_norm": 0.458984375, "learning_rate": 0.00018158347676419968, "loss": 0.9826, "step": 6330 }, { "epoch": 0.09, "grad_norm": 0.416015625, "learning_rate": 0.0001817269076305221, "loss": 0.9046, "step": 6335 }, { "epoch": 0.09, "grad_norm": 0.51171875, "learning_rate": 0.00018187033849684453, "loss": 0.9938, "step": 6340 }, { "epoch": 0.09, "grad_norm": 0.451171875, "learning_rate": 0.00018201376936316697, "loss": 0.9356, "step": 6345 }, { "epoch": 0.09, "grad_norm": 0.490234375, "learning_rate": 0.00018215720022948939, "loss": 0.9787, "step": 6350 }, { "epoch": 0.09, "grad_norm": 0.482421875, "learning_rate": 0.00018230063109581183, "loss": 0.9114, "step": 6355 }, { "epoch": 0.09, "grad_norm": 0.46875, "learning_rate": 0.00018244406196213427, "loss": 1.0716, "step": 6360 }, { "epoch": 0.09, "grad_norm": 0.50390625, "learning_rate": 0.00018258749282845668, "loss": 0.8913, "step": 6365 }, { "epoch": 0.09, "grad_norm": 0.5234375, "learning_rate": 0.00018273092369477912, "loss": 1.1467, "step": 6370 }, { "epoch": 0.09, "grad_norm": 0.5078125, "learning_rate": 0.00018287435456110156, "loss": 0.9404, "step": 6375 }, { "epoch": 0.09, "grad_norm": 0.44140625, "learning_rate": 0.00018301778542742397, "loss": 0.9886, "step": 6380 }, { "epoch": 0.09, "grad_norm": 0.54296875, "learning_rate": 0.00018316121629374644, "loss": 0.847, "step": 6385 }, { "epoch": 0.09, "grad_norm": 0.54296875, "learning_rate": 0.00018330464716006885, "loss": 0.8849, "step": 6390 }, { "epoch": 0.09, "grad_norm": 0.515625, "learning_rate": 0.00018344807802639127, "loss": 1.2015, "step": 6395 }, { "epoch": 0.09, "grad_norm": 0.484375, "learning_rate": 0.00018359150889271373, "loss": 0.9451, "step": 6400 }, { "epoch": 0.09, "grad_norm": 0.490234375, "learning_rate": 0.00018373493975903615, "loss": 0.7917, "step": 6405 }, { "epoch": 0.09, "grad_norm": 0.4921875, "learning_rate": 0.00018387837062535859, "loss": 1.0318, "step": 6410 }, { "epoch": 0.09, "grad_norm": 0.43359375, "learning_rate": 0.00018402180149168103, "loss": 0.9183, "step": 6415 }, { "epoch": 0.09, "grad_norm": 0.478515625, "learning_rate": 0.00018416523235800344, "loss": 0.9658, "step": 6420 }, { "epoch": 0.09, "grad_norm": 0.466796875, "learning_rate": 0.0001843086632243259, "loss": 0.9126, "step": 6425 }, { "epoch": 0.09, "grad_norm": 0.470703125, "learning_rate": 0.00018445209409064832, "loss": 0.9077, "step": 6430 }, { "epoch": 0.09, "grad_norm": 0.427734375, "learning_rate": 0.00018459552495697073, "loss": 0.9502, "step": 6435 }, { "epoch": 0.09, "grad_norm": 0.5078125, "learning_rate": 0.0001847389558232932, "loss": 0.8701, "step": 6440 }, { "epoch": 0.09, "grad_norm": 0.482421875, "learning_rate": 0.0001848823866896156, "loss": 0.9623, "step": 6445 }, { "epoch": 0.09, "grad_norm": 0.5, "learning_rate": 0.00018502581755593805, "loss": 1.0634, "step": 6450 }, { "epoch": 0.09, "grad_norm": 0.490234375, "learning_rate": 0.0001851692484222605, "loss": 0.8987, "step": 6455 }, { "epoch": 0.09, "grad_norm": 0.484375, "learning_rate": 0.0001853126792885829, "loss": 1.0042, "step": 6460 }, { "epoch": 0.09, "grad_norm": 0.5234375, "learning_rate": 0.00018545611015490535, "loss": 0.9661, "step": 6465 }, { "epoch": 0.09, "grad_norm": 0.5234375, "learning_rate": 0.0001855995410212278, "loss": 0.9081, "step": 6470 }, { "epoch": 0.09, "grad_norm": 0.453125, "learning_rate": 0.0001857429718875502, "loss": 1.1281, "step": 6475 }, { "epoch": 0.09, "grad_norm": 0.482421875, "learning_rate": 0.00018588640275387264, "loss": 0.9521, "step": 6480 }, { "epoch": 0.09, "grad_norm": 0.490234375, "learning_rate": 0.00018602983362019508, "loss": 0.8882, "step": 6485 }, { "epoch": 0.09, "grad_norm": 0.482421875, "learning_rate": 0.0001861732644865175, "loss": 0.9865, "step": 6490 }, { "epoch": 0.09, "grad_norm": 0.66015625, "learning_rate": 0.00018631669535283993, "loss": 0.8568, "step": 6495 }, { "epoch": 0.09, "grad_norm": 0.5078125, "learning_rate": 0.00018646012621916237, "loss": 1.1288, "step": 6500 }, { "epoch": 0.09, "grad_norm": 0.51171875, "learning_rate": 0.00018660355708548481, "loss": 0.9184, "step": 6505 }, { "epoch": 0.09, "grad_norm": 0.51953125, "learning_rate": 0.00018674698795180723, "loss": 0.9487, "step": 6510 }, { "epoch": 0.09, "grad_norm": 0.439453125, "learning_rate": 0.00018689041881812967, "loss": 0.8626, "step": 6515 }, { "epoch": 0.09, "grad_norm": 0.484375, "learning_rate": 0.0001870338496844521, "loss": 0.926, "step": 6520 }, { "epoch": 0.09, "grad_norm": 0.482421875, "learning_rate": 0.00018717728055077452, "loss": 0.9192, "step": 6525 }, { "epoch": 0.09, "grad_norm": 0.49609375, "learning_rate": 0.00018732071141709696, "loss": 0.9383, "step": 6530 }, { "epoch": 0.09, "grad_norm": 0.5078125, "learning_rate": 0.0001874641422834194, "loss": 0.8595, "step": 6535 }, { "epoch": 0.09, "grad_norm": 0.486328125, "learning_rate": 0.00018760757314974184, "loss": 0.8997, "step": 6540 }, { "epoch": 0.09, "grad_norm": 0.427734375, "learning_rate": 0.00018775100401606428, "loss": 0.9123, "step": 6545 }, { "epoch": 0.09, "grad_norm": 0.4609375, "learning_rate": 0.0001878944348823867, "loss": 0.9546, "step": 6550 }, { "epoch": 0.09, "grad_norm": 0.47265625, "learning_rate": 0.00018803786574870913, "loss": 0.9023, "step": 6555 }, { "epoch": 0.09, "grad_norm": 0.5234375, "learning_rate": 0.00018818129661503157, "loss": 0.9866, "step": 6560 }, { "epoch": 0.09, "grad_norm": 0.54296875, "learning_rate": 0.000188324727481354, "loss": 1.1346, "step": 6565 }, { "epoch": 0.09, "grad_norm": 0.4921875, "learning_rate": 0.00018846815834767643, "loss": 0.946, "step": 6570 }, { "epoch": 0.09, "grad_norm": 0.5078125, "learning_rate": 0.00018861158921399887, "loss": 0.9296, "step": 6575 }, { "epoch": 0.09, "grad_norm": 0.443359375, "learning_rate": 0.00018875502008032128, "loss": 0.9713, "step": 6580 }, { "epoch": 0.09, "grad_norm": 0.498046875, "learning_rate": 0.00018889845094664375, "loss": 1.0274, "step": 6585 }, { "epoch": 0.09, "grad_norm": 0.53515625, "learning_rate": 0.00018904188181296616, "loss": 0.8749, "step": 6590 }, { "epoch": 0.09, "grad_norm": 0.494140625, "learning_rate": 0.00018918531267928857, "loss": 0.989, "step": 6595 }, { "epoch": 0.09, "grad_norm": 0.478515625, "learning_rate": 0.00018932874354561104, "loss": 0.9316, "step": 6600 }, { "epoch": 0.09, "grad_norm": 0.5078125, "learning_rate": 0.00018947217441193345, "loss": 0.9591, "step": 6605 }, { "epoch": 0.09, "grad_norm": 0.5546875, "learning_rate": 0.0001896156052782559, "loss": 1.045, "step": 6610 }, { "epoch": 0.09, "grad_norm": 0.5078125, "learning_rate": 0.00018975903614457833, "loss": 1.0241, "step": 6615 }, { "epoch": 0.09, "grad_norm": 0.51171875, "learning_rate": 0.00018990246701090075, "loss": 1.0077, "step": 6620 }, { "epoch": 0.1, "grad_norm": 0.54296875, "learning_rate": 0.0001900458978772232, "loss": 1.047, "step": 6625 }, { "epoch": 0.1, "grad_norm": 0.515625, "learning_rate": 0.00019018932874354563, "loss": 0.986, "step": 6630 }, { "epoch": 0.1, "grad_norm": 0.46484375, "learning_rate": 0.00019033275960986804, "loss": 0.8676, "step": 6635 }, { "epoch": 0.1, "grad_norm": 0.447265625, "learning_rate": 0.00019047619047619048, "loss": 0.9166, "step": 6640 }, { "epoch": 0.1, "grad_norm": 0.46484375, "learning_rate": 0.00019061962134251292, "loss": 1.1594, "step": 6645 }, { "epoch": 0.1, "grad_norm": 0.4921875, "learning_rate": 0.00019076305220883533, "loss": 0.9954, "step": 6650 }, { "epoch": 0.1, "grad_norm": 0.51953125, "learning_rate": 0.0001909064830751578, "loss": 0.9052, "step": 6655 }, { "epoch": 0.1, "grad_norm": 0.53125, "learning_rate": 0.00019104991394148021, "loss": 0.9444, "step": 6660 }, { "epoch": 0.1, "grad_norm": 0.482421875, "learning_rate": 0.00019119334480780266, "loss": 0.9136, "step": 6665 }, { "epoch": 0.1, "grad_norm": 0.50390625, "learning_rate": 0.0001913367756741251, "loss": 0.9398, "step": 6670 }, { "epoch": 0.1, "grad_norm": 0.5234375, "learning_rate": 0.0001914802065404475, "loss": 1.1613, "step": 6675 }, { "epoch": 0.1, "grad_norm": 0.5078125, "learning_rate": 0.00019162363740676995, "loss": 1.0539, "step": 6680 }, { "epoch": 0.1, "grad_norm": 0.51171875, "learning_rate": 0.0001917670682730924, "loss": 1.0843, "step": 6685 }, { "epoch": 0.1, "grad_norm": 0.515625, "learning_rate": 0.0001919104991394148, "loss": 0.8968, "step": 6690 }, { "epoch": 0.1, "grad_norm": 0.498046875, "learning_rate": 0.00019205393000573724, "loss": 0.9034, "step": 6695 }, { "epoch": 0.1, "grad_norm": 0.498046875, "learning_rate": 0.00019219736087205968, "loss": 1.011, "step": 6700 }, { "epoch": 0.1, "grad_norm": 0.4765625, "learning_rate": 0.00019234079173838212, "loss": 1.0446, "step": 6705 }, { "epoch": 0.1, "grad_norm": 0.53515625, "learning_rate": 0.00019248422260470454, "loss": 0.8931, "step": 6710 }, { "epoch": 0.1, "grad_norm": 0.478515625, "learning_rate": 0.00019262765347102698, "loss": 0.8736, "step": 6715 }, { "epoch": 0.1, "grad_norm": 0.47265625, "learning_rate": 0.00019277108433734942, "loss": 0.9596, "step": 6720 }, { "epoch": 0.1, "grad_norm": 0.453125, "learning_rate": 0.00019291451520367183, "loss": 0.9351, "step": 6725 }, { "epoch": 0.1, "grad_norm": 0.50390625, "learning_rate": 0.00019305794606999427, "loss": 0.9922, "step": 6730 }, { "epoch": 0.1, "grad_norm": 0.5234375, "learning_rate": 0.0001932013769363167, "loss": 1.0471, "step": 6735 }, { "epoch": 0.1, "grad_norm": 0.4140625, "learning_rate": 0.00019334480780263912, "loss": 0.9003, "step": 6740 }, { "epoch": 0.1, "grad_norm": 0.4375, "learning_rate": 0.00019348823866896156, "loss": 0.8397, "step": 6745 }, { "epoch": 0.1, "grad_norm": 0.50390625, "learning_rate": 0.000193631669535284, "loss": 0.9666, "step": 6750 }, { "epoch": 0.1, "grad_norm": 0.470703125, "learning_rate": 0.00019377510040160642, "loss": 0.9207, "step": 6755 }, { "epoch": 0.1, "grad_norm": 0.4921875, "learning_rate": 0.00019391853126792888, "loss": 0.997, "step": 6760 }, { "epoch": 0.1, "grad_norm": 0.4765625, "learning_rate": 0.0001940619621342513, "loss": 0.9651, "step": 6765 }, { "epoch": 0.1, "grad_norm": 0.71484375, "learning_rate": 0.00019420539300057374, "loss": 0.9569, "step": 6770 }, { "epoch": 0.1, "grad_norm": 0.50390625, "learning_rate": 0.00019434882386689618, "loss": 1.0032, "step": 6775 }, { "epoch": 0.1, "grad_norm": 0.43359375, "learning_rate": 0.0001944922547332186, "loss": 0.8701, "step": 6780 }, { "epoch": 0.1, "grad_norm": 0.515625, "learning_rate": 0.00019463568559954103, "loss": 0.8503, "step": 6785 }, { "epoch": 0.1, "grad_norm": 0.5859375, "learning_rate": 0.00019477911646586347, "loss": 0.9847, "step": 6790 }, { "epoch": 0.1, "grad_norm": 0.44140625, "learning_rate": 0.00019492254733218588, "loss": 0.9694, "step": 6795 }, { "epoch": 0.1, "grad_norm": 0.466796875, "learning_rate": 0.00019506597819850835, "loss": 0.8473, "step": 6800 }, { "epoch": 0.1, "grad_norm": 0.5, "learning_rate": 0.00019520940906483076, "loss": 0.8632, "step": 6805 }, { "epoch": 0.1, "grad_norm": 0.53125, "learning_rate": 0.00019535283993115318, "loss": 1.0169, "step": 6810 }, { "epoch": 0.1, "grad_norm": 0.484375, "learning_rate": 0.00019549627079747564, "loss": 0.9015, "step": 6815 }, { "epoch": 0.1, "grad_norm": 0.61328125, "learning_rate": 0.00019563970166379806, "loss": 1.1592, "step": 6820 }, { "epoch": 0.1, "grad_norm": 0.53515625, "learning_rate": 0.0001957831325301205, "loss": 0.9537, "step": 6825 }, { "epoch": 0.1, "grad_norm": 0.53515625, "learning_rate": 0.00019592656339644294, "loss": 1.0179, "step": 6830 }, { "epoch": 0.1, "grad_norm": 0.470703125, "learning_rate": 0.00019606999426276535, "loss": 0.865, "step": 6835 }, { "epoch": 0.1, "grad_norm": 0.4921875, "learning_rate": 0.0001962134251290878, "loss": 1.0772, "step": 6840 }, { "epoch": 0.1, "grad_norm": 0.455078125, "learning_rate": 0.00019635685599541023, "loss": 0.9969, "step": 6845 }, { "epoch": 0.1, "grad_norm": 0.416015625, "learning_rate": 0.00019650028686173264, "loss": 0.8717, "step": 6850 }, { "epoch": 0.1, "grad_norm": 0.51953125, "learning_rate": 0.00019664371772805508, "loss": 0.9652, "step": 6855 }, { "epoch": 0.1, "grad_norm": 0.4765625, "learning_rate": 0.00019678714859437752, "loss": 1.0097, "step": 6860 }, { "epoch": 0.1, "grad_norm": 0.546875, "learning_rate": 0.00019693057946069994, "loss": 0.9766, "step": 6865 }, { "epoch": 0.1, "grad_norm": 0.470703125, "learning_rate": 0.00019707401032702238, "loss": 1.0268, "step": 6870 }, { "epoch": 0.1, "grad_norm": 0.484375, "learning_rate": 0.00019721744119334482, "loss": 0.924, "step": 6875 }, { "epoch": 0.1, "grad_norm": 0.462890625, "learning_rate": 0.00019736087205966726, "loss": 0.9296, "step": 6880 }, { "epoch": 0.1, "grad_norm": 0.51171875, "learning_rate": 0.0001975043029259897, "loss": 1.0601, "step": 6885 }, { "epoch": 0.1, "grad_norm": 0.51953125, "learning_rate": 0.0001976477337923121, "loss": 0.9167, "step": 6890 }, { "epoch": 0.1, "grad_norm": 0.490234375, "learning_rate": 0.00019779116465863455, "loss": 0.9092, "step": 6895 }, { "epoch": 0.1, "grad_norm": 0.5078125, "learning_rate": 0.000197934595524957, "loss": 0.9494, "step": 6900 }, { "epoch": 0.1, "grad_norm": 0.48046875, "learning_rate": 0.0001980780263912794, "loss": 0.8921, "step": 6905 }, { "epoch": 0.1, "grad_norm": 0.482421875, "learning_rate": 0.00019822145725760184, "loss": 0.9716, "step": 6910 }, { "epoch": 0.1, "grad_norm": 0.5234375, "learning_rate": 0.00019836488812392428, "loss": 0.9527, "step": 6915 }, { "epoch": 0.1, "grad_norm": 0.490234375, "learning_rate": 0.00019850831899024672, "loss": 1.0112, "step": 6920 }, { "epoch": 0.1, "grad_norm": 0.51171875, "learning_rate": 0.00019865174985656914, "loss": 1.0535, "step": 6925 }, { "epoch": 0.1, "grad_norm": 0.51953125, "learning_rate": 0.00019879518072289158, "loss": 0.8874, "step": 6930 }, { "epoch": 0.1, "grad_norm": 0.54296875, "learning_rate": 0.00019893861158921402, "loss": 1.0039, "step": 6935 }, { "epoch": 0.1, "grad_norm": 0.5703125, "learning_rate": 0.00019908204245553643, "loss": 1.0371, "step": 6940 }, { "epoch": 0.1, "grad_norm": 0.486328125, "learning_rate": 0.00019922547332185887, "loss": 1.2052, "step": 6945 }, { "epoch": 0.1, "grad_norm": 0.53515625, "learning_rate": 0.0001993689041881813, "loss": 0.942, "step": 6950 }, { "epoch": 0.1, "grad_norm": 0.5234375, "learning_rate": 0.00019951233505450372, "loss": 1.0566, "step": 6955 }, { "epoch": 0.1, "grad_norm": 0.486328125, "learning_rate": 0.00019965576592082616, "loss": 0.9806, "step": 6960 }, { "epoch": 0.1, "grad_norm": 0.48046875, "learning_rate": 0.0001997991967871486, "loss": 0.8986, "step": 6965 }, { "epoch": 0.1, "grad_norm": 0.451171875, "learning_rate": 0.00019994262765347102, "loss": 1.0299, "step": 6970 }, { "epoch": 0.1, "grad_norm": 0.56640625, "learning_rate": 0.0001999999988717395, "loss": 0.977, "step": 6975 }, { "epoch": 0.1, "grad_norm": 0.470703125, "learning_rate": 0.0001999999919768143, "loss": 0.8963, "step": 6980 }, { "epoch": 0.1, "grad_norm": 0.58203125, "learning_rate": 0.0001999999788137757, "loss": 1.0324, "step": 6985 }, { "epoch": 0.1, "grad_norm": 0.48046875, "learning_rate": 0.00019999995938262455, "loss": 0.9285, "step": 6990 }, { "epoch": 0.1, "grad_norm": 0.484375, "learning_rate": 0.00019999993368336204, "loss": 0.878, "step": 6995 }, { "epoch": 0.1, "grad_norm": 0.50390625, "learning_rate": 0.0001999999017159898, "loss": 1.0065, "step": 7000 }, { "epoch": 0.1, "grad_norm": 0.490234375, "learning_rate": 0.00019999986348050983, "loss": 1.0322, "step": 7005 }, { "epoch": 0.1, "grad_norm": 0.490234375, "learning_rate": 0.00019999981897692452, "loss": 0.9879, "step": 7010 }, { "epoch": 0.1, "grad_norm": 0.515625, "learning_rate": 0.00019999976820523667, "loss": 0.8922, "step": 7015 }, { "epoch": 0.1, "grad_norm": 0.546875, "learning_rate": 0.00019999971116544947, "loss": 1.0516, "step": 7020 }, { "epoch": 0.1, "grad_norm": 0.53515625, "learning_rate": 0.0001999996478575665, "loss": 0.9043, "step": 7025 }, { "epoch": 0.1, "grad_norm": 0.52734375, "learning_rate": 0.0001999995782815917, "loss": 1.0973, "step": 7030 }, { "epoch": 0.1, "grad_norm": 0.50390625, "learning_rate": 0.0001999995024375294, "loss": 0.9128, "step": 7035 }, { "epoch": 0.1, "grad_norm": 0.51171875, "learning_rate": 0.00019999942032538444, "loss": 1.0025, "step": 7040 }, { "epoch": 0.1, "grad_norm": 0.4765625, "learning_rate": 0.0001999993319451619, "loss": 0.8311, "step": 7045 }, { "epoch": 0.1, "grad_norm": 0.490234375, "learning_rate": 0.00019999923729686737, "loss": 1.0452, "step": 7050 }, { "epoch": 0.1, "grad_norm": 0.470703125, "learning_rate": 0.00019999913638050674, "loss": 1.0823, "step": 7055 }, { "epoch": 0.1, "grad_norm": 0.44140625, "learning_rate": 0.00019999902919608635, "loss": 0.8616, "step": 7060 }, { "epoch": 0.1, "grad_norm": 0.51953125, "learning_rate": 0.00019999891574361294, "loss": 0.8661, "step": 7065 }, { "epoch": 0.1, "grad_norm": 0.49609375, "learning_rate": 0.00019999879602309358, "loss": 1.0924, "step": 7070 }, { "epoch": 0.1, "grad_norm": 0.474609375, "learning_rate": 0.0001999986700345358, "loss": 0.9282, "step": 7075 }, { "epoch": 0.1, "grad_norm": 0.4296875, "learning_rate": 0.0001999985377779475, "loss": 0.8499, "step": 7080 }, { "epoch": 0.1, "grad_norm": 0.47265625, "learning_rate": 0.00019999839925333697, "loss": 0.9638, "step": 7085 }, { "epoch": 0.1, "grad_norm": 0.484375, "learning_rate": 0.0001999982544607129, "loss": 1.019, "step": 7090 }, { "epoch": 0.1, "grad_norm": 0.466796875, "learning_rate": 0.00019999810340008432, "loss": 1.0765, "step": 7095 }, { "epoch": 0.1, "grad_norm": 0.48046875, "learning_rate": 0.00019999794607146072, "loss": 0.9585, "step": 7100 }, { "epoch": 0.1, "grad_norm": 0.51953125, "learning_rate": 0.00019999778247485202, "loss": 0.9834, "step": 7105 }, { "epoch": 0.1, "grad_norm": 0.5234375, "learning_rate": 0.00019999761261026838, "loss": 1.0673, "step": 7110 }, { "epoch": 0.1, "grad_norm": 0.5234375, "learning_rate": 0.00019999743647772055, "loss": 0.9441, "step": 7115 }, { "epoch": 0.1, "grad_norm": 0.5078125, "learning_rate": 0.00019999725407721946, "loss": 0.7877, "step": 7120 }, { "epoch": 0.1, "grad_norm": 0.55859375, "learning_rate": 0.0001999970654087766, "loss": 0.9829, "step": 7125 }, { "epoch": 0.1, "grad_norm": 0.50390625, "learning_rate": 0.00019999687047240382, "loss": 0.9581, "step": 7130 }, { "epoch": 0.1, "grad_norm": 0.423828125, "learning_rate": 0.0001999966692681133, "loss": 0.9482, "step": 7135 }, { "epoch": 0.1, "grad_norm": 0.5234375, "learning_rate": 0.00019999646179591767, "loss": 1.0197, "step": 7140 }, { "epoch": 0.1, "grad_norm": 0.74609375, "learning_rate": 0.0001999962480558299, "loss": 0.8861, "step": 7145 }, { "epoch": 0.1, "grad_norm": 0.59375, "learning_rate": 0.00019999602804786343, "loss": 1.0572, "step": 7150 }, { "epoch": 0.1, "grad_norm": 0.51171875, "learning_rate": 0.00019999580177203208, "loss": 0.8916, "step": 7155 }, { "epoch": 0.1, "grad_norm": 0.484375, "learning_rate": 0.00019999556922834993, "loss": 0.8698, "step": 7160 }, { "epoch": 0.1, "grad_norm": 0.53125, "learning_rate": 0.00019999533041683166, "loss": 0.9218, "step": 7165 }, { "epoch": 0.1, "grad_norm": 0.48828125, "learning_rate": 0.00019999508533749216, "loss": 0.8811, "step": 7170 }, { "epoch": 0.1, "grad_norm": 0.482421875, "learning_rate": 0.00019999483399034683, "loss": 1.0427, "step": 7175 }, { "epoch": 0.1, "grad_norm": 0.51171875, "learning_rate": 0.00019999457637541142, "loss": 0.8641, "step": 7180 }, { "epoch": 0.1, "grad_norm": 0.51953125, "learning_rate": 0.0001999943124927021, "loss": 0.9981, "step": 7185 }, { "epoch": 0.1, "grad_norm": 0.58984375, "learning_rate": 0.00019999404234223537, "loss": 1.0612, "step": 7190 }, { "epoch": 0.1, "grad_norm": 0.4609375, "learning_rate": 0.0001999937659240282, "loss": 0.9003, "step": 7195 }, { "epoch": 0.1, "grad_norm": 0.57421875, "learning_rate": 0.0001999934832380979, "loss": 1.0519, "step": 7200 }, { "epoch": 0.1, "grad_norm": 0.5078125, "learning_rate": 0.00019999319428446217, "loss": 1.1604, "step": 7205 }, { "epoch": 0.1, "grad_norm": 0.53125, "learning_rate": 0.00019999289906313914, "loss": 1.0522, "step": 7210 }, { "epoch": 0.1, "grad_norm": 0.4765625, "learning_rate": 0.00019999259757414733, "loss": 0.9722, "step": 7215 }, { "epoch": 0.1, "grad_norm": 0.490234375, "learning_rate": 0.0001999922898175056, "loss": 1.0899, "step": 7220 }, { "epoch": 0.1, "grad_norm": 0.52734375, "learning_rate": 0.00019999197579323327, "loss": 0.9977, "step": 7225 }, { "epoch": 0.1, "grad_norm": 0.5390625, "learning_rate": 0.00019999165550135003, "loss": 0.9424, "step": 7230 }, { "epoch": 0.1, "grad_norm": 0.447265625, "learning_rate": 0.0001999913289418759, "loss": 0.8548, "step": 7235 }, { "epoch": 0.1, "grad_norm": 0.5546875, "learning_rate": 0.00019999099611483145, "loss": 0.9436, "step": 7240 }, { "epoch": 0.1, "grad_norm": 0.515625, "learning_rate": 0.00019999065702023744, "loss": 1.0087, "step": 7245 }, { "epoch": 0.1, "grad_norm": 0.44921875, "learning_rate": 0.0001999903116581152, "loss": 1.0426, "step": 7250 }, { "epoch": 0.1, "grad_norm": 0.546875, "learning_rate": 0.00019998996002848634, "loss": 0.9219, "step": 7255 }, { "epoch": 0.1, "grad_norm": 0.458984375, "learning_rate": 0.00019998960213137286, "loss": 0.933, "step": 7260 }, { "epoch": 0.1, "grad_norm": 0.49609375, "learning_rate": 0.0001999892379667973, "loss": 1.0413, "step": 7265 }, { "epoch": 0.1, "grad_norm": 0.76171875, "learning_rate": 0.0001999888675347824, "loss": 0.9412, "step": 7270 }, { "epoch": 0.1, "grad_norm": 0.55078125, "learning_rate": 0.0001999884908353514, "loss": 0.9745, "step": 7275 }, { "epoch": 0.1, "grad_norm": 0.40234375, "learning_rate": 0.00019998810786852795, "loss": 0.8132, "step": 7280 }, { "epoch": 0.1, "grad_norm": 0.4921875, "learning_rate": 0.000199987718634336, "loss": 0.9773, "step": 7285 }, { "epoch": 0.1, "grad_norm": 0.73046875, "learning_rate": 0.00019998732313279994, "loss": 0.8849, "step": 7290 }, { "epoch": 0.1, "grad_norm": 0.5703125, "learning_rate": 0.00019998692136394465, "loss": 0.9524, "step": 7295 }, { "epoch": 0.1, "grad_norm": 0.5625, "learning_rate": 0.0001999865133277952, "loss": 1.0294, "step": 7300 }, { "epoch": 0.1, "grad_norm": 0.6328125, "learning_rate": 0.00019998609902437725, "loss": 1.0705, "step": 7305 }, { "epoch": 0.1, "grad_norm": 0.47265625, "learning_rate": 0.00019998567845371673, "loss": 0.9558, "step": 7310 }, { "epoch": 0.1, "grad_norm": 0.482421875, "learning_rate": 0.00019998525161584, "loss": 1.0696, "step": 7315 }, { "epoch": 0.11, "grad_norm": 0.5, "learning_rate": 0.00019998481851077384, "loss": 0.8552, "step": 7320 }, { "epoch": 0.11, "grad_norm": 0.5, "learning_rate": 0.00019998437913854538, "loss": 0.9012, "step": 7325 }, { "epoch": 0.11, "grad_norm": 0.494140625, "learning_rate": 0.00019998393349918216, "loss": 0.9747, "step": 7330 }, { "epoch": 0.11, "grad_norm": 0.46484375, "learning_rate": 0.0001999834815927121, "loss": 0.9947, "step": 7335 }, { "epoch": 0.11, "grad_norm": 0.58984375, "learning_rate": 0.00019998302341916357, "loss": 0.9596, "step": 7340 }, { "epoch": 0.11, "grad_norm": 0.53125, "learning_rate": 0.00019998255897856523, "loss": 0.9642, "step": 7345 }, { "epoch": 0.11, "grad_norm": 0.4921875, "learning_rate": 0.00019998208827094627, "loss": 1.0481, "step": 7350 }, { "epoch": 0.11, "grad_norm": 0.50390625, "learning_rate": 0.0001999816112963361, "loss": 1.0289, "step": 7355 }, { "epoch": 0.11, "grad_norm": 0.54296875, "learning_rate": 0.0001999811280547647, "loss": 1.0296, "step": 7360 }, { "epoch": 0.11, "grad_norm": 0.443359375, "learning_rate": 0.0001999806385462623, "loss": 0.9311, "step": 7365 }, { "epoch": 0.11, "grad_norm": 0.515625, "learning_rate": 0.0001999801427708596, "loss": 1.0396, "step": 7370 }, { "epoch": 0.11, "grad_norm": 0.466796875, "learning_rate": 0.0001999796407285877, "loss": 0.9284, "step": 7375 }, { "epoch": 0.11, "grad_norm": 0.55078125, "learning_rate": 0.00019997913241947806, "loss": 1.107, "step": 7380 }, { "epoch": 0.11, "grad_norm": 0.48046875, "learning_rate": 0.00019997861784356254, "loss": 0.946, "step": 7385 }, { "epoch": 0.11, "grad_norm": 0.48828125, "learning_rate": 0.00019997809700087336, "loss": 0.9533, "step": 7390 }, { "epoch": 0.11, "grad_norm": 0.482421875, "learning_rate": 0.00019997756989144317, "loss": 0.9806, "step": 7395 }, { "epoch": 0.11, "grad_norm": 0.640625, "learning_rate": 0.0001999770365153051, "loss": 1.0817, "step": 7400 }, { "epoch": 0.11, "grad_norm": 0.51171875, "learning_rate": 0.0001999764968724925, "loss": 0.9468, "step": 7405 }, { "epoch": 0.11, "grad_norm": 0.4765625, "learning_rate": 0.00019997595096303919, "loss": 0.9327, "step": 7410 }, { "epoch": 0.11, "grad_norm": 0.5390625, "learning_rate": 0.00019997539878697943, "loss": 1.0226, "step": 7415 }, { "epoch": 0.11, "grad_norm": 0.50390625, "learning_rate": 0.0001999748403443478, "loss": 1.0015, "step": 7420 }, { "epoch": 0.11, "grad_norm": 0.53125, "learning_rate": 0.0001999742756351793, "loss": 1.029, "step": 7425 }, { "epoch": 0.11, "grad_norm": 0.56640625, "learning_rate": 0.00019997370465950936, "loss": 0.9649, "step": 7430 }, { "epoch": 0.11, "grad_norm": 0.470703125, "learning_rate": 0.00019997312741737376, "loss": 1.0384, "step": 7435 }, { "epoch": 0.11, "grad_norm": 0.5, "learning_rate": 0.00019997254390880867, "loss": 0.9221, "step": 7440 }, { "epoch": 0.11, "grad_norm": 0.578125, "learning_rate": 0.00019997195413385064, "loss": 1.0112, "step": 7445 }, { "epoch": 0.11, "grad_norm": 0.5078125, "learning_rate": 0.00019997135809253668, "loss": 0.8739, "step": 7450 }, { "epoch": 0.11, "grad_norm": 0.486328125, "learning_rate": 0.00019997075578490417, "loss": 1.0613, "step": 7455 }, { "epoch": 0.11, "grad_norm": 0.49609375, "learning_rate": 0.00019997014721099082, "loss": 1.0139, "step": 7460 }, { "epoch": 0.11, "grad_norm": 0.58203125, "learning_rate": 0.00019996953237083474, "loss": 1.0393, "step": 7465 }, { "epoch": 0.11, "grad_norm": 0.51953125, "learning_rate": 0.00019996891126447457, "loss": 1.0109, "step": 7470 }, { "epoch": 0.11, "grad_norm": 0.498046875, "learning_rate": 0.00019996828389194914, "loss": 0.901, "step": 7475 }, { "epoch": 0.11, "grad_norm": 0.515625, "learning_rate": 0.00019996765025329785, "loss": 0.9787, "step": 7480 }, { "epoch": 0.11, "grad_norm": 0.498046875, "learning_rate": 0.0001999670103485604, "loss": 1.0034, "step": 7485 }, { "epoch": 0.11, "grad_norm": 0.65234375, "learning_rate": 0.00019996636417777687, "loss": 1.0497, "step": 7490 }, { "epoch": 0.11, "grad_norm": 0.5546875, "learning_rate": 0.0001999657117409878, "loss": 0.8797, "step": 7495 }, { "epoch": 0.11, "grad_norm": 0.4765625, "learning_rate": 0.00019996505303823402, "loss": 1.0155, "step": 7500 }, { "epoch": 0.11, "grad_norm": 0.53125, "learning_rate": 0.00019996438806955692, "loss": 0.9911, "step": 7505 }, { "epoch": 0.11, "grad_norm": 0.6875, "learning_rate": 0.0001999637168349981, "loss": 1.0781, "step": 7510 }, { "epoch": 0.11, "grad_norm": 0.49609375, "learning_rate": 0.00019996303933459965, "loss": 1.0164, "step": 7515 }, { "epoch": 0.11, "grad_norm": 0.49609375, "learning_rate": 0.00019996235556840405, "loss": 0.9753, "step": 7520 }, { "epoch": 0.11, "grad_norm": 0.55078125, "learning_rate": 0.00019996166553645417, "loss": 0.96, "step": 7525 }, { "epoch": 0.11, "grad_norm": 0.5078125, "learning_rate": 0.00019996096923879323, "loss": 0.9442, "step": 7530 }, { "epoch": 0.11, "grad_norm": 0.494140625, "learning_rate": 0.00019996026667546492, "loss": 0.8743, "step": 7535 }, { "epoch": 0.11, "grad_norm": 0.52734375, "learning_rate": 0.00019995955784651322, "loss": 0.978, "step": 7540 }, { "epoch": 0.11, "grad_norm": 0.53515625, "learning_rate": 0.0001999588427519826, "loss": 0.969, "step": 7545 }, { "epoch": 0.11, "grad_norm": 0.49609375, "learning_rate": 0.00019995812139191787, "loss": 0.8663, "step": 7550 }, { "epoch": 0.11, "grad_norm": 0.474609375, "learning_rate": 0.00019995739376636425, "loss": 1.0276, "step": 7555 }, { "epoch": 0.11, "grad_norm": 0.62109375, "learning_rate": 0.00019995665987536737, "loss": 1.1052, "step": 7560 }, { "epoch": 0.11, "grad_norm": 0.68359375, "learning_rate": 0.0001999559197189732, "loss": 1.019, "step": 7565 }, { "epoch": 0.11, "grad_norm": 0.62109375, "learning_rate": 0.0001999551732972281, "loss": 0.8811, "step": 7570 }, { "epoch": 0.11, "grad_norm": 0.52734375, "learning_rate": 0.00019995442061017892, "loss": 0.8933, "step": 7575 }, { "epoch": 0.11, "grad_norm": 0.55078125, "learning_rate": 0.00019995366165787283, "loss": 1.0845, "step": 7580 }, { "epoch": 0.11, "grad_norm": 0.5, "learning_rate": 0.0001999528964403574, "loss": 0.8869, "step": 7585 }, { "epoch": 0.11, "grad_norm": 0.546875, "learning_rate": 0.00019995212495768055, "loss": 0.9254, "step": 7590 }, { "epoch": 0.11, "grad_norm": 0.515625, "learning_rate": 0.0001999513472098907, "loss": 0.938, "step": 7595 }, { "epoch": 0.11, "grad_norm": 0.474609375, "learning_rate": 0.00019995056319703657, "loss": 1.0029, "step": 7600 }, { "epoch": 0.11, "grad_norm": 0.455078125, "learning_rate": 0.00019994977291916727, "loss": 0.9692, "step": 7605 }, { "epoch": 0.11, "grad_norm": 0.59375, "learning_rate": 0.00019994897637633244, "loss": 1.0439, "step": 7610 }, { "epoch": 0.11, "grad_norm": 0.5390625, "learning_rate": 0.00019994817356858187, "loss": 0.8371, "step": 7615 }, { "epoch": 0.11, "grad_norm": 0.49609375, "learning_rate": 0.00019994736449596598, "loss": 0.9537, "step": 7620 }, { "epoch": 0.11, "grad_norm": 0.5078125, "learning_rate": 0.00019994654915853546, "loss": 0.9284, "step": 7625 }, { "epoch": 0.11, "grad_norm": 0.50390625, "learning_rate": 0.00019994572755634138, "loss": 1.0318, "step": 7630 }, { "epoch": 0.11, "grad_norm": 0.5625, "learning_rate": 0.00019994489968943528, "loss": 1.1444, "step": 7635 }, { "epoch": 0.11, "grad_norm": 0.59375, "learning_rate": 0.00019994406555786905, "loss": 0.8694, "step": 7640 }, { "epoch": 0.11, "grad_norm": 0.5, "learning_rate": 0.00019994322516169496, "loss": 1.0167, "step": 7645 }, { "epoch": 0.11, "grad_norm": 0.54296875, "learning_rate": 0.0001999423785009657, "loss": 1.1432, "step": 7650 }, { "epoch": 0.11, "grad_norm": 0.59375, "learning_rate": 0.0001999415255757343, "loss": 0.9968, "step": 7655 }, { "epoch": 0.11, "grad_norm": 0.52734375, "learning_rate": 0.0001999406663860543, "loss": 0.9843, "step": 7660 }, { "epoch": 0.11, "grad_norm": 0.470703125, "learning_rate": 0.00019993980093197946, "loss": 0.9883, "step": 7665 }, { "epoch": 0.11, "grad_norm": 0.56640625, "learning_rate": 0.0001999389292135641, "loss": 0.8624, "step": 7670 }, { "epoch": 0.11, "grad_norm": 0.59765625, "learning_rate": 0.00019993805123086285, "loss": 1.0464, "step": 7675 }, { "epoch": 0.11, "grad_norm": 0.44921875, "learning_rate": 0.0001999371669839307, "loss": 0.8542, "step": 7680 }, { "epoch": 0.11, "grad_norm": 0.494140625, "learning_rate": 0.0001999362764728231, "loss": 0.9581, "step": 7685 }, { "epoch": 0.11, "grad_norm": 0.5234375, "learning_rate": 0.00019993537969759592, "loss": 0.935, "step": 7690 }, { "epoch": 0.11, "grad_norm": 0.578125, "learning_rate": 0.0001999344766583053, "loss": 0.8918, "step": 7695 }, { "epoch": 0.11, "grad_norm": 0.5859375, "learning_rate": 0.00019993356735500782, "loss": 1.0286, "step": 7700 }, { "epoch": 0.11, "grad_norm": 0.640625, "learning_rate": 0.0001999326517877606, "loss": 1.0116, "step": 7705 }, { "epoch": 0.11, "grad_norm": 0.55078125, "learning_rate": 0.0001999317299566209, "loss": 0.9422, "step": 7710 }, { "epoch": 0.11, "grad_norm": 0.458984375, "learning_rate": 0.0001999308018616466, "loss": 0.9841, "step": 7715 }, { "epoch": 0.11, "grad_norm": 0.58203125, "learning_rate": 0.00019992986750289577, "loss": 1.05, "step": 7720 }, { "epoch": 0.11, "grad_norm": 0.55859375, "learning_rate": 0.00019992892688042705, "loss": 0.9811, "step": 7725 }, { "epoch": 0.11, "grad_norm": 0.546875, "learning_rate": 0.0001999279799942994, "loss": 1.144, "step": 7730 }, { "epoch": 0.11, "grad_norm": 0.498046875, "learning_rate": 0.00019992702684457216, "loss": 0.8471, "step": 7735 }, { "epoch": 0.11, "grad_norm": 0.474609375, "learning_rate": 0.00019992606743130507, "loss": 0.8222, "step": 7740 }, { "epoch": 0.11, "grad_norm": 0.486328125, "learning_rate": 0.00019992510175455825, "loss": 0.9594, "step": 7745 }, { "epoch": 0.11, "grad_norm": 0.55859375, "learning_rate": 0.00019992412981439226, "loss": 0.9451, "step": 7750 }, { "epoch": 0.11, "grad_norm": 0.51171875, "learning_rate": 0.000199923151610868, "loss": 0.8905, "step": 7755 }, { "epoch": 0.11, "grad_norm": 0.5234375, "learning_rate": 0.00019992216714404681, "loss": 1.06, "step": 7760 }, { "epoch": 0.11, "grad_norm": 0.462890625, "learning_rate": 0.00019992117641399036, "loss": 0.9044, "step": 7765 }, { "epoch": 0.11, "grad_norm": 0.494140625, "learning_rate": 0.0001999201794207608, "loss": 0.8762, "step": 7770 }, { "epoch": 0.11, "grad_norm": 0.47265625, "learning_rate": 0.00019991917616442057, "loss": 0.9525, "step": 7775 }, { "epoch": 0.11, "grad_norm": 0.515625, "learning_rate": 0.0001999181666450326, "loss": 1.1321, "step": 7780 }, { "epoch": 0.11, "grad_norm": 0.55859375, "learning_rate": 0.0001999171508626601, "loss": 0.9884, "step": 7785 }, { "epoch": 0.11, "grad_norm": 0.515625, "learning_rate": 0.00019991612881736684, "loss": 0.9493, "step": 7790 }, { "epoch": 0.11, "grad_norm": 0.53515625, "learning_rate": 0.0001999151005092168, "loss": 1.0176, "step": 7795 }, { "epoch": 0.11, "grad_norm": 0.54296875, "learning_rate": 0.0001999140659382745, "loss": 0.9507, "step": 7800 }, { "epoch": 0.11, "grad_norm": 0.546875, "learning_rate": 0.00019991302510460472, "loss": 0.9906, "step": 7805 }, { "epoch": 0.11, "grad_norm": 0.52734375, "learning_rate": 0.00019991197800827275, "loss": 0.9058, "step": 7810 }, { "epoch": 0.11, "grad_norm": 0.5546875, "learning_rate": 0.0001999109246493442, "loss": 0.9245, "step": 7815 }, { "epoch": 0.11, "grad_norm": 0.5625, "learning_rate": 0.0001999098650278851, "loss": 1.0691, "step": 7820 }, { "epoch": 0.11, "grad_norm": 0.52734375, "learning_rate": 0.00019990879914396189, "loss": 0.9908, "step": 7825 }, { "epoch": 0.11, "grad_norm": 0.396484375, "learning_rate": 0.00019990772699764135, "loss": 0.8805, "step": 7830 }, { "epoch": 0.11, "grad_norm": 0.5234375, "learning_rate": 0.0001999066485889907, "loss": 0.9424, "step": 7835 }, { "epoch": 0.11, "grad_norm": 0.5234375, "learning_rate": 0.00019990556391807752, "loss": 0.8939, "step": 7840 }, { "epoch": 0.11, "grad_norm": 0.5234375, "learning_rate": 0.00019990447298496986, "loss": 1.0941, "step": 7845 }, { "epoch": 0.11, "grad_norm": 0.50390625, "learning_rate": 0.000199903375789736, "loss": 1.064, "step": 7850 }, { "epoch": 0.11, "grad_norm": 0.5078125, "learning_rate": 0.00019990227233244477, "loss": 1.1336, "step": 7855 }, { "epoch": 0.11, "grad_norm": 0.453125, "learning_rate": 0.00019990116261316536, "loss": 0.8951, "step": 7860 }, { "epoch": 0.11, "grad_norm": 0.52734375, "learning_rate": 0.00019990004663196728, "loss": 0.8861, "step": 7865 }, { "epoch": 0.11, "grad_norm": 0.50390625, "learning_rate": 0.0001998989243889205, "loss": 0.9515, "step": 7870 }, { "epoch": 0.11, "grad_norm": 0.546875, "learning_rate": 0.00019989779588409538, "loss": 0.81, "step": 7875 }, { "epoch": 0.11, "grad_norm": 0.50390625, "learning_rate": 0.00019989666111756263, "loss": 0.937, "step": 7880 }, { "epoch": 0.11, "grad_norm": 0.486328125, "learning_rate": 0.00019989552008939336, "loss": 1.0635, "step": 7885 }, { "epoch": 0.11, "grad_norm": 0.5078125, "learning_rate": 0.00019989437279965917, "loss": 1.0161, "step": 7890 }, { "epoch": 0.11, "grad_norm": 0.5859375, "learning_rate": 0.00019989321924843186, "loss": 1.0589, "step": 7895 }, { "epoch": 0.11, "grad_norm": 0.53515625, "learning_rate": 0.00019989205943578387, "loss": 0.9836, "step": 7900 }, { "epoch": 0.11, "grad_norm": 0.5, "learning_rate": 0.0001998908933617878, "loss": 0.9613, "step": 7905 }, { "epoch": 0.11, "grad_norm": 0.5859375, "learning_rate": 0.00019988972102651675, "loss": 1.0425, "step": 7910 }, { "epoch": 0.11, "grad_norm": 0.609375, "learning_rate": 0.00019988854243004422, "loss": 0.9164, "step": 7915 }, { "epoch": 0.11, "grad_norm": 0.609375, "learning_rate": 0.0001998873575724441, "loss": 0.9471, "step": 7920 }, { "epoch": 0.11, "grad_norm": 0.490234375, "learning_rate": 0.00019988616645379064, "loss": 0.8745, "step": 7925 }, { "epoch": 0.11, "grad_norm": 0.54296875, "learning_rate": 0.0001998849690741585, "loss": 0.8713, "step": 7930 }, { "epoch": 0.11, "grad_norm": 0.51953125, "learning_rate": 0.00019988376543362277, "loss": 1.0146, "step": 7935 }, { "epoch": 0.11, "grad_norm": 0.59375, "learning_rate": 0.00019988255553225886, "loss": 1.0469, "step": 7940 }, { "epoch": 0.11, "grad_norm": 0.5234375, "learning_rate": 0.00019988133937014256, "loss": 1.032, "step": 7945 }, { "epoch": 0.11, "grad_norm": 0.53125, "learning_rate": 0.0001998801169473502, "loss": 0.9113, "step": 7950 }, { "epoch": 0.11, "grad_norm": 0.52734375, "learning_rate": 0.00019987888826395836, "loss": 1.0555, "step": 7955 }, { "epoch": 0.11, "grad_norm": 0.466796875, "learning_rate": 0.00019987765332004403, "loss": 1.0353, "step": 7960 }, { "epoch": 0.11, "grad_norm": 0.5078125, "learning_rate": 0.00019987641211568463, "loss": 0.8749, "step": 7965 }, { "epoch": 0.11, "grad_norm": 0.5390625, "learning_rate": 0.00019987516465095798, "loss": 1.0204, "step": 7970 }, { "epoch": 0.11, "grad_norm": 0.515625, "learning_rate": 0.00019987391092594228, "loss": 0.978, "step": 7975 }, { "epoch": 0.11, "grad_norm": 0.4921875, "learning_rate": 0.00019987265094071607, "loss": 1.0604, "step": 7980 }, { "epoch": 0.11, "grad_norm": 0.466796875, "learning_rate": 0.00019987138469535837, "loss": 0.9035, "step": 7985 }, { "epoch": 0.11, "grad_norm": 0.5, "learning_rate": 0.00019987011218994854, "loss": 1.1171, "step": 7990 }, { "epoch": 0.11, "grad_norm": 0.51171875, "learning_rate": 0.00019986883342456633, "loss": 0.9754, "step": 7995 }, { "epoch": 0.11, "grad_norm": 0.578125, "learning_rate": 0.00019986754839929188, "loss": 1.0346, "step": 8000 }, { "epoch": 0.11, "grad_norm": 0.486328125, "learning_rate": 0.00019986625711420578, "loss": 0.9792, "step": 8005 }, { "epoch": 0.11, "grad_norm": 0.63671875, "learning_rate": 0.00019986495956938894, "loss": 1.1406, "step": 8010 }, { "epoch": 0.11, "grad_norm": 0.484375, "learning_rate": 0.00019986365576492266, "loss": 0.9641, "step": 8015 }, { "epoch": 0.12, "grad_norm": 0.5, "learning_rate": 0.00019986234570088876, "loss": 1.0433, "step": 8020 }, { "epoch": 0.12, "grad_norm": 0.59765625, "learning_rate": 0.0001998610293773693, "loss": 1.013, "step": 8025 }, { "epoch": 0.12, "grad_norm": 0.734375, "learning_rate": 0.0001998597067944467, "loss": 1.0205, "step": 8030 }, { "epoch": 0.12, "grad_norm": 0.51953125, "learning_rate": 0.00019985837795220404, "loss": 0.8346, "step": 8035 }, { "epoch": 0.12, "grad_norm": 0.53515625, "learning_rate": 0.0001998570428507245, "loss": 1.0299, "step": 8040 }, { "epoch": 0.12, "grad_norm": 0.55859375, "learning_rate": 0.00019985570149009177, "loss": 0.8954, "step": 8045 }, { "epoch": 0.12, "grad_norm": 0.53515625, "learning_rate": 0.00019985435387038996, "loss": 1.0256, "step": 8050 }, { "epoch": 0.12, "grad_norm": 0.5859375, "learning_rate": 0.0001998529999917035, "loss": 1.1181, "step": 8055 }, { "epoch": 0.12, "grad_norm": 0.55078125, "learning_rate": 0.0001998516398541173, "loss": 0.9315, "step": 8060 }, { "epoch": 0.12, "grad_norm": 0.51953125, "learning_rate": 0.00019985027345771657, "loss": 1.0552, "step": 8065 }, { "epoch": 0.12, "grad_norm": 0.5078125, "learning_rate": 0.000199848900802587, "loss": 1.0288, "step": 8070 }, { "epoch": 0.12, "grad_norm": 0.5703125, "learning_rate": 0.0001998475218888146, "loss": 1.0476, "step": 8075 }, { "epoch": 0.12, "grad_norm": 0.5546875, "learning_rate": 0.00019984613671648579, "loss": 1.0866, "step": 8080 }, { "epoch": 0.12, "grad_norm": 0.515625, "learning_rate": 0.00019984474528568746, "loss": 0.9443, "step": 8085 }, { "epoch": 0.12, "grad_norm": 0.51953125, "learning_rate": 0.00019984334759650676, "loss": 0.9872, "step": 8090 }, { "epoch": 0.12, "grad_norm": 0.60546875, "learning_rate": 0.0001998419436490313, "loss": 0.9306, "step": 8095 }, { "epoch": 0.12, "grad_norm": 0.58984375, "learning_rate": 0.00019984053344334916, "loss": 1.1319, "step": 8100 }, { "epoch": 0.12, "grad_norm": 0.55078125, "learning_rate": 0.00019983911697954863, "loss": 0.9468, "step": 8105 }, { "epoch": 0.12, "grad_norm": 0.49609375, "learning_rate": 0.00019983769425771855, "loss": 1.0312, "step": 8110 }, { "epoch": 0.12, "grad_norm": 0.5546875, "learning_rate": 0.00019983626527794808, "loss": 1.0196, "step": 8115 }, { "epoch": 0.12, "grad_norm": 0.51171875, "learning_rate": 0.0001998348300403268, "loss": 0.8862, "step": 8120 }, { "epoch": 0.12, "grad_norm": 0.5234375, "learning_rate": 0.00019983338854494468, "loss": 0.8766, "step": 8125 }, { "epoch": 0.12, "grad_norm": 0.50390625, "learning_rate": 0.00019983194079189204, "loss": 0.9119, "step": 8130 }, { "epoch": 0.12, "grad_norm": 0.54296875, "learning_rate": 0.0001998304867812597, "loss": 1.1187, "step": 8135 }, { "epoch": 0.12, "grad_norm": 0.51171875, "learning_rate": 0.00019982902651313872, "loss": 0.9005, "step": 8140 }, { "epoch": 0.12, "grad_norm": 0.5390625, "learning_rate": 0.00019982755998762067, "loss": 0.9234, "step": 8145 }, { "epoch": 0.12, "grad_norm": 0.69921875, "learning_rate": 0.00019982608720479745, "loss": 1.0712, "step": 8150 }, { "epoch": 0.12, "grad_norm": 0.6015625, "learning_rate": 0.00019982460816476142, "loss": 0.8635, "step": 8155 }, { "epoch": 0.12, "grad_norm": 0.57421875, "learning_rate": 0.00019982312286760524, "loss": 1.1264, "step": 8160 }, { "epoch": 0.12, "grad_norm": 0.5234375, "learning_rate": 0.00019982163131342201, "loss": 0.9491, "step": 8165 }, { "epoch": 0.12, "grad_norm": 0.57421875, "learning_rate": 0.0001998201335023053, "loss": 0.956, "step": 8170 }, { "epoch": 0.12, "grad_norm": 0.443359375, "learning_rate": 0.00019981862943434887, "loss": 0.983, "step": 8175 }, { "epoch": 0.12, "grad_norm": 0.474609375, "learning_rate": 0.00019981711910964714, "loss": 0.9417, "step": 8180 }, { "epoch": 0.12, "grad_norm": 0.5625, "learning_rate": 0.00019981560252829464, "loss": 1.0731, "step": 8185 }, { "epoch": 0.12, "grad_norm": 0.486328125, "learning_rate": 0.00019981407969038651, "loss": 0.9941, "step": 8190 }, { "epoch": 0.12, "grad_norm": 0.58203125, "learning_rate": 0.0001998125505960182, "loss": 0.935, "step": 8195 }, { "epoch": 0.12, "grad_norm": 0.55078125, "learning_rate": 0.00019981101524528552, "loss": 0.8414, "step": 8200 }, { "epoch": 0.12, "grad_norm": 0.51171875, "learning_rate": 0.00019980947363828472, "loss": 1.0664, "step": 8205 }, { "epoch": 0.12, "grad_norm": 0.51953125, "learning_rate": 0.00019980792577511247, "loss": 0.88, "step": 8210 }, { "epoch": 0.12, "grad_norm": 0.5625, "learning_rate": 0.00019980637165586574, "loss": 0.9858, "step": 8215 }, { "epoch": 0.12, "grad_norm": 0.54296875, "learning_rate": 0.00019980481128064196, "loss": 0.9296, "step": 8220 }, { "epoch": 0.12, "grad_norm": 0.462890625, "learning_rate": 0.00019980324464953896, "loss": 1.0408, "step": 8225 }, { "epoch": 0.12, "grad_norm": 0.55859375, "learning_rate": 0.0001998016717626549, "loss": 1.1235, "step": 8230 }, { "epoch": 0.12, "grad_norm": 0.54296875, "learning_rate": 0.00019980009262008844, "loss": 0.8829, "step": 8235 }, { "epoch": 0.12, "grad_norm": 0.5, "learning_rate": 0.00019979850722193847, "loss": 1.0015, "step": 8240 }, { "epoch": 0.12, "grad_norm": 0.494140625, "learning_rate": 0.0001997969155683044, "loss": 0.8638, "step": 8245 }, { "epoch": 0.12, "grad_norm": 0.56640625, "learning_rate": 0.000199795317659286, "loss": 0.8831, "step": 8250 }, { "epoch": 0.12, "grad_norm": 0.66796875, "learning_rate": 0.00019979371349498345, "loss": 0.8855, "step": 8255 }, { "epoch": 0.12, "grad_norm": 0.5, "learning_rate": 0.00019979210307549726, "loss": 0.9473, "step": 8260 }, { "epoch": 0.12, "grad_norm": 0.59375, "learning_rate": 0.00019979048640092844, "loss": 0.919, "step": 8265 }, { "epoch": 0.12, "grad_norm": 0.515625, "learning_rate": 0.00019978886347137825, "loss": 0.9902, "step": 8270 }, { "epoch": 0.12, "grad_norm": 0.54296875, "learning_rate": 0.00019978723428694845, "loss": 0.9017, "step": 8275 }, { "epoch": 0.12, "grad_norm": 0.498046875, "learning_rate": 0.00019978559884774116, "loss": 0.8228, "step": 8280 }, { "epoch": 0.12, "grad_norm": 0.5390625, "learning_rate": 0.0001997839571538589, "loss": 0.9621, "step": 8285 }, { "epoch": 0.12, "grad_norm": 0.51953125, "learning_rate": 0.00019978230920540452, "loss": 0.9191, "step": 8290 }, { "epoch": 0.12, "grad_norm": 0.6015625, "learning_rate": 0.00019978065500248139, "loss": 1.051, "step": 8295 }, { "epoch": 0.12, "grad_norm": 0.5390625, "learning_rate": 0.00019977899454519315, "loss": 0.8845, "step": 8300 }, { "epoch": 0.12, "grad_norm": 0.58984375, "learning_rate": 0.0001997773278336439, "loss": 1.0515, "step": 8305 }, { "epoch": 0.12, "grad_norm": 0.470703125, "learning_rate": 0.0001997756548679381, "loss": 0.9412, "step": 8310 }, { "epoch": 0.12, "grad_norm": 0.5390625, "learning_rate": 0.00019977397564818064, "loss": 1.0884, "step": 8315 }, { "epoch": 0.12, "grad_norm": 0.51953125, "learning_rate": 0.00019977229017447673, "loss": 1.1093, "step": 8320 }, { "epoch": 0.12, "grad_norm": 0.56640625, "learning_rate": 0.00019977059844693206, "loss": 0.9655, "step": 8325 }, { "epoch": 0.12, "grad_norm": 0.54296875, "learning_rate": 0.0001997689004656526, "loss": 0.9096, "step": 8330 }, { "epoch": 0.12, "grad_norm": 0.515625, "learning_rate": 0.0001997671962307449, "loss": 1.0204, "step": 8335 }, { "epoch": 0.12, "grad_norm": 0.5, "learning_rate": 0.0001997654857423157, "loss": 0.909, "step": 8340 }, { "epoch": 0.12, "grad_norm": 0.54296875, "learning_rate": 0.0001997637690004722, "loss": 0.966, "step": 8345 }, { "epoch": 0.12, "grad_norm": 0.494140625, "learning_rate": 0.00019976204600532206, "loss": 1.0793, "step": 8350 }, { "epoch": 0.12, "grad_norm": 0.734375, "learning_rate": 0.00019976031675697322, "loss": 0.8746, "step": 8355 }, { "epoch": 0.12, "grad_norm": 0.546875, "learning_rate": 0.00019975858125553415, "loss": 1.0465, "step": 8360 }, { "epoch": 0.12, "grad_norm": 0.51171875, "learning_rate": 0.00019975683950111358, "loss": 0.8873, "step": 8365 }, { "epoch": 0.12, "grad_norm": 0.59375, "learning_rate": 0.0001997550914938207, "loss": 0.9766, "step": 8370 }, { "epoch": 0.12, "grad_norm": 0.474609375, "learning_rate": 0.00019975333723376505, "loss": 0.8231, "step": 8375 }, { "epoch": 0.12, "grad_norm": 0.458984375, "learning_rate": 0.00019975157672105665, "loss": 0.8231, "step": 8380 }, { "epoch": 0.12, "grad_norm": 0.5078125, "learning_rate": 0.00019974980995580578, "loss": 0.9867, "step": 8385 }, { "epoch": 0.12, "grad_norm": 0.51953125, "learning_rate": 0.00019974803693812324, "loss": 0.91, "step": 8390 }, { "epoch": 0.12, "grad_norm": 0.546875, "learning_rate": 0.00019974625766812013, "loss": 0.9998, "step": 8395 }, { "epoch": 0.12, "grad_norm": 0.50390625, "learning_rate": 0.000199744472145908, "loss": 0.9511, "step": 8400 }, { "epoch": 0.12, "grad_norm": 0.4921875, "learning_rate": 0.00019974268037159873, "loss": 1.0058, "step": 8405 }, { "epoch": 0.12, "grad_norm": 0.5234375, "learning_rate": 0.00019974088234530468, "loss": 1.0336, "step": 8410 }, { "epoch": 0.12, "grad_norm": 0.75390625, "learning_rate": 0.00019973907806713852, "loss": 1.1555, "step": 8415 }, { "epoch": 0.12, "grad_norm": 0.5234375, "learning_rate": 0.00019973726753721335, "loss": 0.99, "step": 8420 }, { "epoch": 0.12, "grad_norm": 0.4765625, "learning_rate": 0.00019973545075564268, "loss": 0.9725, "step": 8425 }, { "epoch": 0.12, "grad_norm": 0.51953125, "learning_rate": 0.00019973362772254035, "loss": 0.9521, "step": 8430 }, { "epoch": 0.12, "grad_norm": 0.44921875, "learning_rate": 0.00019973179843802064, "loss": 0.9352, "step": 8435 }, { "epoch": 0.12, "grad_norm": 0.5859375, "learning_rate": 0.00019972996290219824, "loss": 0.8785, "step": 8440 }, { "epoch": 0.12, "grad_norm": 0.53125, "learning_rate": 0.00019972812111518817, "loss": 0.9064, "step": 8445 }, { "epoch": 0.12, "grad_norm": 0.51953125, "learning_rate": 0.0001997262730771059, "loss": 0.9741, "step": 8450 }, { "epoch": 0.12, "grad_norm": 0.5625, "learning_rate": 0.00019972441878806721, "loss": 1.0906, "step": 8455 }, { "epoch": 0.12, "grad_norm": 0.498046875, "learning_rate": 0.00019972255824818845, "loss": 0.9678, "step": 8460 }, { "epoch": 0.12, "grad_norm": 0.61328125, "learning_rate": 0.00019972069145758609, "loss": 1.0063, "step": 8465 }, { "epoch": 0.12, "grad_norm": 0.5703125, "learning_rate": 0.00019971881841637727, "loss": 0.9492, "step": 8470 }, { "epoch": 0.12, "grad_norm": 0.53125, "learning_rate": 0.00019971693912467932, "loss": 1.1059, "step": 8475 }, { "epoch": 0.12, "grad_norm": 0.546875, "learning_rate": 0.00019971505358261005, "loss": 0.866, "step": 8480 }, { "epoch": 0.12, "grad_norm": 0.51171875, "learning_rate": 0.00019971316179028765, "loss": 0.9676, "step": 8485 }, { "epoch": 0.12, "grad_norm": 0.51171875, "learning_rate": 0.00019971126374783074, "loss": 0.8942, "step": 8490 }, { "epoch": 0.12, "grad_norm": 0.46484375, "learning_rate": 0.00019970935945535823, "loss": 0.955, "step": 8495 }, { "epoch": 0.12, "grad_norm": 0.515625, "learning_rate": 0.00019970744891298952, "loss": 1.0123, "step": 8500 }, { "epoch": 0.12, "grad_norm": 0.515625, "learning_rate": 0.00019970553212084435, "loss": 1.1463, "step": 8505 }, { "epoch": 0.12, "grad_norm": 0.466796875, "learning_rate": 0.00019970360907904287, "loss": 0.8798, "step": 8510 }, { "epoch": 0.12, "grad_norm": 0.5703125, "learning_rate": 0.0001997016797877056, "loss": 0.9562, "step": 8515 }, { "epoch": 0.12, "grad_norm": 0.5703125, "learning_rate": 0.00019969974424695352, "loss": 0.9939, "step": 8520 }, { "epoch": 0.12, "grad_norm": 0.486328125, "learning_rate": 0.00019969780245690792, "loss": 0.9863, "step": 8525 }, { "epoch": 0.12, "grad_norm": 0.4609375, "learning_rate": 0.00019969585441769052, "loss": 0.9419, "step": 8530 }, { "epoch": 0.12, "grad_norm": 0.5703125, "learning_rate": 0.0001996939001294234, "loss": 0.9845, "step": 8535 }, { "epoch": 0.12, "grad_norm": 0.640625, "learning_rate": 0.0001996919395922291, "loss": 0.9737, "step": 8540 }, { "epoch": 0.12, "grad_norm": 0.486328125, "learning_rate": 0.00019968997280623047, "loss": 0.9785, "step": 8545 }, { "epoch": 0.12, "grad_norm": 0.55859375, "learning_rate": 0.00019968799977155083, "loss": 0.963, "step": 8550 }, { "epoch": 0.12, "grad_norm": 0.5859375, "learning_rate": 0.0001996860204883138, "loss": 0.8494, "step": 8555 }, { "epoch": 0.12, "grad_norm": 0.609375, "learning_rate": 0.00019968403495664348, "loss": 0.8966, "step": 8560 }, { "epoch": 0.12, "grad_norm": 0.625, "learning_rate": 0.00019968204317666436, "loss": 0.8522, "step": 8565 }, { "epoch": 0.12, "grad_norm": 0.5078125, "learning_rate": 0.0001996800451485012, "loss": 1.0312, "step": 8570 }, { "epoch": 0.12, "grad_norm": 0.5703125, "learning_rate": 0.00019967804087227928, "loss": 1.0109, "step": 8575 }, { "epoch": 0.12, "grad_norm": 0.625, "learning_rate": 0.00019967603034812425, "loss": 0.8581, "step": 8580 }, { "epoch": 0.12, "grad_norm": 0.5390625, "learning_rate": 0.0001996740135761621, "loss": 0.9733, "step": 8585 }, { "epoch": 0.12, "grad_norm": 0.5703125, "learning_rate": 0.00019967199055651928, "loss": 0.9642, "step": 8590 }, { "epoch": 0.12, "grad_norm": 0.5390625, "learning_rate": 0.00019966996128932256, "loss": 0.9924, "step": 8595 }, { "epoch": 0.12, "grad_norm": 0.44140625, "learning_rate": 0.00019966792577469917, "loss": 0.7756, "step": 8600 }, { "epoch": 0.12, "grad_norm": 0.515625, "learning_rate": 0.00019966588401277666, "loss": 0.9274, "step": 8605 }, { "epoch": 0.12, "grad_norm": 0.494140625, "learning_rate": 0.00019966383600368307, "loss": 1.006, "step": 8610 }, { "epoch": 0.12, "grad_norm": 0.482421875, "learning_rate": 0.00019966178174754667, "loss": 0.9224, "step": 8615 }, { "epoch": 0.12, "grad_norm": 0.453125, "learning_rate": 0.00019965972124449634, "loss": 0.8353, "step": 8620 }, { "epoch": 0.12, "grad_norm": 0.65625, "learning_rate": 0.00019965765449466116, "loss": 0.9543, "step": 8625 }, { "epoch": 0.12, "grad_norm": 0.5703125, "learning_rate": 0.00019965558149817069, "loss": 0.9076, "step": 8630 }, { "epoch": 0.12, "grad_norm": 0.56640625, "learning_rate": 0.00019965350225515486, "loss": 0.9186, "step": 8635 }, { "epoch": 0.12, "grad_norm": 0.58203125, "learning_rate": 0.00019965141676574403, "loss": 1.0729, "step": 8640 }, { "epoch": 0.12, "grad_norm": 0.54296875, "learning_rate": 0.00019964932503006893, "loss": 1.0694, "step": 8645 }, { "epoch": 0.12, "grad_norm": 0.55078125, "learning_rate": 0.0001996472270482606, "loss": 0.9504, "step": 8650 }, { "epoch": 0.12, "grad_norm": 0.50390625, "learning_rate": 0.0001996451228204506, "loss": 0.9101, "step": 8655 }, { "epoch": 0.12, "grad_norm": 0.51171875, "learning_rate": 0.00019964301234677082, "loss": 1.0089, "step": 8660 }, { "epoch": 0.12, "grad_norm": 0.546875, "learning_rate": 0.00019964089562735356, "loss": 1.0573, "step": 8665 }, { "epoch": 0.12, "grad_norm": 0.58984375, "learning_rate": 0.00019963877266233147, "loss": 0.9161, "step": 8670 }, { "epoch": 0.12, "grad_norm": 0.52734375, "learning_rate": 0.0001996366434518376, "loss": 0.8975, "step": 8675 }, { "epoch": 0.12, "grad_norm": 0.48046875, "learning_rate": 0.0001996345079960055, "loss": 0.986, "step": 8680 }, { "epoch": 0.12, "grad_norm": 0.54296875, "learning_rate": 0.00019963236629496893, "loss": 1.1205, "step": 8685 }, { "epoch": 0.12, "grad_norm": 0.58984375, "learning_rate": 0.00019963021834886217, "loss": 0.9446, "step": 8690 }, { "epoch": 0.12, "grad_norm": 0.482421875, "learning_rate": 0.00019962806415781988, "loss": 0.8427, "step": 8695 }, { "epoch": 0.12, "grad_norm": 0.58984375, "learning_rate": 0.00019962590372197701, "loss": 1.0005, "step": 8700 }, { "epoch": 0.12, "grad_norm": 0.52734375, "learning_rate": 0.00019962373704146907, "loss": 1.1333, "step": 8705 }, { "epoch": 0.12, "grad_norm": 0.515625, "learning_rate": 0.0001996215641164318, "loss": 1.1934, "step": 8710 }, { "epoch": 0.13, "grad_norm": 0.57421875, "learning_rate": 0.00019961938494700147, "loss": 0.916, "step": 8715 }, { "epoch": 0.13, "grad_norm": 0.51171875, "learning_rate": 0.00019961719953331462, "loss": 1.0266, "step": 8720 }, { "epoch": 0.13, "grad_norm": 0.56640625, "learning_rate": 0.00019961500787550823, "loss": 1.1552, "step": 8725 }, { "epoch": 0.13, "grad_norm": 0.470703125, "learning_rate": 0.0001996128099737197, "loss": 1.0242, "step": 8730 }, { "epoch": 0.13, "grad_norm": 0.50390625, "learning_rate": 0.00019961060582808683, "loss": 1.0109, "step": 8735 }, { "epoch": 0.13, "grad_norm": 0.59375, "learning_rate": 0.0001996083954387477, "loss": 1.0398, "step": 8740 }, { "epoch": 0.13, "grad_norm": 0.53515625, "learning_rate": 0.00019960617880584091, "loss": 0.8758, "step": 8745 }, { "epoch": 0.13, "grad_norm": 0.7578125, "learning_rate": 0.0001996039559295054, "loss": 1.0004, "step": 8750 }, { "epoch": 0.13, "grad_norm": 0.53125, "learning_rate": 0.00019960172680988047, "loss": 0.9889, "step": 8755 }, { "epoch": 0.13, "grad_norm": 0.515625, "learning_rate": 0.00019959949144710587, "loss": 0.8624, "step": 8760 }, { "epoch": 0.13, "grad_norm": 0.51953125, "learning_rate": 0.00019959724984132174, "loss": 0.8611, "step": 8765 }, { "epoch": 0.13, "grad_norm": 0.546875, "learning_rate": 0.00019959500199266854, "loss": 0.9709, "step": 8770 }, { "epoch": 0.13, "grad_norm": 0.5390625, "learning_rate": 0.00019959274790128716, "loss": 1.0855, "step": 8775 }, { "epoch": 0.13, "grad_norm": 0.6796875, "learning_rate": 0.00019959048756731896, "loss": 1.0042, "step": 8780 }, { "epoch": 0.13, "grad_norm": 0.470703125, "learning_rate": 0.00019958822099090556, "loss": 0.9496, "step": 8785 }, { "epoch": 0.13, "grad_norm": 0.55859375, "learning_rate": 0.00019958594817218902, "loss": 0.9307, "step": 8790 }, { "epoch": 0.13, "grad_norm": 0.51171875, "learning_rate": 0.00019958366911131188, "loss": 0.9806, "step": 8795 }, { "epoch": 0.13, "grad_norm": 0.5625, "learning_rate": 0.0001995813838084169, "loss": 0.8629, "step": 8800 }, { "epoch": 0.13, "grad_norm": 0.5234375, "learning_rate": 0.0001995790922636474, "loss": 1.0448, "step": 8805 }, { "epoch": 0.13, "grad_norm": 0.486328125, "learning_rate": 0.00019957679447714697, "loss": 0.9303, "step": 8810 }, { "epoch": 0.13, "grad_norm": 0.54296875, "learning_rate": 0.00019957449044905964, "loss": 1.0203, "step": 8815 }, { "epoch": 0.13, "grad_norm": 0.5546875, "learning_rate": 0.00019957218017952987, "loss": 0.8014, "step": 8820 }, { "epoch": 0.13, "grad_norm": 0.546875, "learning_rate": 0.00019956986366870242, "loss": 1.0335, "step": 8825 }, { "epoch": 0.13, "grad_norm": 0.7265625, "learning_rate": 0.00019956754091672254, "loss": 1.1559, "step": 8830 }, { "epoch": 0.13, "grad_norm": 0.51953125, "learning_rate": 0.0001995652119237358, "loss": 0.9023, "step": 8835 }, { "epoch": 0.13, "grad_norm": 0.515625, "learning_rate": 0.00019956287668988814, "loss": 0.9111, "step": 8840 }, { "epoch": 0.13, "grad_norm": 0.494140625, "learning_rate": 0.00019956053521532602, "loss": 1.0862, "step": 8845 }, { "epoch": 0.13, "grad_norm": 0.5546875, "learning_rate": 0.00019955818750019613, "loss": 1.0293, "step": 8850 }, { "epoch": 0.13, "grad_norm": 0.59765625, "learning_rate": 0.00019955583354464568, "loss": 1.2116, "step": 8855 }, { "epoch": 0.13, "grad_norm": 0.5390625, "learning_rate": 0.0001995534733488222, "loss": 0.877, "step": 8860 }, { "epoch": 0.13, "grad_norm": 0.494140625, "learning_rate": 0.00019955110691287362, "loss": 0.9599, "step": 8865 }, { "epoch": 0.13, "grad_norm": 0.51171875, "learning_rate": 0.00019954873423694825, "loss": 0.965, "step": 8870 }, { "epoch": 0.13, "grad_norm": 0.57421875, "learning_rate": 0.00019954635532119487, "loss": 0.9688, "step": 8875 }, { "epoch": 0.13, "grad_norm": 0.447265625, "learning_rate": 0.00019954397016576258, "loss": 0.9937, "step": 8880 }, { "epoch": 0.13, "grad_norm": 0.5078125, "learning_rate": 0.00019954157877080086, "loss": 0.9031, "step": 8885 }, { "epoch": 0.13, "grad_norm": 0.625, "learning_rate": 0.0001995391811364596, "loss": 1.0922, "step": 8890 }, { "epoch": 0.13, "grad_norm": 0.486328125, "learning_rate": 0.00019953677726288914, "loss": 1.0146, "step": 8895 }, { "epoch": 0.13, "grad_norm": 0.57421875, "learning_rate": 0.00019953436715024008, "loss": 1.1008, "step": 8900 }, { "epoch": 0.13, "grad_norm": 0.5, "learning_rate": 0.00019953195079866354, "loss": 1.0151, "step": 8905 }, { "epoch": 0.13, "grad_norm": 0.65625, "learning_rate": 0.00019952952820831093, "loss": 0.9264, "step": 8910 }, { "epoch": 0.13, "grad_norm": 0.60546875, "learning_rate": 0.0001995270993793342, "loss": 0.9496, "step": 8915 }, { "epoch": 0.13, "grad_norm": 0.55078125, "learning_rate": 0.0001995246643118855, "loss": 0.9531, "step": 8920 }, { "epoch": 0.13, "grad_norm": 0.65234375, "learning_rate": 0.00019952222300611747, "loss": 0.9197, "step": 8925 }, { "epoch": 0.13, "grad_norm": 0.5859375, "learning_rate": 0.00019951977546218318, "loss": 1.096, "step": 8930 }, { "epoch": 0.13, "grad_norm": 0.5390625, "learning_rate": 0.00019951732168023602, "loss": 0.9807, "step": 8935 }, { "epoch": 0.13, "grad_norm": 0.54296875, "learning_rate": 0.00019951486166042978, "loss": 0.9146, "step": 8940 }, { "epoch": 0.13, "grad_norm": 0.5078125, "learning_rate": 0.0001995123954029187, "loss": 0.9749, "step": 8945 }, { "epoch": 0.13, "grad_norm": 0.51953125, "learning_rate": 0.00019950992290785732, "loss": 0.9829, "step": 8950 }, { "epoch": 0.13, "grad_norm": 0.5, "learning_rate": 0.00019950744417540067, "loss": 0.8879, "step": 8955 }, { "epoch": 0.13, "grad_norm": 0.56640625, "learning_rate": 0.00019950495920570408, "loss": 0.8371, "step": 8960 }, { "epoch": 0.13, "grad_norm": 0.5234375, "learning_rate": 0.00019950246799892328, "loss": 1.046, "step": 8965 }, { "epoch": 0.13, "grad_norm": 0.5234375, "learning_rate": 0.0001994999705552145, "loss": 0.9569, "step": 8970 }, { "epoch": 0.13, "grad_norm": 0.50390625, "learning_rate": 0.00019949746687473422, "loss": 0.9587, "step": 8975 }, { "epoch": 0.13, "grad_norm": 0.5078125, "learning_rate": 0.00019949495695763942, "loss": 1.0237, "step": 8980 }, { "epoch": 0.13, "grad_norm": 0.6015625, "learning_rate": 0.0001994924408040874, "loss": 1.0305, "step": 8985 }, { "epoch": 0.13, "grad_norm": 0.5078125, "learning_rate": 0.00019948991841423587, "loss": 0.9636, "step": 8990 }, { "epoch": 0.13, "grad_norm": 0.48046875, "learning_rate": 0.00019948738978824295, "loss": 1.0234, "step": 8995 }, { "epoch": 0.13, "grad_norm": 0.5, "learning_rate": 0.0001994848549262671, "loss": 0.9714, "step": 9000 }, { "epoch": 0.13, "grad_norm": 0.55078125, "learning_rate": 0.0001994823138284673, "loss": 0.9048, "step": 9005 }, { "epoch": 0.13, "grad_norm": 0.5234375, "learning_rate": 0.00019947976649500274, "loss": 0.996, "step": 9010 }, { "epoch": 0.13, "grad_norm": 0.45703125, "learning_rate": 0.00019947721292603313, "loss": 0.7694, "step": 9015 }, { "epoch": 0.13, "grad_norm": 0.5703125, "learning_rate": 0.00019947465312171846, "loss": 0.8679, "step": 9020 }, { "epoch": 0.13, "grad_norm": 0.5546875, "learning_rate": 0.00019947208708221933, "loss": 1.1444, "step": 9025 }, { "epoch": 0.13, "grad_norm": 0.546875, "learning_rate": 0.00019946951480769642, "loss": 1.1269, "step": 9030 }, { "epoch": 0.13, "grad_norm": 0.59375, "learning_rate": 0.00019946693629831105, "loss": 1.1152, "step": 9035 }, { "epoch": 0.13, "grad_norm": 0.51171875, "learning_rate": 0.00019946435155422486, "loss": 0.9622, "step": 9040 }, { "epoch": 0.13, "grad_norm": 0.462890625, "learning_rate": 0.00019946176057559982, "loss": 0.9847, "step": 9045 }, { "epoch": 0.13, "grad_norm": 0.458984375, "learning_rate": 0.00019945916336259833, "loss": 0.8883, "step": 9050 }, { "epoch": 0.13, "grad_norm": 0.55078125, "learning_rate": 0.00019945655991538322, "loss": 0.9141, "step": 9055 }, { "epoch": 0.13, "grad_norm": 0.53125, "learning_rate": 0.00019945395023411767, "loss": 1.0952, "step": 9060 }, { "epoch": 0.13, "grad_norm": 0.498046875, "learning_rate": 0.00019945133431896524, "loss": 1.006, "step": 9065 }, { "epoch": 0.13, "grad_norm": 0.5390625, "learning_rate": 0.0001994487121700899, "loss": 0.9484, "step": 9070 }, { "epoch": 0.13, "grad_norm": 0.50390625, "learning_rate": 0.00019944608378765603, "loss": 0.8655, "step": 9075 }, { "epoch": 0.13, "grad_norm": 0.5, "learning_rate": 0.0001994434491718284, "loss": 1.0319, "step": 9080 }, { "epoch": 0.13, "grad_norm": 0.57421875, "learning_rate": 0.00019944080832277208, "loss": 1.0843, "step": 9085 }, { "epoch": 0.13, "grad_norm": 0.53125, "learning_rate": 0.00019943816124065266, "loss": 0.9651, "step": 9090 }, { "epoch": 0.13, "grad_norm": 0.515625, "learning_rate": 0.00019943550792563604, "loss": 0.9859, "step": 9095 }, { "epoch": 0.13, "grad_norm": 0.51953125, "learning_rate": 0.0001994328483778885, "loss": 1.0562, "step": 9100 }, { "epoch": 0.13, "grad_norm": 0.51171875, "learning_rate": 0.00019943018259757685, "loss": 1.0792, "step": 9105 }, { "epoch": 0.13, "grad_norm": 0.60546875, "learning_rate": 0.00019942751058486807, "loss": 0.9782, "step": 9110 }, { "epoch": 0.13, "grad_norm": 0.5390625, "learning_rate": 0.00019942483233992967, "loss": 0.8319, "step": 9115 }, { "epoch": 0.13, "grad_norm": 0.5078125, "learning_rate": 0.00019942214786292957, "loss": 1.0149, "step": 9120 }, { "epoch": 0.13, "grad_norm": 0.5390625, "learning_rate": 0.00019941945715403598, "loss": 0.9469, "step": 9125 }, { "epoch": 0.13, "grad_norm": 0.5390625, "learning_rate": 0.00019941676021341764, "loss": 0.9073, "step": 9130 }, { "epoch": 0.13, "grad_norm": 0.5234375, "learning_rate": 0.0001994140570412435, "loss": 1.0213, "step": 9135 }, { "epoch": 0.13, "grad_norm": 0.5390625, "learning_rate": 0.00019941134763768305, "loss": 1.0976, "step": 9140 }, { "epoch": 0.13, "grad_norm": 0.5234375, "learning_rate": 0.0001994086320029061, "loss": 0.9995, "step": 9145 }, { "epoch": 0.13, "grad_norm": 0.52734375, "learning_rate": 0.0001994059101370829, "loss": 0.9779, "step": 9150 }, { "epoch": 0.13, "grad_norm": 0.52734375, "learning_rate": 0.00019940318204038406, "loss": 0.931, "step": 9155 }, { "epoch": 0.13, "grad_norm": 0.62890625, "learning_rate": 0.0001994004477129805, "loss": 0.9962, "step": 9160 }, { "epoch": 0.13, "grad_norm": 0.5390625, "learning_rate": 0.00019939770715504373, "loss": 0.9804, "step": 9165 }, { "epoch": 0.13, "grad_norm": 0.546875, "learning_rate": 0.00019939496036674542, "loss": 1.0479, "step": 9170 }, { "epoch": 0.13, "grad_norm": 0.65625, "learning_rate": 0.00019939220734825784, "loss": 1.0682, "step": 9175 }, { "epoch": 0.13, "grad_norm": 0.48828125, "learning_rate": 0.0001993894480997535, "loss": 0.8179, "step": 9180 }, { "epoch": 0.13, "grad_norm": 0.54296875, "learning_rate": 0.0001993866826214053, "loss": 1.1365, "step": 9185 }, { "epoch": 0.13, "grad_norm": 0.50390625, "learning_rate": 0.00019938391091338668, "loss": 1.0454, "step": 9190 }, { "epoch": 0.13, "grad_norm": 0.54296875, "learning_rate": 0.00019938113297587133, "loss": 0.9802, "step": 9195 }, { "epoch": 0.13, "grad_norm": 0.5859375, "learning_rate": 0.00019937834880903342, "loss": 0.9689, "step": 9200 }, { "epoch": 0.13, "grad_norm": 0.58203125, "learning_rate": 0.00019937555841304736, "loss": 0.9684, "step": 9205 }, { "epoch": 0.13, "grad_norm": 0.640625, "learning_rate": 0.00019937276178808814, "loss": 0.9603, "step": 9210 }, { "epoch": 0.13, "grad_norm": 0.490234375, "learning_rate": 0.00019936995893433105, "loss": 1.0888, "step": 9215 }, { "epoch": 0.13, "grad_norm": 0.546875, "learning_rate": 0.00019936714985195177, "loss": 0.991, "step": 9220 }, { "epoch": 0.13, "grad_norm": 0.546875, "learning_rate": 0.00019936433454112635, "loss": 0.9825, "step": 9225 }, { "epoch": 0.13, "grad_norm": 0.53515625, "learning_rate": 0.00019936151300203127, "loss": 0.9383, "step": 9230 }, { "epoch": 0.13, "grad_norm": 0.486328125, "learning_rate": 0.0001993586852348434, "loss": 0.9222, "step": 9235 }, { "epoch": 0.13, "grad_norm": 0.625, "learning_rate": 0.00019935585123973998, "loss": 0.9562, "step": 9240 }, { "epoch": 0.13, "grad_norm": 0.58203125, "learning_rate": 0.00019935301101689864, "loss": 1.1302, "step": 9245 }, { "epoch": 0.13, "grad_norm": 0.5859375, "learning_rate": 0.0001993501645664974, "loss": 0.8784, "step": 9250 }, { "epoch": 0.13, "grad_norm": 0.53125, "learning_rate": 0.00019934731188871473, "loss": 1.0054, "step": 9255 }, { "epoch": 0.13, "grad_norm": 0.55078125, "learning_rate": 0.0001993444529837294, "loss": 0.9292, "step": 9260 }, { "epoch": 0.13, "grad_norm": 0.54296875, "learning_rate": 0.00019934158785172058, "loss": 0.8246, "step": 9265 }, { "epoch": 0.13, "grad_norm": 0.5078125, "learning_rate": 0.00019933871649286796, "loss": 0.8338, "step": 9270 }, { "epoch": 0.13, "grad_norm": 0.54296875, "learning_rate": 0.00019933583890735138, "loss": 0.8329, "step": 9275 }, { "epoch": 0.13, "grad_norm": 0.51171875, "learning_rate": 0.0001993329550953513, "loss": 1.0388, "step": 9280 }, { "epoch": 0.13, "grad_norm": 0.490234375, "learning_rate": 0.0001993300650570485, "loss": 0.952, "step": 9285 }, { "epoch": 0.13, "grad_norm": 0.671875, "learning_rate": 0.00019932716879262404, "loss": 1.2071, "step": 9290 }, { "epoch": 0.13, "grad_norm": 0.52734375, "learning_rate": 0.00019932426630225956, "loss": 0.9085, "step": 9295 }, { "epoch": 0.13, "grad_norm": 0.55859375, "learning_rate": 0.00019932135758613694, "loss": 0.9346, "step": 9300 }, { "epoch": 0.13, "grad_norm": 0.494140625, "learning_rate": 0.0001993184426444385, "loss": 0.9858, "step": 9305 }, { "epoch": 0.13, "grad_norm": 0.55078125, "learning_rate": 0.00019931552147734697, "loss": 1.0675, "step": 9310 }, { "epoch": 0.13, "grad_norm": 0.5546875, "learning_rate": 0.00019931259408504545, "loss": 1.0765, "step": 9315 }, { "epoch": 0.13, "grad_norm": 0.484375, "learning_rate": 0.0001993096604677174, "loss": 1.045, "step": 9320 }, { "epoch": 0.13, "grad_norm": 0.53515625, "learning_rate": 0.00019930672062554674, "loss": 0.7794, "step": 9325 }, { "epoch": 0.13, "grad_norm": 0.5859375, "learning_rate": 0.00019930377455871771, "loss": 0.8622, "step": 9330 }, { "epoch": 0.13, "grad_norm": 0.5625, "learning_rate": 0.00019930082226741504, "loss": 0.9834, "step": 9335 }, { "epoch": 0.13, "grad_norm": 0.62109375, "learning_rate": 0.0001992978637518237, "loss": 0.9345, "step": 9340 }, { "epoch": 0.13, "grad_norm": 0.55859375, "learning_rate": 0.00019929489901212918, "loss": 0.9969, "step": 9345 }, { "epoch": 0.13, "grad_norm": 0.578125, "learning_rate": 0.0001992919280485173, "loss": 1.1695, "step": 9350 }, { "epoch": 0.13, "grad_norm": 0.578125, "learning_rate": 0.0001992889508611743, "loss": 1.2468, "step": 9355 }, { "epoch": 0.13, "grad_norm": 0.5703125, "learning_rate": 0.00019928596745028677, "loss": 1.0997, "step": 9360 }, { "epoch": 0.13, "grad_norm": 0.56640625, "learning_rate": 0.0001992829778160417, "loss": 0.8832, "step": 9365 }, { "epoch": 0.13, "grad_norm": 0.59765625, "learning_rate": 0.0001992799819586265, "loss": 1.0756, "step": 9370 }, { "epoch": 0.13, "grad_norm": 0.515625, "learning_rate": 0.000199276979878229, "loss": 0.8576, "step": 9375 }, { "epoch": 0.13, "grad_norm": 0.59765625, "learning_rate": 0.0001992739715750373, "loss": 1.2125, "step": 9380 }, { "epoch": 0.13, "grad_norm": 0.57421875, "learning_rate": 0.00019927095704924005, "loss": 0.8716, "step": 9385 }, { "epoch": 0.13, "grad_norm": 0.515625, "learning_rate": 0.00019926793630102612, "loss": 1.0045, "step": 9390 }, { "epoch": 0.13, "grad_norm": 0.52734375, "learning_rate": 0.0001992649093305849, "loss": 0.9902, "step": 9395 }, { "epoch": 0.13, "grad_norm": 0.5234375, "learning_rate": 0.00019926187613810608, "loss": 0.9739, "step": 9400 }, { "epoch": 0.13, "grad_norm": 0.5390625, "learning_rate": 0.00019925883672377982, "loss": 0.9858, "step": 9405 }, { "epoch": 0.13, "grad_norm": 0.56640625, "learning_rate": 0.00019925579108779663, "loss": 1.0679, "step": 9410 }, { "epoch": 0.14, "grad_norm": 0.53515625, "learning_rate": 0.00019925273923034742, "loss": 0.9112, "step": 9415 }, { "epoch": 0.14, "grad_norm": 0.57421875, "learning_rate": 0.00019924968115162347, "loss": 0.9754, "step": 9420 }, { "epoch": 0.14, "grad_norm": 0.498046875, "learning_rate": 0.0001992466168518165, "loss": 0.8947, "step": 9425 }, { "epoch": 0.14, "grad_norm": 0.53125, "learning_rate": 0.00019924354633111852, "loss": 1.0981, "step": 9430 }, { "epoch": 0.14, "grad_norm": 0.53125, "learning_rate": 0.00019924046958972206, "loss": 0.8397, "step": 9435 }, { "epoch": 0.14, "grad_norm": 0.478515625, "learning_rate": 0.00019923738662781988, "loss": 0.7539, "step": 9440 }, { "epoch": 0.14, "grad_norm": 0.484375, "learning_rate": 0.00019923429744560532, "loss": 0.8619, "step": 9445 }, { "epoch": 0.14, "grad_norm": 0.5390625, "learning_rate": 0.000199231202043272, "loss": 0.9305, "step": 9450 }, { "epoch": 0.14, "grad_norm": 0.54296875, "learning_rate": 0.00019922810042101387, "loss": 0.8837, "step": 9455 }, { "epoch": 0.14, "grad_norm": 0.486328125, "learning_rate": 0.00019922499257902544, "loss": 1.0377, "step": 9460 }, { "epoch": 0.14, "grad_norm": 0.5546875, "learning_rate": 0.00019922187851750144, "loss": 1.0431, "step": 9465 }, { "epoch": 0.14, "grad_norm": 0.48828125, "learning_rate": 0.0001992187582366371, "loss": 1.0584, "step": 9470 }, { "epoch": 0.14, "grad_norm": 0.51953125, "learning_rate": 0.000199215631736628, "loss": 0.913, "step": 9475 }, { "epoch": 0.14, "grad_norm": 0.5859375, "learning_rate": 0.0001992124990176701, "loss": 0.8658, "step": 9480 }, { "epoch": 0.14, "grad_norm": 0.474609375, "learning_rate": 0.0001992093600799598, "loss": 1.0133, "step": 9485 }, { "epoch": 0.14, "grad_norm": 0.5, "learning_rate": 0.00019920621492369375, "loss": 0.9382, "step": 9490 }, { "epoch": 0.14, "grad_norm": 0.57421875, "learning_rate": 0.0001992030635490692, "loss": 0.9738, "step": 9495 }, { "epoch": 0.14, "grad_norm": 0.52734375, "learning_rate": 0.00019919990595628363, "loss": 1.0165, "step": 9500 }, { "epoch": 0.14, "grad_norm": 0.482421875, "learning_rate": 0.000199196742145535, "loss": 0.8768, "step": 9505 }, { "epoch": 0.14, "grad_norm": 0.5, "learning_rate": 0.00019919357211702158, "loss": 0.8994, "step": 9510 }, { "epoch": 0.14, "grad_norm": 0.58203125, "learning_rate": 0.00019919039587094211, "loss": 0.9562, "step": 9515 }, { "epoch": 0.14, "grad_norm": 0.5390625, "learning_rate": 0.00019918721340749564, "loss": 0.9853, "step": 9520 }, { "epoch": 0.14, "grad_norm": 0.51953125, "learning_rate": 0.00019918402472688166, "loss": 1.0501, "step": 9525 }, { "epoch": 0.14, "grad_norm": 0.5, "learning_rate": 0.00019918082982930008, "loss": 1.0426, "step": 9530 }, { "epoch": 0.14, "grad_norm": 0.53125, "learning_rate": 0.00019917762871495112, "loss": 0.9506, "step": 9535 }, { "epoch": 0.14, "grad_norm": 0.51953125, "learning_rate": 0.00019917442138403543, "loss": 0.9277, "step": 9540 }, { "epoch": 0.14, "grad_norm": 0.58984375, "learning_rate": 0.000199171207836754, "loss": 1.0632, "step": 9545 }, { "epoch": 0.14, "grad_norm": 0.51171875, "learning_rate": 0.00019916798807330838, "loss": 0.9718, "step": 9550 }, { "epoch": 0.14, "grad_norm": 0.6171875, "learning_rate": 0.00019916476209390035, "loss": 0.9782, "step": 9555 }, { "epoch": 0.14, "grad_norm": 0.484375, "learning_rate": 0.00019916152989873204, "loss": 0.9661, "step": 9560 }, { "epoch": 0.14, "grad_norm": 0.58203125, "learning_rate": 0.00019915829148800613, "loss": 0.8995, "step": 9565 }, { "epoch": 0.14, "grad_norm": 0.66796875, "learning_rate": 0.00019915504686192557, "loss": 0.9292, "step": 9570 }, { "epoch": 0.14, "grad_norm": 0.50390625, "learning_rate": 0.00019915179602069373, "loss": 0.9884, "step": 9575 }, { "epoch": 0.14, "grad_norm": 0.5546875, "learning_rate": 0.0001991485389645144, "loss": 0.9105, "step": 9580 }, { "epoch": 0.14, "grad_norm": 0.49609375, "learning_rate": 0.00019914527569359173, "loss": 1.0626, "step": 9585 }, { "epoch": 0.14, "grad_norm": 0.5625, "learning_rate": 0.00019914200620813025, "loss": 1.0535, "step": 9590 }, { "epoch": 0.14, "grad_norm": 0.55859375, "learning_rate": 0.00019913873050833494, "loss": 1.0736, "step": 9595 }, { "epoch": 0.14, "grad_norm": 0.5546875, "learning_rate": 0.00019913544859441107, "loss": 0.8547, "step": 9600 }, { "epoch": 0.14, "grad_norm": 0.69140625, "learning_rate": 0.00019913216046656436, "loss": 1.1305, "step": 9605 }, { "epoch": 0.14, "grad_norm": 0.54296875, "learning_rate": 0.00019912886612500095, "loss": 1.0175, "step": 9610 }, { "epoch": 0.14, "grad_norm": 0.515625, "learning_rate": 0.00019912556556992732, "loss": 0.9938, "step": 9615 }, { "epoch": 0.14, "grad_norm": 0.53125, "learning_rate": 0.00019912225880155032, "loss": 0.8953, "step": 9620 }, { "epoch": 0.14, "grad_norm": 0.52734375, "learning_rate": 0.0001991189458200773, "loss": 0.9436, "step": 9625 }, { "epoch": 0.14, "grad_norm": 0.5859375, "learning_rate": 0.00019911562662571581, "loss": 1.0913, "step": 9630 }, { "epoch": 0.14, "grad_norm": 0.5703125, "learning_rate": 0.00019911230121867396, "loss": 0.9126, "step": 9635 }, { "epoch": 0.14, "grad_norm": 0.4921875, "learning_rate": 0.00019910896959916024, "loss": 1.0492, "step": 9640 }, { "epoch": 0.14, "grad_norm": 0.51953125, "learning_rate": 0.0001991056317673834, "loss": 1.0146, "step": 9645 }, { "epoch": 0.14, "grad_norm": 0.53515625, "learning_rate": 0.00019910228772355268, "loss": 0.9471, "step": 9650 }, { "epoch": 0.14, "grad_norm": 0.69140625, "learning_rate": 0.00019909893746787772, "loss": 0.9236, "step": 9655 }, { "epoch": 0.14, "grad_norm": 0.51953125, "learning_rate": 0.00019909558100056847, "loss": 0.8822, "step": 9660 }, { "epoch": 0.14, "grad_norm": 0.53515625, "learning_rate": 0.00019909221832183538, "loss": 1.0455, "step": 9665 }, { "epoch": 0.14, "grad_norm": 0.53515625, "learning_rate": 0.00019908884943188915, "loss": 1.0162, "step": 9670 }, { "epoch": 0.14, "grad_norm": 0.53515625, "learning_rate": 0.000199085474330941, "loss": 0.9446, "step": 9675 }, { "epoch": 0.14, "grad_norm": 0.51953125, "learning_rate": 0.00019908209301920248, "loss": 0.9998, "step": 9680 }, { "epoch": 0.14, "grad_norm": 0.55859375, "learning_rate": 0.0001990787054968855, "loss": 0.977, "step": 9685 }, { "epoch": 0.14, "grad_norm": 0.57421875, "learning_rate": 0.00019907531176420245, "loss": 0.9763, "step": 9690 }, { "epoch": 0.14, "grad_norm": 0.5234375, "learning_rate": 0.00019907191182136596, "loss": 0.7974, "step": 9695 }, { "epoch": 0.14, "grad_norm": 0.5703125, "learning_rate": 0.00019906850566858928, "loss": 0.9493, "step": 9700 }, { "epoch": 0.14, "grad_norm": 0.53125, "learning_rate": 0.0001990650933060858, "loss": 0.8047, "step": 9705 }, { "epoch": 0.14, "grad_norm": 0.5859375, "learning_rate": 0.00019906167473406945, "loss": 0.9946, "step": 9710 }, { "epoch": 0.14, "grad_norm": 0.52734375, "learning_rate": 0.00019905824995275452, "loss": 0.928, "step": 9715 }, { "epoch": 0.14, "grad_norm": 0.54296875, "learning_rate": 0.00019905481896235566, "loss": 1.0442, "step": 9720 }, { "epoch": 0.14, "grad_norm": 0.498046875, "learning_rate": 0.00019905138176308791, "loss": 0.8989, "step": 9725 }, { "epoch": 0.14, "grad_norm": 0.56640625, "learning_rate": 0.00019904793835516676, "loss": 1.0771, "step": 9730 }, { "epoch": 0.14, "grad_norm": 0.515625, "learning_rate": 0.000199044488738808, "loss": 1.0894, "step": 9735 }, { "epoch": 0.14, "grad_norm": 0.55859375, "learning_rate": 0.0001990410329142279, "loss": 1.1339, "step": 9740 }, { "epoch": 0.14, "grad_norm": 0.55859375, "learning_rate": 0.00019903757088164306, "loss": 0.8538, "step": 9745 }, { "epoch": 0.14, "grad_norm": 0.6015625, "learning_rate": 0.0001990341026412705, "loss": 1.1117, "step": 9750 }, { "epoch": 0.14, "grad_norm": 0.5, "learning_rate": 0.0001990306281933276, "loss": 0.9435, "step": 9755 }, { "epoch": 0.14, "grad_norm": 0.578125, "learning_rate": 0.00019902714753803212, "loss": 1.0701, "step": 9760 }, { "epoch": 0.14, "grad_norm": 0.6484375, "learning_rate": 0.00019902366067560222, "loss": 1.0426, "step": 9765 }, { "epoch": 0.14, "grad_norm": 0.5546875, "learning_rate": 0.00019902016760625654, "loss": 1.1211, "step": 9770 }, { "epoch": 0.14, "grad_norm": 0.5, "learning_rate": 0.00019901666833021397, "loss": 0.9565, "step": 9775 }, { "epoch": 0.14, "grad_norm": 0.546875, "learning_rate": 0.00019901316284769385, "loss": 1.0333, "step": 9780 }, { "epoch": 0.14, "grad_norm": 0.59375, "learning_rate": 0.0001990096511589159, "loss": 1.0048, "step": 9785 }, { "epoch": 0.14, "grad_norm": 0.609375, "learning_rate": 0.00019900613326410027, "loss": 1.0639, "step": 9790 }, { "epoch": 0.14, "grad_norm": 0.5859375, "learning_rate": 0.00019900260916346743, "loss": 1.0796, "step": 9795 }, { "epoch": 0.14, "grad_norm": 0.5390625, "learning_rate": 0.00019899907885723834, "loss": 0.9319, "step": 9800 }, { "epoch": 0.14, "grad_norm": 0.56640625, "learning_rate": 0.0001989955423456342, "loss": 0.9127, "step": 9805 }, { "epoch": 0.14, "grad_norm": 0.546875, "learning_rate": 0.00019899199962887672, "loss": 0.9493, "step": 9810 }, { "epoch": 0.14, "grad_norm": 0.53125, "learning_rate": 0.00019898845070718797, "loss": 0.9051, "step": 9815 }, { "epoch": 0.14, "grad_norm": 0.56640625, "learning_rate": 0.00019898489558079039, "loss": 0.9273, "step": 9820 }, { "epoch": 0.14, "grad_norm": 0.49609375, "learning_rate": 0.00019898133424990682, "loss": 0.9571, "step": 9825 }, { "epoch": 0.14, "grad_norm": 0.5078125, "learning_rate": 0.0001989777667147605, "loss": 0.9934, "step": 9830 }, { "epoch": 0.14, "grad_norm": 0.56640625, "learning_rate": 0.00019897419297557504, "loss": 0.9298, "step": 9835 }, { "epoch": 0.14, "grad_norm": 0.55859375, "learning_rate": 0.0001989706130325744, "loss": 0.9945, "step": 9840 }, { "epoch": 0.14, "grad_norm": 0.58203125, "learning_rate": 0.00019896702688598306, "loss": 1.0029, "step": 9845 }, { "epoch": 0.14, "grad_norm": 0.61328125, "learning_rate": 0.00019896343453602576, "loss": 0.9952, "step": 9850 }, { "epoch": 0.14, "grad_norm": 0.54296875, "learning_rate": 0.00019895983598292762, "loss": 0.9123, "step": 9855 }, { "epoch": 0.14, "grad_norm": 0.53515625, "learning_rate": 0.0001989562312269143, "loss": 0.9369, "step": 9860 }, { "epoch": 0.14, "grad_norm": 0.6640625, "learning_rate": 0.0001989526202682117, "loss": 1.1773, "step": 9865 }, { "epoch": 0.14, "grad_norm": 0.55859375, "learning_rate": 0.0001989490031070462, "loss": 0.9184, "step": 9870 }, { "epoch": 0.14, "grad_norm": 0.546875, "learning_rate": 0.00019894537974364442, "loss": 0.9259, "step": 9875 }, { "epoch": 0.14, "grad_norm": 0.59765625, "learning_rate": 0.00019894175017823358, "loss": 0.9172, "step": 9880 }, { "epoch": 0.14, "grad_norm": 0.515625, "learning_rate": 0.00019893811441104115, "loss": 0.9146, "step": 9885 }, { "epoch": 0.14, "grad_norm": 0.515625, "learning_rate": 0.00019893447244229503, "loss": 1.0027, "step": 9890 }, { "epoch": 0.14, "grad_norm": 0.5546875, "learning_rate": 0.00019893082427222352, "loss": 0.9167, "step": 9895 }, { "epoch": 0.14, "grad_norm": 0.5234375, "learning_rate": 0.00019892716990105528, "loss": 0.9703, "step": 9900 }, { "epoch": 0.14, "grad_norm": 0.56640625, "learning_rate": 0.0001989235093290193, "loss": 0.9928, "step": 9905 }, { "epoch": 0.14, "grad_norm": 0.546875, "learning_rate": 0.00019891984255634513, "loss": 0.9418, "step": 9910 }, { "epoch": 0.14, "grad_norm": 0.57421875, "learning_rate": 0.00019891616958326257, "loss": 0.8257, "step": 9915 }, { "epoch": 0.14, "grad_norm": 0.51171875, "learning_rate": 0.00019891249041000184, "loss": 0.8762, "step": 9920 }, { "epoch": 0.14, "grad_norm": 0.58984375, "learning_rate": 0.00019890880503679358, "loss": 0.9428, "step": 9925 }, { "epoch": 0.14, "grad_norm": 0.51953125, "learning_rate": 0.00019890511346386873, "loss": 1.0612, "step": 9930 }, { "epoch": 0.14, "grad_norm": 0.59375, "learning_rate": 0.00019890141569145877, "loss": 1.0748, "step": 9935 }, { "epoch": 0.14, "grad_norm": 0.6171875, "learning_rate": 0.0001988977117197954, "loss": 1.0498, "step": 9940 }, { "epoch": 0.14, "grad_norm": 0.546875, "learning_rate": 0.00019889400154911085, "loss": 0.9022, "step": 9945 }, { "epoch": 0.14, "grad_norm": 0.5625, "learning_rate": 0.00019889028517963762, "loss": 0.9875, "step": 9950 }, { "epoch": 0.14, "grad_norm": 0.5390625, "learning_rate": 0.00019888656261160873, "loss": 0.8687, "step": 9955 }, { "epoch": 0.14, "grad_norm": 0.51171875, "learning_rate": 0.00019888283384525743, "loss": 0.9364, "step": 9960 }, { "epoch": 0.14, "grad_norm": 0.5234375, "learning_rate": 0.00019887909888081752, "loss": 0.904, "step": 9965 }, { "epoch": 0.14, "grad_norm": 0.55859375, "learning_rate": 0.00019887535771852307, "loss": 1.2971, "step": 9970 }, { "epoch": 0.14, "grad_norm": 0.54296875, "learning_rate": 0.00019887161035860859, "loss": 0.9255, "step": 9975 }, { "epoch": 0.14, "grad_norm": 0.5234375, "learning_rate": 0.00019886785680130892, "loss": 0.9778, "step": 9980 }, { "epoch": 0.14, "grad_norm": 0.5703125, "learning_rate": 0.00019886409704685944, "loss": 0.9391, "step": 9985 }, { "epoch": 0.14, "grad_norm": 0.5703125, "learning_rate": 0.00019886033109549575, "loss": 0.9114, "step": 9990 }, { "epoch": 0.14, "grad_norm": 0.61328125, "learning_rate": 0.0001988565589474539, "loss": 1.144, "step": 9995 }, { "epoch": 0.14, "grad_norm": 0.5703125, "learning_rate": 0.00019885278060297038, "loss": 1.0354, "step": 10000 }, { "epoch": 0.14, "grad_norm": 0.51171875, "learning_rate": 0.00019884899606228195, "loss": 1.0957, "step": 10005 }, { "epoch": 0.14, "grad_norm": 0.5234375, "learning_rate": 0.0001988452053256259, "loss": 1.0502, "step": 10010 }, { "epoch": 0.14, "grad_norm": 0.5, "learning_rate": 0.00019884140839323977, "loss": 0.8835, "step": 10015 }, { "epoch": 0.14, "grad_norm": 0.5390625, "learning_rate": 0.00019883760526536161, "loss": 0.9943, "step": 10020 }, { "epoch": 0.14, "grad_norm": 0.63671875, "learning_rate": 0.0001988337959422298, "loss": 0.9613, "step": 10025 }, { "epoch": 0.14, "grad_norm": 0.51171875, "learning_rate": 0.00019882998042408307, "loss": 0.9662, "step": 10030 }, { "epoch": 0.14, "grad_norm": 0.703125, "learning_rate": 0.00019882615871116062, "loss": 0.8121, "step": 10035 }, { "epoch": 0.14, "grad_norm": 0.5, "learning_rate": 0.000198822330803702, "loss": 0.813, "step": 10040 }, { "epoch": 0.14, "grad_norm": 0.7421875, "learning_rate": 0.00019881849670194712, "loss": 0.9225, "step": 10045 }, { "epoch": 0.14, "grad_norm": 0.6796875, "learning_rate": 0.00019881465640613635, "loss": 0.9777, "step": 10050 }, { "epoch": 0.14, "grad_norm": 0.57421875, "learning_rate": 0.00019881080991651033, "loss": 0.9568, "step": 10055 }, { "epoch": 0.14, "grad_norm": 0.5390625, "learning_rate": 0.00019880695723331024, "loss": 0.9948, "step": 10060 }, { "epoch": 0.14, "grad_norm": 0.60546875, "learning_rate": 0.00019880309835677753, "loss": 1.3049, "step": 10065 }, { "epoch": 0.14, "grad_norm": 0.5, "learning_rate": 0.00019879923328715407, "loss": 0.945, "step": 10070 }, { "epoch": 0.14, "grad_norm": 0.5859375, "learning_rate": 0.0001987953620246822, "loss": 1.1085, "step": 10075 }, { "epoch": 0.14, "grad_norm": 0.59765625, "learning_rate": 0.00019879148456960447, "loss": 1.068, "step": 10080 }, { "epoch": 0.14, "grad_norm": 0.59765625, "learning_rate": 0.000198787600922164, "loss": 0.9367, "step": 10085 }, { "epoch": 0.14, "grad_norm": 0.5859375, "learning_rate": 0.00019878371108260416, "loss": 1.0229, "step": 10090 }, { "epoch": 0.14, "grad_norm": 0.65625, "learning_rate": 0.00019877981505116884, "loss": 0.9013, "step": 10095 }, { "epoch": 0.14, "grad_norm": 0.53515625, "learning_rate": 0.0001987759128281022, "loss": 1.0527, "step": 10100 }, { "epoch": 0.14, "grad_norm": 0.6171875, "learning_rate": 0.00019877200441364884, "loss": 1.0512, "step": 10105 }, { "epoch": 0.15, "grad_norm": 0.51171875, "learning_rate": 0.00019876808980805375, "loss": 0.9033, "step": 10110 }, { "epoch": 0.15, "grad_norm": 0.52734375, "learning_rate": 0.0001987641690115623, "loss": 0.9431, "step": 10115 }, { "epoch": 0.15, "grad_norm": 0.5234375, "learning_rate": 0.00019876024202442028, "loss": 1.0798, "step": 10120 }, { "epoch": 0.15, "grad_norm": 0.625, "learning_rate": 0.0001987563088468738, "loss": 1.0832, "step": 10125 }, { "epoch": 0.15, "grad_norm": 0.5546875, "learning_rate": 0.0001987523694791694, "loss": 1.0411, "step": 10130 }, { "epoch": 0.15, "grad_norm": 0.734375, "learning_rate": 0.000198748423921554, "loss": 0.9704, "step": 10135 }, { "epoch": 0.15, "grad_norm": 0.59375, "learning_rate": 0.00019874447217427493, "loss": 1.0958, "step": 10140 }, { "epoch": 0.15, "grad_norm": 0.5390625, "learning_rate": 0.0001987405142375799, "loss": 1.013, "step": 10145 }, { "epoch": 0.15, "grad_norm": 0.52734375, "learning_rate": 0.00019873655011171698, "loss": 0.7886, "step": 10150 }, { "epoch": 0.15, "grad_norm": 0.5546875, "learning_rate": 0.00019873257979693463, "loss": 0.8691, "step": 10155 }, { "epoch": 0.15, "grad_norm": 0.57421875, "learning_rate": 0.00019872860329348173, "loss": 0.9006, "step": 10160 }, { "epoch": 0.15, "grad_norm": 0.5390625, "learning_rate": 0.00019872462060160754, "loss": 0.8719, "step": 10165 }, { "epoch": 0.15, "grad_norm": 0.65234375, "learning_rate": 0.0001987206317215617, "loss": 0.8805, "step": 10170 }, { "epoch": 0.15, "grad_norm": 0.53125, "learning_rate": 0.00019871663665359422, "loss": 0.8455, "step": 10175 }, { "epoch": 0.15, "grad_norm": 0.52734375, "learning_rate": 0.00019871263539795555, "loss": 1.0638, "step": 10180 }, { "epoch": 0.15, "grad_norm": 0.55078125, "learning_rate": 0.00019870862795489646, "loss": 0.9571, "step": 10185 }, { "epoch": 0.15, "grad_norm": 0.5625, "learning_rate": 0.00019870461432466814, "loss": 0.9332, "step": 10190 }, { "epoch": 0.15, "grad_norm": 0.53125, "learning_rate": 0.00019870059450752216, "loss": 1.0385, "step": 10195 }, { "epoch": 0.15, "grad_norm": 0.5546875, "learning_rate": 0.00019869656850371055, "loss": 1.0117, "step": 10200 }, { "epoch": 0.15, "grad_norm": 0.5859375, "learning_rate": 0.00019869253631348558, "loss": 0.9248, "step": 10205 }, { "epoch": 0.15, "grad_norm": 0.466796875, "learning_rate": 0.00019868849793710006, "loss": 0.9542, "step": 10210 }, { "epoch": 0.15, "grad_norm": 0.515625, "learning_rate": 0.00019868445337480707, "loss": 0.9228, "step": 10215 }, { "epoch": 0.15, "grad_norm": 0.58203125, "learning_rate": 0.00019868040262686017, "loss": 0.8607, "step": 10220 }, { "epoch": 0.15, "grad_norm": 0.53125, "learning_rate": 0.00019867634569351324, "loss": 0.9858, "step": 10225 }, { "epoch": 0.15, "grad_norm": 0.5, "learning_rate": 0.00019867228257502056, "loss": 0.9653, "step": 10230 }, { "epoch": 0.15, "grad_norm": 0.4921875, "learning_rate": 0.00019866821327163689, "loss": 0.9971, "step": 10235 }, { "epoch": 0.15, "grad_norm": 0.59765625, "learning_rate": 0.00019866413778361718, "loss": 1.0423, "step": 10240 }, { "epoch": 0.15, "grad_norm": 0.50390625, "learning_rate": 0.00019866005611121694, "loss": 1.0405, "step": 10245 }, { "epoch": 0.15, "grad_norm": 0.56640625, "learning_rate": 0.00019865596825469206, "loss": 1.1111, "step": 10250 }, { "epoch": 0.15, "grad_norm": 0.515625, "learning_rate": 0.00019865187421429868, "loss": 0.8356, "step": 10255 }, { "epoch": 0.15, "grad_norm": 0.578125, "learning_rate": 0.00019864777399029353, "loss": 0.8764, "step": 10260 }, { "epoch": 0.15, "grad_norm": 0.546875, "learning_rate": 0.00019864366758293352, "loss": 0.9006, "step": 10265 }, { "epoch": 0.15, "grad_norm": 0.5390625, "learning_rate": 0.0001986395549924761, "loss": 0.9196, "step": 10270 }, { "epoch": 0.15, "grad_norm": 0.5, "learning_rate": 0.00019863543621917898, "loss": 1.0036, "step": 10275 }, { "epoch": 0.15, "grad_norm": 0.55859375, "learning_rate": 0.00019863131126330043, "loss": 0.8909, "step": 10280 }, { "epoch": 0.15, "grad_norm": 0.5390625, "learning_rate": 0.00019862718012509897, "loss": 1.0645, "step": 10285 }, { "epoch": 0.15, "grad_norm": 0.546875, "learning_rate": 0.00019862304280483347, "loss": 1.0257, "step": 10290 }, { "epoch": 0.15, "grad_norm": 0.5703125, "learning_rate": 0.00019861889930276338, "loss": 0.9767, "step": 10295 }, { "epoch": 0.15, "grad_norm": 0.5625, "learning_rate": 0.00019861474961914834, "loss": 0.9614, "step": 10300 }, { "epoch": 0.15, "grad_norm": 0.5078125, "learning_rate": 0.00019861059375424848, "loss": 0.9972, "step": 10305 }, { "epoch": 0.15, "grad_norm": 0.53515625, "learning_rate": 0.00019860643170832432, "loss": 0.9258, "step": 10310 }, { "epoch": 0.15, "grad_norm": 0.52734375, "learning_rate": 0.0001986022634816367, "loss": 1.049, "step": 10315 }, { "epoch": 0.15, "grad_norm": 0.6015625, "learning_rate": 0.0001985980890744469, "loss": 1.1258, "step": 10320 }, { "epoch": 0.15, "grad_norm": 0.59375, "learning_rate": 0.0001985939084870166, "loss": 1.0743, "step": 10325 }, { "epoch": 0.15, "grad_norm": 0.55078125, "learning_rate": 0.00019858972171960782, "loss": 0.882, "step": 10330 }, { "epoch": 0.15, "grad_norm": 0.5703125, "learning_rate": 0.00019858552877248298, "loss": 1.1144, "step": 10335 }, { "epoch": 0.15, "grad_norm": 0.50390625, "learning_rate": 0.00019858132964590495, "loss": 1.0556, "step": 10340 }, { "epoch": 0.15, "grad_norm": 0.5390625, "learning_rate": 0.00019857712434013687, "loss": 1.0406, "step": 10345 }, { "epoch": 0.15, "grad_norm": 0.58984375, "learning_rate": 0.00019857291285544238, "loss": 0.9308, "step": 10350 }, { "epoch": 0.15, "grad_norm": 0.5078125, "learning_rate": 0.00019856869519208544, "loss": 0.9467, "step": 10355 }, { "epoch": 0.15, "grad_norm": 0.546875, "learning_rate": 0.00019856447135033046, "loss": 0.9573, "step": 10360 }, { "epoch": 0.15, "grad_norm": 0.50390625, "learning_rate": 0.00019856024133044214, "loss": 1.1789, "step": 10365 }, { "epoch": 0.15, "grad_norm": 0.546875, "learning_rate": 0.0001985560051326856, "loss": 0.9767, "step": 10370 }, { "epoch": 0.15, "grad_norm": 0.58203125, "learning_rate": 0.00019855176275732647, "loss": 0.9285, "step": 10375 }, { "epoch": 0.15, "grad_norm": 0.6015625, "learning_rate": 0.0001985475142046306, "loss": 0.999, "step": 10380 }, { "epoch": 0.15, "grad_norm": 0.6015625, "learning_rate": 0.00019854325947486428, "loss": 1.1369, "step": 10385 }, { "epoch": 0.15, "grad_norm": 0.52734375, "learning_rate": 0.00019853899856829424, "loss": 0.9692, "step": 10390 }, { "epoch": 0.15, "grad_norm": 0.5859375, "learning_rate": 0.00019853473148518755, "loss": 0.9785, "step": 10395 }, { "epoch": 0.15, "grad_norm": 0.5625, "learning_rate": 0.00019853045822581166, "loss": 1.0419, "step": 10400 }, { "epoch": 0.15, "grad_norm": 0.578125, "learning_rate": 0.00019852617879043442, "loss": 1.0083, "step": 10405 }, { "epoch": 0.15, "grad_norm": 0.5078125, "learning_rate": 0.0001985218931793241, "loss": 1.0458, "step": 10410 }, { "epoch": 0.15, "grad_norm": 0.64453125, "learning_rate": 0.00019851760139274932, "loss": 0.999, "step": 10415 }, { "epoch": 0.15, "grad_norm": 0.57421875, "learning_rate": 0.00019851330343097903, "loss": 0.8451, "step": 10420 }, { "epoch": 0.15, "grad_norm": 0.50390625, "learning_rate": 0.00019850899929428276, "loss": 1.2424, "step": 10425 }, { "epoch": 0.15, "grad_norm": 0.51171875, "learning_rate": 0.00019850468898293015, "loss": 0.8905, "step": 10430 }, { "epoch": 0.15, "grad_norm": 0.515625, "learning_rate": 0.00019850037249719149, "loss": 0.9937, "step": 10435 }, { "epoch": 0.15, "grad_norm": 0.55078125, "learning_rate": 0.0001984960498373373, "loss": 0.9661, "step": 10440 }, { "epoch": 0.15, "grad_norm": 0.62109375, "learning_rate": 0.00019849172100363851, "loss": 1.0552, "step": 10445 }, { "epoch": 0.15, "grad_norm": 0.59375, "learning_rate": 0.00019848738599636647, "loss": 0.9607, "step": 10450 }, { "epoch": 0.15, "grad_norm": 0.54296875, "learning_rate": 0.00019848304481579293, "loss": 0.8086, "step": 10455 }, { "epoch": 0.15, "grad_norm": 0.53125, "learning_rate": 0.00019847869746218995, "loss": 0.9596, "step": 10460 }, { "epoch": 0.15, "grad_norm": 0.609375, "learning_rate": 0.00019847434393583006, "loss": 0.9708, "step": 10465 }, { "epoch": 0.15, "grad_norm": 0.6015625, "learning_rate": 0.00019846998423698616, "loss": 0.9375, "step": 10470 }, { "epoch": 0.15, "grad_norm": 0.546875, "learning_rate": 0.00019846561836593148, "loss": 1.0335, "step": 10475 }, { "epoch": 0.15, "grad_norm": 0.51953125, "learning_rate": 0.00019846124632293973, "loss": 0.9171, "step": 10480 }, { "epoch": 0.15, "grad_norm": 0.5859375, "learning_rate": 0.00019845686810828487, "loss": 0.9829, "step": 10485 }, { "epoch": 0.15, "grad_norm": 0.62109375, "learning_rate": 0.00019845248372224144, "loss": 0.9931, "step": 10490 }, { "epoch": 0.15, "grad_norm": 0.51171875, "learning_rate": 0.00019844809316508418, "loss": 0.9137, "step": 10495 }, { "epoch": 0.15, "grad_norm": 0.51953125, "learning_rate": 0.00019844369643708828, "loss": 0.9316, "step": 10500 }, { "epoch": 0.15, "grad_norm": 0.54296875, "learning_rate": 0.00019843929353852944, "loss": 0.9184, "step": 10505 }, { "epoch": 0.15, "grad_norm": 0.578125, "learning_rate": 0.0001984348844696835, "loss": 0.9606, "step": 10510 }, { "epoch": 0.15, "grad_norm": 0.54296875, "learning_rate": 0.00019843046923082692, "loss": 0.9036, "step": 10515 }, { "epoch": 0.15, "grad_norm": 0.51171875, "learning_rate": 0.00019842604782223643, "loss": 0.9305, "step": 10520 }, { "epoch": 0.15, "grad_norm": 0.54296875, "learning_rate": 0.00019842162024418918, "loss": 0.8998, "step": 10525 }, { "epoch": 0.15, "grad_norm": 0.5703125, "learning_rate": 0.00019841718649696267, "loss": 1.0292, "step": 10530 }, { "epoch": 0.15, "grad_norm": 0.546875, "learning_rate": 0.00019841274658083483, "loss": 1.0275, "step": 10535 }, { "epoch": 0.15, "grad_norm": 0.51953125, "learning_rate": 0.00019840830049608395, "loss": 0.8424, "step": 10540 }, { "epoch": 0.15, "grad_norm": 0.546875, "learning_rate": 0.00019840384824298867, "loss": 0.9166, "step": 10545 }, { "epoch": 0.15, "grad_norm": 0.6015625, "learning_rate": 0.00019839938982182815, "loss": 1.1204, "step": 10550 }, { "epoch": 0.15, "grad_norm": 0.4921875, "learning_rate": 0.00019839492523288183, "loss": 0.8521, "step": 10555 }, { "epoch": 0.15, "grad_norm": 0.578125, "learning_rate": 0.0001983904544764295, "loss": 0.9154, "step": 10560 }, { "epoch": 0.15, "grad_norm": 0.51953125, "learning_rate": 0.00019838597755275143, "loss": 1.0765, "step": 10565 }, { "epoch": 0.15, "grad_norm": 0.54296875, "learning_rate": 0.00019838149446212825, "loss": 0.899, "step": 10570 }, { "epoch": 0.15, "grad_norm": 0.5078125, "learning_rate": 0.00019837700520484094, "loss": 0.8723, "step": 10575 }, { "epoch": 0.15, "grad_norm": 0.58203125, "learning_rate": 0.0001983725097811709, "loss": 1.09, "step": 10580 }, { "epoch": 0.15, "grad_norm": 0.67578125, "learning_rate": 0.0001983680081913999, "loss": 1.0788, "step": 10585 }, { "epoch": 0.15, "grad_norm": 0.59375, "learning_rate": 0.00019836350043581013, "loss": 0.9981, "step": 10590 }, { "epoch": 0.15, "grad_norm": 0.57421875, "learning_rate": 0.0001983589865146841, "loss": 0.9492, "step": 10595 }, { "epoch": 0.15, "grad_norm": 0.60546875, "learning_rate": 0.00019835446642830484, "loss": 1.0889, "step": 10600 }, { "epoch": 0.15, "grad_norm": 0.546875, "learning_rate": 0.00019834994017695556, "loss": 1.0074, "step": 10605 }, { "epoch": 0.15, "grad_norm": 0.53515625, "learning_rate": 0.00019834540776092, "loss": 1.0849, "step": 10610 }, { "epoch": 0.15, "grad_norm": 0.60546875, "learning_rate": 0.0001983408691804823, "loss": 1.0903, "step": 10615 }, { "epoch": 0.15, "grad_norm": 0.52734375, "learning_rate": 0.00019833632443592693, "loss": 1.02, "step": 10620 }, { "epoch": 0.15, "grad_norm": 0.625, "learning_rate": 0.00019833177352753873, "loss": 0.9941, "step": 10625 }, { "epoch": 0.15, "grad_norm": 0.59375, "learning_rate": 0.000198327216455603, "loss": 1.003, "step": 10630 }, { "epoch": 0.15, "grad_norm": 0.5, "learning_rate": 0.00019832265322040533, "loss": 0.9198, "step": 10635 }, { "epoch": 0.15, "grad_norm": 0.640625, "learning_rate": 0.00019831808382223177, "loss": 1.0734, "step": 10640 }, { "epoch": 0.15, "grad_norm": 0.59375, "learning_rate": 0.00019831350826136877, "loss": 1.1687, "step": 10645 }, { "epoch": 0.15, "grad_norm": 0.482421875, "learning_rate": 0.00019830892653810306, "loss": 1.0696, "step": 10650 }, { "epoch": 0.15, "grad_norm": 0.4921875, "learning_rate": 0.00019830433865272192, "loss": 1.0275, "step": 10655 }, { "epoch": 0.15, "grad_norm": 0.54296875, "learning_rate": 0.00019829974460551286, "loss": 1.0318, "step": 10660 }, { "epoch": 0.15, "grad_norm": 0.56640625, "learning_rate": 0.00019829514439676383, "loss": 0.8166, "step": 10665 }, { "epoch": 0.15, "grad_norm": 0.52734375, "learning_rate": 0.00019829053802676322, "loss": 1.0499, "step": 10670 }, { "epoch": 0.15, "grad_norm": 0.51171875, "learning_rate": 0.00019828592549579974, "loss": 1.0607, "step": 10675 }, { "epoch": 0.15, "grad_norm": 0.498046875, "learning_rate": 0.0001982813068041625, "loss": 1.0365, "step": 10680 }, { "epoch": 0.15, "grad_norm": 0.546875, "learning_rate": 0.00019827668195214105, "loss": 0.9428, "step": 10685 }, { "epoch": 0.15, "grad_norm": 0.51171875, "learning_rate": 0.00019827205094002524, "loss": 0.9886, "step": 10690 }, { "epoch": 0.15, "grad_norm": 0.58203125, "learning_rate": 0.00019826741376810533, "loss": 1.0607, "step": 10695 }, { "epoch": 0.15, "grad_norm": 0.62890625, "learning_rate": 0.000198262770436672, "loss": 1.0827, "step": 10700 }, { "epoch": 0.15, "grad_norm": 0.52734375, "learning_rate": 0.00019825812094601632, "loss": 0.9896, "step": 10705 }, { "epoch": 0.15, "grad_norm": 0.62890625, "learning_rate": 0.0001982534652964297, "loss": 0.9955, "step": 10710 }, { "epoch": 0.15, "grad_norm": 0.58984375, "learning_rate": 0.000198248803488204, "loss": 0.8947, "step": 10715 }, { "epoch": 0.15, "grad_norm": 0.578125, "learning_rate": 0.00019824413552163141, "loss": 1.035, "step": 10720 }, { "epoch": 0.15, "grad_norm": 0.5234375, "learning_rate": 0.0001982394613970045, "loss": 0.9582, "step": 10725 }, { "epoch": 0.15, "grad_norm": 0.60546875, "learning_rate": 0.00019823478111461625, "loss": 1.1495, "step": 10730 }, { "epoch": 0.15, "grad_norm": 0.59765625, "learning_rate": 0.00019823009467476006, "loss": 0.8319, "step": 10735 }, { "epoch": 0.15, "grad_norm": 0.51171875, "learning_rate": 0.00019822540207772965, "loss": 0.9966, "step": 10740 }, { "epoch": 0.15, "grad_norm": 0.51953125, "learning_rate": 0.00019822070332381917, "loss": 0.9219, "step": 10745 }, { "epoch": 0.15, "grad_norm": 0.53125, "learning_rate": 0.00019821599841332314, "loss": 0.9079, "step": 10750 }, { "epoch": 0.15, "grad_norm": 0.52734375, "learning_rate": 0.0001982112873465365, "loss": 0.9619, "step": 10755 }, { "epoch": 0.15, "grad_norm": 0.5234375, "learning_rate": 0.00019820657012375445, "loss": 0.9418, "step": 10760 }, { "epoch": 0.15, "grad_norm": 0.58984375, "learning_rate": 0.00019820184674527278, "loss": 1.1247, "step": 10765 }, { "epoch": 0.15, "grad_norm": 0.50390625, "learning_rate": 0.00019819711721138751, "loss": 1.0533, "step": 10770 }, { "epoch": 0.15, "grad_norm": 0.546875, "learning_rate": 0.0001981923815223951, "loss": 0.879, "step": 10775 }, { "epoch": 0.15, "grad_norm": 0.6171875, "learning_rate": 0.0001981876396785924, "loss": 1.1022, "step": 10780 }, { "epoch": 0.15, "grad_norm": 0.5390625, "learning_rate": 0.0001981828916802766, "loss": 0.9689, "step": 10785 }, { "epoch": 0.15, "grad_norm": 0.53125, "learning_rate": 0.0001981781375277453, "loss": 0.852, "step": 10790 }, { "epoch": 0.15, "grad_norm": 0.51171875, "learning_rate": 0.00019817337722129657, "loss": 1.0256, "step": 10795 }, { "epoch": 0.15, "grad_norm": 0.578125, "learning_rate": 0.00019816861076122873, "loss": 0.9881, "step": 10800 }, { "epoch": 0.15, "grad_norm": 0.57421875, "learning_rate": 0.00019816383814784055, "loss": 0.9893, "step": 10805 }, { "epoch": 0.16, "grad_norm": 0.54296875, "learning_rate": 0.0001981590593814312, "loss": 1.0052, "step": 10810 }, { "epoch": 0.16, "grad_norm": 0.6171875, "learning_rate": 0.00019815427446230022, "loss": 1.0588, "step": 10815 }, { "epoch": 0.16, "grad_norm": 0.5546875, "learning_rate": 0.00019814948339074752, "loss": 0.9212, "step": 10820 }, { "epoch": 0.16, "grad_norm": 0.58203125, "learning_rate": 0.00019814468616707344, "loss": 1.0306, "step": 10825 }, { "epoch": 0.16, "grad_norm": 0.53515625, "learning_rate": 0.00019813988279157862, "loss": 0.963, "step": 10830 }, { "epoch": 0.16, "grad_norm": 0.57421875, "learning_rate": 0.0001981350732645642, "loss": 1.1285, "step": 10835 }, { "epoch": 0.16, "grad_norm": 0.56640625, "learning_rate": 0.0001981302575863316, "loss": 0.8325, "step": 10840 }, { "epoch": 0.16, "grad_norm": 0.6015625, "learning_rate": 0.00019812543575718273, "loss": 0.8524, "step": 10845 }, { "epoch": 0.16, "grad_norm": 0.55078125, "learning_rate": 0.00019812060777741976, "loss": 0.9555, "step": 10850 }, { "epoch": 0.16, "grad_norm": 0.56640625, "learning_rate": 0.00019811577364734536, "loss": 0.8622, "step": 10855 }, { "epoch": 0.16, "grad_norm": 0.61328125, "learning_rate": 0.0001981109333672625, "loss": 0.9831, "step": 10860 }, { "epoch": 0.16, "grad_norm": 0.5625, "learning_rate": 0.00019810608693747464, "loss": 0.8993, "step": 10865 }, { "epoch": 0.16, "grad_norm": 0.5078125, "learning_rate": 0.0001981012343582855, "loss": 0.905, "step": 10870 }, { "epoch": 0.16, "grad_norm": 0.56640625, "learning_rate": 0.00019809637562999925, "loss": 1.0481, "step": 10875 }, { "epoch": 0.16, "grad_norm": 0.59375, "learning_rate": 0.00019809151075292046, "loss": 0.9381, "step": 10880 }, { "epoch": 0.16, "grad_norm": 0.59375, "learning_rate": 0.00019808663972735406, "loss": 0.9586, "step": 10885 }, { "epoch": 0.16, "grad_norm": 0.5546875, "learning_rate": 0.00019808176255360537, "loss": 0.9594, "step": 10890 }, { "epoch": 0.16, "grad_norm": 0.63671875, "learning_rate": 0.00019807687923198007, "loss": 1.0916, "step": 10895 }, { "epoch": 0.16, "grad_norm": 0.546875, "learning_rate": 0.00019807198976278434, "loss": 0.9862, "step": 10900 }, { "epoch": 0.16, "grad_norm": 0.494140625, "learning_rate": 0.00019806709414632457, "loss": 0.8475, "step": 10905 }, { "epoch": 0.16, "grad_norm": 0.51171875, "learning_rate": 0.0001980621923829076, "loss": 1.0222, "step": 10910 }, { "epoch": 0.16, "grad_norm": 0.51953125, "learning_rate": 0.00019805728447284078, "loss": 0.9598, "step": 10915 }, { "epoch": 0.16, "grad_norm": 0.546875, "learning_rate": 0.0001980523704164317, "loss": 0.9676, "step": 10920 }, { "epoch": 0.16, "grad_norm": 0.49609375, "learning_rate": 0.00019804745021398835, "loss": 0.9503, "step": 10925 }, { "epoch": 0.16, "grad_norm": 0.6953125, "learning_rate": 0.00019804252386581913, "loss": 0.967, "step": 10930 }, { "epoch": 0.16, "grad_norm": 0.55078125, "learning_rate": 0.0001980375913722329, "loss": 0.8711, "step": 10935 }, { "epoch": 0.16, "grad_norm": 0.5546875, "learning_rate": 0.00019803265273353877, "loss": 0.8866, "step": 10940 }, { "epoch": 0.16, "grad_norm": 0.53125, "learning_rate": 0.0001980277079500463, "loss": 1.1107, "step": 10945 }, { "epoch": 0.16, "grad_norm": 0.578125, "learning_rate": 0.00019802275702206546, "loss": 1.0041, "step": 10950 }, { "epoch": 0.16, "grad_norm": 0.53125, "learning_rate": 0.00019801779994990657, "loss": 0.9462, "step": 10955 }, { "epoch": 0.16, "grad_norm": 0.52734375, "learning_rate": 0.00019801283673388036, "loss": 0.8799, "step": 10960 }, { "epoch": 0.16, "grad_norm": 0.5703125, "learning_rate": 0.00019800786737429792, "loss": 1.0314, "step": 10965 }, { "epoch": 0.16, "grad_norm": 0.578125, "learning_rate": 0.00019800289187147068, "loss": 0.9071, "step": 10970 }, { "epoch": 0.16, "grad_norm": 0.56640625, "learning_rate": 0.0001979979102257106, "loss": 1.0473, "step": 10975 }, { "epoch": 0.16, "grad_norm": 0.546875, "learning_rate": 0.00019799292243732994, "loss": 0.8929, "step": 10980 }, { "epoch": 0.16, "grad_norm": 0.59375, "learning_rate": 0.00019798792850664123, "loss": 0.9483, "step": 10985 }, { "epoch": 0.16, "grad_norm": 0.65234375, "learning_rate": 0.0001979829284339576, "loss": 1.109, "step": 10990 }, { "epoch": 0.16, "grad_norm": 0.6171875, "learning_rate": 0.00019797792221959242, "loss": 0.9952, "step": 10995 }, { "epoch": 0.16, "grad_norm": 0.53515625, "learning_rate": 0.00019797290986385945, "loss": 0.9381, "step": 11000 }, { "epoch": 0.16, "grad_norm": 0.53125, "learning_rate": 0.00019796789136707296, "loss": 1.0971, "step": 11005 }, { "epoch": 0.16, "grad_norm": 0.49609375, "learning_rate": 0.0001979628667295474, "loss": 0.9669, "step": 11010 }, { "epoch": 0.16, "grad_norm": 0.80859375, "learning_rate": 0.00019795783595159784, "loss": 1.0967, "step": 11015 }, { "epoch": 0.16, "grad_norm": 0.546875, "learning_rate": 0.00019795279903353955, "loss": 0.9798, "step": 11020 }, { "epoch": 0.16, "grad_norm": 0.58203125, "learning_rate": 0.00019794775597568824, "loss": 0.9328, "step": 11025 }, { "epoch": 0.16, "grad_norm": 0.625, "learning_rate": 0.00019794270677836004, "loss": 0.8322, "step": 11030 }, { "epoch": 0.16, "grad_norm": 0.5234375, "learning_rate": 0.00019793765144187145, "loss": 0.8911, "step": 11035 }, { "epoch": 0.16, "grad_norm": 0.56640625, "learning_rate": 0.0001979325899665393, "loss": 0.9504, "step": 11040 }, { "epoch": 0.16, "grad_norm": 0.61328125, "learning_rate": 0.00019792752235268086, "loss": 1.0508, "step": 11045 }, { "epoch": 0.16, "grad_norm": 0.55078125, "learning_rate": 0.00019792244860061383, "loss": 0.9632, "step": 11050 }, { "epoch": 0.16, "grad_norm": 0.51953125, "learning_rate": 0.00019791736871065617, "loss": 1.0252, "step": 11055 }, { "epoch": 0.16, "grad_norm": 0.69921875, "learning_rate": 0.00019791228268312635, "loss": 0.9486, "step": 11060 }, { "epoch": 0.16, "grad_norm": 0.53515625, "learning_rate": 0.0001979071905183431, "loss": 1.2584, "step": 11065 }, { "epoch": 0.16, "grad_norm": 0.51953125, "learning_rate": 0.00019790209221662566, "loss": 0.8252, "step": 11070 }, { "epoch": 0.16, "grad_norm": 0.56640625, "learning_rate": 0.00019789698777829357, "loss": 0.9679, "step": 11075 }, { "epoch": 0.16, "grad_norm": 0.53125, "learning_rate": 0.0001978918772036668, "loss": 1.0461, "step": 11080 }, { "epoch": 0.16, "grad_norm": 0.58984375, "learning_rate": 0.0001978867604930657, "loss": 1.0895, "step": 11085 }, { "epoch": 0.16, "grad_norm": 0.55859375, "learning_rate": 0.00019788163764681093, "loss": 1.0478, "step": 11090 }, { "epoch": 0.16, "grad_norm": 0.55078125, "learning_rate": 0.00019787650866522364, "loss": 0.9994, "step": 11095 }, { "epoch": 0.16, "grad_norm": 0.5859375, "learning_rate": 0.00019787137354862532, "loss": 1.0482, "step": 11100 }, { "epoch": 0.16, "grad_norm": 0.61328125, "learning_rate": 0.00019786623229733785, "loss": 1.0325, "step": 11105 }, { "epoch": 0.16, "grad_norm": 0.49609375, "learning_rate": 0.00019786108491168347, "loss": 0.9032, "step": 11110 }, { "epoch": 0.16, "grad_norm": 0.53515625, "learning_rate": 0.00019785593139198484, "loss": 0.9816, "step": 11115 }, { "epoch": 0.16, "grad_norm": 0.5546875, "learning_rate": 0.00019785077173856496, "loss": 0.9775, "step": 11120 }, { "epoch": 0.16, "grad_norm": 0.4609375, "learning_rate": 0.00019784560595174732, "loss": 0.9643, "step": 11125 }, { "epoch": 0.16, "grad_norm": 0.6171875, "learning_rate": 0.00019784043403185558, "loss": 0.98, "step": 11130 }, { "epoch": 0.16, "grad_norm": 0.6328125, "learning_rate": 0.00019783525597921408, "loss": 1.0203, "step": 11135 }, { "epoch": 0.16, "grad_norm": 0.76171875, "learning_rate": 0.00019783007179414728, "loss": 1.126, "step": 11140 }, { "epoch": 0.16, "grad_norm": 0.62890625, "learning_rate": 0.00019782488147698015, "loss": 1.0501, "step": 11145 }, { "epoch": 0.16, "grad_norm": 0.515625, "learning_rate": 0.00019781968502803805, "loss": 0.8757, "step": 11150 }, { "epoch": 0.16, "grad_norm": 0.58203125, "learning_rate": 0.00019781448244764665, "loss": 0.9667, "step": 11155 }, { "epoch": 0.16, "grad_norm": 0.53125, "learning_rate": 0.00019780927373613217, "loss": 0.8694, "step": 11160 }, { "epoch": 0.16, "grad_norm": 0.53515625, "learning_rate": 0.00019780405889382094, "loss": 1.0174, "step": 11165 }, { "epoch": 0.16, "grad_norm": 0.515625, "learning_rate": 0.00019779883792103996, "loss": 1.0074, "step": 11170 }, { "epoch": 0.16, "grad_norm": 0.5625, "learning_rate": 0.0001977936108181164, "loss": 0.9958, "step": 11175 }, { "epoch": 0.16, "grad_norm": 0.859375, "learning_rate": 0.00019778837758537793, "loss": 1.0806, "step": 11180 }, { "epoch": 0.16, "grad_norm": 0.5390625, "learning_rate": 0.0001977831382231526, "loss": 1.0919, "step": 11185 }, { "epoch": 0.16, "grad_norm": 0.6015625, "learning_rate": 0.0001977778927317688, "loss": 1.0725, "step": 11190 }, { "epoch": 0.16, "grad_norm": 0.6171875, "learning_rate": 0.00019777264111155534, "loss": 1.0368, "step": 11195 }, { "epoch": 0.16, "grad_norm": 0.609375, "learning_rate": 0.00019776738336284134, "loss": 0.9784, "step": 11200 }, { "epoch": 0.16, "grad_norm": 0.6171875, "learning_rate": 0.00019776211948595646, "loss": 1.0625, "step": 11205 }, { "epoch": 0.16, "grad_norm": 0.56640625, "learning_rate": 0.00019775684948123052, "loss": 1.036, "step": 11210 }, { "epoch": 0.16, "grad_norm": 0.46484375, "learning_rate": 0.00019775157334899396, "loss": 0.8934, "step": 11215 }, { "epoch": 0.16, "grad_norm": 0.62890625, "learning_rate": 0.00019774629108957746, "loss": 0.9584, "step": 11220 }, { "epoch": 0.16, "grad_norm": 0.55078125, "learning_rate": 0.0001977410027033121, "loss": 1.0195, "step": 11225 }, { "epoch": 0.16, "grad_norm": 0.578125, "learning_rate": 0.00019773570819052938, "loss": 1.1691, "step": 11230 }, { "epoch": 0.16, "grad_norm": 0.5546875, "learning_rate": 0.00019773040755156115, "loss": 0.9637, "step": 11235 }, { "epoch": 0.16, "grad_norm": 0.55078125, "learning_rate": 0.00019772510078673965, "loss": 1.0448, "step": 11240 }, { "epoch": 0.16, "grad_norm": 0.56640625, "learning_rate": 0.00019771978789639758, "loss": 0.9052, "step": 11245 }, { "epoch": 0.16, "grad_norm": 0.50390625, "learning_rate": 0.00019771446888086787, "loss": 1.0487, "step": 11250 }, { "epoch": 0.16, "grad_norm": 0.59765625, "learning_rate": 0.00019770914374048397, "loss": 1.0113, "step": 11255 }, { "epoch": 0.16, "grad_norm": 0.59375, "learning_rate": 0.00019770381247557968, "loss": 0.9715, "step": 11260 }, { "epoch": 0.16, "grad_norm": 0.52734375, "learning_rate": 0.00019769847508648911, "loss": 0.8075, "step": 11265 }, { "epoch": 0.16, "grad_norm": 0.60546875, "learning_rate": 0.0001976931315735469, "loss": 0.871, "step": 11270 }, { "epoch": 0.16, "grad_norm": 0.52734375, "learning_rate": 0.00019768778193708793, "loss": 1.0295, "step": 11275 }, { "epoch": 0.16, "grad_norm": 0.57421875, "learning_rate": 0.0001976824261774475, "loss": 1.0336, "step": 11280 }, { "epoch": 0.16, "grad_norm": 0.6171875, "learning_rate": 0.0001976770642949614, "loss": 1.0173, "step": 11285 }, { "epoch": 0.16, "grad_norm": 0.5625, "learning_rate": 0.00019767169628996558, "loss": 0.9169, "step": 11290 }, { "epoch": 0.16, "grad_norm": 0.55859375, "learning_rate": 0.00019766632216279664, "loss": 1.0464, "step": 11295 }, { "epoch": 0.16, "grad_norm": 0.64453125, "learning_rate": 0.0001976609419137914, "loss": 1.0582, "step": 11300 }, { "epoch": 0.16, "grad_norm": 0.5859375, "learning_rate": 0.00019765555554328713, "loss": 0.9371, "step": 11305 }, { "epoch": 0.16, "grad_norm": 0.51171875, "learning_rate": 0.0001976501630516214, "loss": 0.9651, "step": 11310 }, { "epoch": 0.16, "grad_norm": 0.55078125, "learning_rate": 0.00019764476443913221, "loss": 1.0393, "step": 11315 }, { "epoch": 0.16, "grad_norm": 0.671875, "learning_rate": 0.00019763935970615798, "loss": 1.0131, "step": 11320 }, { "epoch": 0.16, "grad_norm": 0.67578125, "learning_rate": 0.00019763394885303747, "loss": 1.0477, "step": 11325 }, { "epoch": 0.16, "grad_norm": 0.65234375, "learning_rate": 0.00019762853188010987, "loss": 0.9775, "step": 11330 }, { "epoch": 0.16, "grad_norm": 0.578125, "learning_rate": 0.00019762310878771472, "loss": 1.0786, "step": 11335 }, { "epoch": 0.16, "grad_norm": 0.54296875, "learning_rate": 0.00019761767957619187, "loss": 0.9287, "step": 11340 }, { "epoch": 0.16, "grad_norm": 0.6171875, "learning_rate": 0.00019761224424588174, "loss": 1.0053, "step": 11345 }, { "epoch": 0.16, "grad_norm": 0.5390625, "learning_rate": 0.00019760680279712497, "loss": 0.963, "step": 11350 }, { "epoch": 0.16, "grad_norm": 0.5390625, "learning_rate": 0.00019760135523026263, "loss": 0.9512, "step": 11355 }, { "epoch": 0.16, "grad_norm": 0.5625, "learning_rate": 0.00019759590154563617, "loss": 0.987, "step": 11360 }, { "epoch": 0.16, "grad_norm": 0.5703125, "learning_rate": 0.00019759044174358744, "loss": 0.9325, "step": 11365 }, { "epoch": 0.16, "grad_norm": 0.53515625, "learning_rate": 0.0001975849758244587, "loss": 0.9854, "step": 11370 }, { "epoch": 0.16, "grad_norm": 0.5390625, "learning_rate": 0.00019757950378859252, "loss": 1.0722, "step": 11375 }, { "epoch": 0.16, "grad_norm": 0.5625, "learning_rate": 0.00019757402563633193, "loss": 1.0457, "step": 11380 }, { "epoch": 0.16, "grad_norm": 0.5234375, "learning_rate": 0.00019756854136802025, "loss": 0.8436, "step": 11385 }, { "epoch": 0.16, "grad_norm": 0.64453125, "learning_rate": 0.00019756305098400133, "loss": 1.1211, "step": 11390 }, { "epoch": 0.16, "grad_norm": 0.6640625, "learning_rate": 0.0001975575544846192, "loss": 1.0195, "step": 11395 }, { "epoch": 0.16, "grad_norm": 0.57421875, "learning_rate": 0.00019755205187021847, "loss": 1.0107, "step": 11400 }, { "epoch": 0.16, "grad_norm": 0.578125, "learning_rate": 0.00019754654314114404, "loss": 0.9498, "step": 11405 }, { "epoch": 0.16, "grad_norm": 0.5625, "learning_rate": 0.00019754102829774118, "loss": 1.0059, "step": 11410 }, { "epoch": 0.16, "grad_norm": 0.51953125, "learning_rate": 0.00019753550734035558, "loss": 0.8315, "step": 11415 }, { "epoch": 0.16, "grad_norm": 0.55078125, "learning_rate": 0.00019752998026933326, "loss": 1.0347, "step": 11420 }, { "epoch": 0.16, "grad_norm": 0.53515625, "learning_rate": 0.00019752444708502073, "loss": 0.9961, "step": 11425 }, { "epoch": 0.16, "grad_norm": 0.546875, "learning_rate": 0.0001975189077877648, "loss": 1.0512, "step": 11430 }, { "epoch": 0.16, "grad_norm": 0.69140625, "learning_rate": 0.00019751336237791263, "loss": 1.0064, "step": 11435 }, { "epoch": 0.16, "grad_norm": 0.6015625, "learning_rate": 0.0001975078108558119, "loss": 0.987, "step": 11440 }, { "epoch": 0.16, "grad_norm": 0.55078125, "learning_rate": 0.0001975022532218105, "loss": 1.0732, "step": 11445 }, { "epoch": 0.16, "grad_norm": 0.5390625, "learning_rate": 0.00019749668947625682, "loss": 0.9559, "step": 11450 }, { "epoch": 0.16, "grad_norm": 0.5546875, "learning_rate": 0.0001974911196194996, "loss": 0.9522, "step": 11455 }, { "epoch": 0.16, "grad_norm": 0.69921875, "learning_rate": 0.00019748554365188798, "loss": 0.9707, "step": 11460 }, { "epoch": 0.16, "grad_norm": 0.55859375, "learning_rate": 0.00019747996157377148, "loss": 0.8298, "step": 11465 }, { "epoch": 0.16, "grad_norm": 0.5859375, "learning_rate": 0.00019747437338549993, "loss": 0.9397, "step": 11470 }, { "epoch": 0.16, "grad_norm": 0.5546875, "learning_rate": 0.00019746877908742363, "loss": 0.8971, "step": 11475 }, { "epoch": 0.16, "grad_norm": 0.58203125, "learning_rate": 0.00019746317867989327, "loss": 1.0147, "step": 11480 }, { "epoch": 0.16, "grad_norm": 0.5390625, "learning_rate": 0.0001974575721632599, "loss": 0.9297, "step": 11485 }, { "epoch": 0.16, "grad_norm": 0.53515625, "learning_rate": 0.00019745195953787487, "loss": 0.9276, "step": 11490 }, { "epoch": 0.16, "grad_norm": 0.52734375, "learning_rate": 0.00019744634080409005, "loss": 0.8326, "step": 11495 }, { "epoch": 0.16, "grad_norm": 0.498046875, "learning_rate": 0.00019744071596225759, "loss": 0.9892, "step": 11500 }, { "epoch": 0.17, "grad_norm": 0.56640625, "learning_rate": 0.0001974350850127301, "loss": 1.118, "step": 11505 }, { "epoch": 0.17, "grad_norm": 0.56640625, "learning_rate": 0.0001974294479558605, "loss": 0.8706, "step": 11510 }, { "epoch": 0.17, "grad_norm": 0.52734375, "learning_rate": 0.00019742380479200212, "loss": 0.9876, "step": 11515 }, { "epoch": 0.17, "grad_norm": 0.6328125, "learning_rate": 0.00019741815552150874, "loss": 1.0086, "step": 11520 }, { "epoch": 0.17, "grad_norm": 0.5546875, "learning_rate": 0.0001974125001447344, "loss": 0.8989, "step": 11525 }, { "epoch": 0.17, "grad_norm": 0.58203125, "learning_rate": 0.0001974068386620336, "loss": 0.7927, "step": 11530 }, { "epoch": 0.17, "grad_norm": 0.5703125, "learning_rate": 0.0001974011710737612, "loss": 1.1665, "step": 11535 }, { "epoch": 0.17, "grad_norm": 0.6640625, "learning_rate": 0.00019739549738027252, "loss": 0.921, "step": 11540 }, { "epoch": 0.17, "grad_norm": 0.6328125, "learning_rate": 0.0001973898175819231, "loss": 1.1424, "step": 11545 }, { "epoch": 0.17, "grad_norm": 0.482421875, "learning_rate": 0.000197384131679069, "loss": 1.1059, "step": 11550 }, { "epoch": 0.17, "grad_norm": 0.56640625, "learning_rate": 0.0001973784396720666, "loss": 1.1901, "step": 11555 }, { "epoch": 0.17, "grad_norm": 0.64453125, "learning_rate": 0.00019737274156127272, "loss": 0.9919, "step": 11560 }, { "epoch": 0.17, "grad_norm": 0.53125, "learning_rate": 0.0001973670373470445, "loss": 0.9388, "step": 11565 }, { "epoch": 0.17, "grad_norm": 0.51953125, "learning_rate": 0.0001973613270297395, "loss": 1.0375, "step": 11570 }, { "epoch": 0.17, "grad_norm": 0.625, "learning_rate": 0.00019735561060971565, "loss": 0.942, "step": 11575 }, { "epoch": 0.17, "grad_norm": 0.55078125, "learning_rate": 0.0001973498880873312, "loss": 1.0162, "step": 11580 }, { "epoch": 0.17, "grad_norm": 0.486328125, "learning_rate": 0.00019734415946294492, "loss": 1.0539, "step": 11585 }, { "epoch": 0.17, "grad_norm": 0.60546875, "learning_rate": 0.00019733842473691583, "loss": 1.2076, "step": 11590 }, { "epoch": 0.17, "grad_norm": 0.498046875, "learning_rate": 0.00019733268390960344, "loss": 0.9424, "step": 11595 }, { "epoch": 0.17, "grad_norm": 0.62109375, "learning_rate": 0.00019732693698136758, "loss": 0.8787, "step": 11600 }, { "epoch": 0.17, "grad_norm": 0.546875, "learning_rate": 0.00019732118395256846, "loss": 1.0662, "step": 11605 }, { "epoch": 0.17, "grad_norm": 0.59375, "learning_rate": 0.0001973154248235667, "loss": 1.0387, "step": 11610 }, { "epoch": 0.17, "grad_norm": 0.578125, "learning_rate": 0.00019730965959472325, "loss": 1.0715, "step": 11615 }, { "epoch": 0.17, "grad_norm": 0.6484375, "learning_rate": 0.00019730388826639953, "loss": 0.9547, "step": 11620 }, { "epoch": 0.17, "grad_norm": 0.51171875, "learning_rate": 0.00019729811083895723, "loss": 0.8558, "step": 11625 }, { "epoch": 0.17, "grad_norm": 0.55078125, "learning_rate": 0.00019729232731275858, "loss": 0.8651, "step": 11630 }, { "epoch": 0.17, "grad_norm": 0.60546875, "learning_rate": 0.00019728653768816604, "loss": 1.0039, "step": 11635 }, { "epoch": 0.17, "grad_norm": 0.51953125, "learning_rate": 0.0001972807419655425, "loss": 0.8732, "step": 11640 }, { "epoch": 0.17, "grad_norm": 0.52734375, "learning_rate": 0.0001972749401452513, "loss": 1.0182, "step": 11645 }, { "epoch": 0.17, "grad_norm": 0.60546875, "learning_rate": 0.00019726913222765601, "loss": 0.9004, "step": 11650 }, { "epoch": 0.17, "grad_norm": 0.59375, "learning_rate": 0.00019726331821312077, "loss": 1.0023, "step": 11655 }, { "epoch": 0.17, "grad_norm": 0.5078125, "learning_rate": 0.00019725749810200994, "loss": 0.8979, "step": 11660 }, { "epoch": 0.17, "grad_norm": 0.625, "learning_rate": 0.00019725167189468842, "loss": 1.0709, "step": 11665 }, { "epoch": 0.17, "grad_norm": 0.6015625, "learning_rate": 0.00019724583959152128, "loss": 1.1703, "step": 11670 }, { "epoch": 0.17, "grad_norm": 0.54296875, "learning_rate": 0.0001972400011928742, "loss": 1.0387, "step": 11675 }, { "epoch": 0.17, "grad_norm": 0.58203125, "learning_rate": 0.00019723415669911307, "loss": 1.0629, "step": 11680 }, { "epoch": 0.17, "grad_norm": 0.62890625, "learning_rate": 0.0001972283061106043, "loss": 0.961, "step": 11685 }, { "epoch": 0.17, "grad_norm": 0.52734375, "learning_rate": 0.00019722244942771452, "loss": 1.0338, "step": 11690 }, { "epoch": 0.17, "grad_norm": 0.62109375, "learning_rate": 0.0001972165866508109, "loss": 0.9863, "step": 11695 }, { "epoch": 0.17, "grad_norm": 0.55859375, "learning_rate": 0.0001972107177802609, "loss": 0.982, "step": 11700 }, { "epoch": 0.17, "grad_norm": 0.71484375, "learning_rate": 0.0001972048428164324, "loss": 1.0622, "step": 11705 }, { "epoch": 0.17, "grad_norm": 0.546875, "learning_rate": 0.00019719896175969369, "loss": 0.9321, "step": 11710 }, { "epoch": 0.17, "grad_norm": 0.60546875, "learning_rate": 0.00019719307461041333, "loss": 0.9296, "step": 11715 }, { "epoch": 0.17, "grad_norm": 0.5859375, "learning_rate": 0.00019718718136896033, "loss": 0.9618, "step": 11720 }, { "epoch": 0.17, "grad_norm": 0.61328125, "learning_rate": 0.00019718128203570414, "loss": 1.0673, "step": 11725 }, { "epoch": 0.17, "grad_norm": 0.57421875, "learning_rate": 0.0001971753766110145, "loss": 0.816, "step": 11730 }, { "epoch": 0.17, "grad_norm": 0.71484375, "learning_rate": 0.00019716946509526156, "loss": 0.9263, "step": 11735 }, { "epoch": 0.17, "grad_norm": 0.62109375, "learning_rate": 0.00019716354748881593, "loss": 1.0468, "step": 11740 }, { "epoch": 0.17, "grad_norm": 0.51953125, "learning_rate": 0.00019715762379204845, "loss": 0.867, "step": 11745 }, { "epoch": 0.17, "grad_norm": 0.5703125, "learning_rate": 0.00019715169400533048, "loss": 1.0149, "step": 11750 }, { "epoch": 0.17, "grad_norm": 0.58984375, "learning_rate": 0.0001971457581290337, "loss": 0.7832, "step": 11755 }, { "epoch": 0.17, "grad_norm": 0.55078125, "learning_rate": 0.0001971398161635301, "loss": 1.0403, "step": 11760 }, { "epoch": 0.17, "grad_norm": 0.44921875, "learning_rate": 0.00019713386810919222, "loss": 1.0341, "step": 11765 }, { "epoch": 0.17, "grad_norm": 0.546875, "learning_rate": 0.00019712791396639287, "loss": 0.9868, "step": 11770 }, { "epoch": 0.17, "grad_norm": 0.62890625, "learning_rate": 0.00019712195373550526, "loss": 1.0789, "step": 11775 }, { "epoch": 0.17, "grad_norm": 0.62109375, "learning_rate": 0.00019711598741690295, "loss": 1.1115, "step": 11780 }, { "epoch": 0.17, "grad_norm": 0.61328125, "learning_rate": 0.00019711001501096, "loss": 1.047, "step": 11785 }, { "epoch": 0.17, "grad_norm": 0.50390625, "learning_rate": 0.00019710403651805063, "loss": 1.0612, "step": 11790 }, { "epoch": 0.17, "grad_norm": 0.65234375, "learning_rate": 0.0001970980519385497, "loss": 1.044, "step": 11795 }, { "epoch": 0.17, "grad_norm": 0.62109375, "learning_rate": 0.0001970920612728323, "loss": 1.0137, "step": 11800 }, { "epoch": 0.17, "grad_norm": 0.6796875, "learning_rate": 0.0001970860645212739, "loss": 1.017, "step": 11805 }, { "epoch": 0.17, "grad_norm": 0.6171875, "learning_rate": 0.0001970800616842504, "loss": 1.0174, "step": 11810 }, { "epoch": 0.17, "grad_norm": 0.578125, "learning_rate": 0.00019707405276213807, "loss": 1.0831, "step": 11815 }, { "epoch": 0.17, "grad_norm": 0.51171875, "learning_rate": 0.00019706803775531358, "loss": 1.0993, "step": 11820 }, { "epoch": 0.17, "grad_norm": 0.5546875, "learning_rate": 0.0001970620166641539, "loss": 1.0144, "step": 11825 }, { "epoch": 0.17, "grad_norm": 0.439453125, "learning_rate": 0.00019705598948903649, "loss": 0.8345, "step": 11830 }, { "epoch": 0.17, "grad_norm": 0.5546875, "learning_rate": 0.00019704995623033914, "loss": 0.954, "step": 11835 }, { "epoch": 0.17, "grad_norm": 0.51953125, "learning_rate": 0.00019704391688843997, "loss": 0.9742, "step": 11840 }, { "epoch": 0.17, "grad_norm": 0.55859375, "learning_rate": 0.00019703787146371757, "loss": 0.858, "step": 11845 }, { "epoch": 0.17, "grad_norm": 0.5390625, "learning_rate": 0.0001970318199565509, "loss": 0.9053, "step": 11850 }, { "epoch": 0.17, "grad_norm": 0.58984375, "learning_rate": 0.0001970257623673192, "loss": 0.9909, "step": 11855 }, { "epoch": 0.17, "grad_norm": 0.66015625, "learning_rate": 0.0001970196986964022, "loss": 1.0612, "step": 11860 }, { "epoch": 0.17, "grad_norm": 0.55078125, "learning_rate": 0.00019701362894418005, "loss": 0.9934, "step": 11865 }, { "epoch": 0.17, "grad_norm": 0.52734375, "learning_rate": 0.00019700755311103315, "loss": 0.9206, "step": 11870 }, { "epoch": 0.17, "grad_norm": 0.5, "learning_rate": 0.0001970014711973423, "loss": 0.8881, "step": 11875 }, { "epoch": 0.17, "grad_norm": 0.625, "learning_rate": 0.00019699538320348876, "loss": 0.99, "step": 11880 }, { "epoch": 0.17, "grad_norm": 0.65234375, "learning_rate": 0.00019698928912985415, "loss": 1.1218, "step": 11885 }, { "epoch": 0.17, "grad_norm": 0.58984375, "learning_rate": 0.00019698318897682043, "loss": 0.842, "step": 11890 }, { "epoch": 0.17, "grad_norm": 0.734375, "learning_rate": 0.00019697708274476995, "loss": 0.9669, "step": 11895 }, { "epoch": 0.17, "grad_norm": 0.5546875, "learning_rate": 0.0001969709704340855, "loss": 1.0681, "step": 11900 }, { "epoch": 0.17, "grad_norm": 0.54296875, "learning_rate": 0.0001969648520451502, "loss": 0.9509, "step": 11905 }, { "epoch": 0.17, "grad_norm": 0.5390625, "learning_rate": 0.0001969587275783475, "loss": 0.9891, "step": 11910 }, { "epoch": 0.17, "grad_norm": 0.55859375, "learning_rate": 0.00019695259703406134, "loss": 0.9673, "step": 11915 }, { "epoch": 0.17, "grad_norm": 0.5625, "learning_rate": 0.00019694646041267598, "loss": 1.0024, "step": 11920 }, { "epoch": 0.17, "grad_norm": 0.53515625, "learning_rate": 0.00019694031771457608, "loss": 1.1921, "step": 11925 }, { "epoch": 0.17, "grad_norm": 0.4765625, "learning_rate": 0.00019693416894014666, "loss": 0.9211, "step": 11930 }, { "epoch": 0.17, "grad_norm": 0.55859375, "learning_rate": 0.00019692801408977313, "loss": 0.9258, "step": 11935 }, { "epoch": 0.17, "grad_norm": 0.55078125, "learning_rate": 0.00019692185316384127, "loss": 1.2574, "step": 11940 }, { "epoch": 0.17, "grad_norm": 0.62890625, "learning_rate": 0.0001969156861627373, "loss": 1.0631, "step": 11945 }, { "epoch": 0.17, "grad_norm": 0.5, "learning_rate": 0.0001969095130868477, "loss": 0.9337, "step": 11950 }, { "epoch": 0.17, "grad_norm": 0.66796875, "learning_rate": 0.00019690333393655948, "loss": 1.0891, "step": 11955 }, { "epoch": 0.17, "grad_norm": 0.5546875, "learning_rate": 0.0001968971487122599, "loss": 0.8698, "step": 11960 }, { "epoch": 0.17, "grad_norm": 0.59765625, "learning_rate": 0.00019689095741433672, "loss": 1.0847, "step": 11965 }, { "epoch": 0.17, "grad_norm": 0.5859375, "learning_rate": 0.00019688476004317795, "loss": 1.0954, "step": 11970 }, { "epoch": 0.17, "grad_norm": 0.59375, "learning_rate": 0.00019687855659917207, "loss": 1.0362, "step": 11975 }, { "epoch": 0.17, "grad_norm": 0.7109375, "learning_rate": 0.00019687234708270793, "loss": 0.9058, "step": 11980 }, { "epoch": 0.17, "grad_norm": 0.50390625, "learning_rate": 0.00019686613149417478, "loss": 0.9234, "step": 11985 }, { "epoch": 0.17, "grad_norm": 0.56640625, "learning_rate": 0.00019685990983396215, "loss": 0.9797, "step": 11990 }, { "epoch": 0.17, "grad_norm": 0.5859375, "learning_rate": 0.00019685368210246008, "loss": 0.9666, "step": 11995 }, { "epoch": 0.17, "grad_norm": 0.578125, "learning_rate": 0.00019684744830005893, "loss": 0.9774, "step": 12000 }, { "epoch": 0.17, "grad_norm": 0.5625, "learning_rate": 0.00019684120842714938, "loss": 1.1606, "step": 12005 }, { "epoch": 0.17, "grad_norm": 0.51171875, "learning_rate": 0.0001968349624841226, "loss": 0.9442, "step": 12010 }, { "epoch": 0.17, "grad_norm": 0.53515625, "learning_rate": 0.0001968287104713701, "loss": 1.0067, "step": 12015 }, { "epoch": 0.17, "grad_norm": 0.5078125, "learning_rate": 0.00019682245238928373, "loss": 1.0373, "step": 12020 }, { "epoch": 0.17, "grad_norm": 0.609375, "learning_rate": 0.00019681618823825581, "loss": 1.0156, "step": 12025 }, { "epoch": 0.17, "grad_norm": 0.53125, "learning_rate": 0.00019680991801867892, "loss": 0.9226, "step": 12030 }, { "epoch": 0.17, "grad_norm": 0.54296875, "learning_rate": 0.00019680364173094609, "loss": 0.9687, "step": 12035 }, { "epoch": 0.17, "grad_norm": 0.5546875, "learning_rate": 0.00019679735937545075, "loss": 0.9303, "step": 12040 }, { "epoch": 0.17, "grad_norm": 0.59765625, "learning_rate": 0.00019679107095258672, "loss": 0.9594, "step": 12045 }, { "epoch": 0.17, "grad_norm": 0.59765625, "learning_rate": 0.0001967847764627481, "loss": 0.9388, "step": 12050 }, { "epoch": 0.17, "grad_norm": 0.53515625, "learning_rate": 0.00019677847590632947, "loss": 0.8384, "step": 12055 }, { "epoch": 0.17, "grad_norm": 0.5625, "learning_rate": 0.00019677216928372574, "loss": 1.0197, "step": 12060 }, { "epoch": 0.17, "grad_norm": 0.53515625, "learning_rate": 0.00019676585659533224, "loss": 0.9054, "step": 12065 }, { "epoch": 0.17, "grad_norm": 0.4765625, "learning_rate": 0.00019675953784154465, "loss": 0.8919, "step": 12070 }, { "epoch": 0.17, "grad_norm": 0.609375, "learning_rate": 0.000196753213022759, "loss": 0.9564, "step": 12075 }, { "epoch": 0.17, "grad_norm": 0.6875, "learning_rate": 0.00019674688213937178, "loss": 0.9152, "step": 12080 }, { "epoch": 0.17, "grad_norm": 0.58203125, "learning_rate": 0.00019674054519177981, "loss": 1.0695, "step": 12085 }, { "epoch": 0.17, "grad_norm": 0.5703125, "learning_rate": 0.00019673420218038027, "loss": 1.0131, "step": 12090 }, { "epoch": 0.17, "grad_norm": 0.51953125, "learning_rate": 0.00019672785310557078, "loss": 1.0804, "step": 12095 }, { "epoch": 0.17, "grad_norm": 0.5703125, "learning_rate": 0.00019672149796774927, "loss": 0.9286, "step": 12100 }, { "epoch": 0.17, "grad_norm": 0.56640625, "learning_rate": 0.00019671513676731414, "loss": 0.992, "step": 12105 }, { "epoch": 0.17, "grad_norm": 0.57421875, "learning_rate": 0.0001967087695046641, "loss": 1.0514, "step": 12110 }, { "epoch": 0.17, "grad_norm": 0.58984375, "learning_rate": 0.0001967023961801982, "loss": 1.1135, "step": 12115 }, { "epoch": 0.17, "grad_norm": 0.46875, "learning_rate": 0.000196696016794316, "loss": 0.8374, "step": 12120 }, { "epoch": 0.17, "grad_norm": 0.6015625, "learning_rate": 0.00019668963134741733, "loss": 1.0395, "step": 12125 }, { "epoch": 0.17, "grad_norm": 0.51171875, "learning_rate": 0.00019668323983990246, "loss": 0.776, "step": 12130 }, { "epoch": 0.17, "grad_norm": 0.57421875, "learning_rate": 0.000196676842272172, "loss": 0.9524, "step": 12135 }, { "epoch": 0.17, "grad_norm": 0.6328125, "learning_rate": 0.00019667043864462694, "loss": 0.8601, "step": 12140 }, { "epoch": 0.17, "grad_norm": 0.58203125, "learning_rate": 0.00019666402895766871, "loss": 0.98, "step": 12145 }, { "epoch": 0.17, "grad_norm": 0.546875, "learning_rate": 0.00019665761321169907, "loss": 0.921, "step": 12150 }, { "epoch": 0.17, "grad_norm": 0.455078125, "learning_rate": 0.0001966511914071201, "loss": 0.9572, "step": 12155 }, { "epoch": 0.17, "grad_norm": 0.54296875, "learning_rate": 0.00019664476354433444, "loss": 0.9071, "step": 12160 }, { "epoch": 0.17, "grad_norm": 0.5546875, "learning_rate": 0.00019663832962374487, "loss": 0.9123, "step": 12165 }, { "epoch": 0.17, "grad_norm": 0.5546875, "learning_rate": 0.00019663188964575475, "loss": 0.8975, "step": 12170 }, { "epoch": 0.17, "grad_norm": 0.5, "learning_rate": 0.00019662544361076774, "loss": 0.9547, "step": 12175 }, { "epoch": 0.17, "grad_norm": 0.59765625, "learning_rate": 0.0001966189915191879, "loss": 1.0201, "step": 12180 }, { "epoch": 0.17, "grad_norm": 0.59375, "learning_rate": 0.00019661253337141963, "loss": 0.985, "step": 12185 }, { "epoch": 0.17, "grad_norm": 0.9765625, "learning_rate": 0.00019660606916786772, "loss": 0.9262, "step": 12190 }, { "epoch": 0.17, "grad_norm": 0.57421875, "learning_rate": 0.00019659959890893736, "loss": 0.9385, "step": 12195 }, { "epoch": 0.18, "grad_norm": 0.53515625, "learning_rate": 0.00019659312259503412, "loss": 0.9243, "step": 12200 }, { "epoch": 0.18, "grad_norm": 0.546875, "learning_rate": 0.00019658664022656396, "loss": 0.9716, "step": 12205 }, { "epoch": 0.18, "grad_norm": 0.6015625, "learning_rate": 0.00019658015180393318, "loss": 1.0187, "step": 12210 }, { "epoch": 0.18, "grad_norm": 0.52734375, "learning_rate": 0.0001965736573275485, "loss": 0.9634, "step": 12215 }, { "epoch": 0.18, "grad_norm": 0.5546875, "learning_rate": 0.00019656715679781698, "loss": 0.9778, "step": 12220 }, { "epoch": 0.18, "grad_norm": 0.5390625, "learning_rate": 0.0001965606502151461, "loss": 0.8875, "step": 12225 }, { "epoch": 0.18, "grad_norm": 0.6015625, "learning_rate": 0.0001965541375799437, "loss": 0.9571, "step": 12230 }, { "epoch": 0.18, "grad_norm": 0.58203125, "learning_rate": 0.000196547618892618, "loss": 0.9802, "step": 12235 }, { "epoch": 0.18, "grad_norm": 0.66015625, "learning_rate": 0.00019654109415357754, "loss": 1.049, "step": 12240 }, { "epoch": 0.18, "grad_norm": 0.61328125, "learning_rate": 0.0001965345633632314, "loss": 1.0749, "step": 12245 }, { "epoch": 0.18, "grad_norm": 0.5703125, "learning_rate": 0.00019652802652198886, "loss": 0.8459, "step": 12250 }, { "epoch": 0.18, "grad_norm": 0.578125, "learning_rate": 0.00019652148363025966, "loss": 1.0139, "step": 12255 }, { "epoch": 0.18, "grad_norm": 0.48828125, "learning_rate": 0.00019651493468845398, "loss": 0.8877, "step": 12260 }, { "epoch": 0.18, "grad_norm": 0.60546875, "learning_rate": 0.00019650837969698224, "loss": 0.9687, "step": 12265 }, { "epoch": 0.18, "grad_norm": 0.60546875, "learning_rate": 0.00019650181865625536, "loss": 0.9932, "step": 12270 }, { "epoch": 0.18, "grad_norm": 0.53125, "learning_rate": 0.0001964952515666846, "loss": 0.886, "step": 12275 }, { "epoch": 0.18, "grad_norm": 0.66015625, "learning_rate": 0.00019648867842868154, "loss": 0.963, "step": 12280 }, { "epoch": 0.18, "grad_norm": 0.6171875, "learning_rate": 0.00019648209924265825, "loss": 1.1426, "step": 12285 }, { "epoch": 0.18, "grad_norm": 0.578125, "learning_rate": 0.00019647551400902704, "loss": 0.9924, "step": 12290 }, { "epoch": 0.18, "grad_norm": 0.5625, "learning_rate": 0.00019646892272820078, "loss": 1.0424, "step": 12295 }, { "epoch": 0.18, "grad_norm": 0.56640625, "learning_rate": 0.00019646232540059257, "loss": 1.1567, "step": 12300 }, { "epoch": 0.18, "grad_norm": 0.54296875, "learning_rate": 0.00019645572202661596, "loss": 1.0748, "step": 12305 }, { "epoch": 0.18, "grad_norm": 0.5546875, "learning_rate": 0.00019644911260668482, "loss": 0.9622, "step": 12310 }, { "epoch": 0.18, "grad_norm": 0.5390625, "learning_rate": 0.00019644249714121346, "loss": 0.9486, "step": 12315 }, { "epoch": 0.18, "grad_norm": 0.546875, "learning_rate": 0.00019643587563061653, "loss": 0.9666, "step": 12320 }, { "epoch": 0.18, "grad_norm": 0.58984375, "learning_rate": 0.0001964292480753091, "loss": 1.1216, "step": 12325 }, { "epoch": 0.18, "grad_norm": 0.6171875, "learning_rate": 0.00019642261447570656, "loss": 1.001, "step": 12330 }, { "epoch": 0.18, "grad_norm": 0.5703125, "learning_rate": 0.00019641597483222476, "loss": 0.9032, "step": 12335 }, { "epoch": 0.18, "grad_norm": 0.5234375, "learning_rate": 0.0001964093291452798, "loss": 0.863, "step": 12340 }, { "epoch": 0.18, "grad_norm": 0.53125, "learning_rate": 0.00019640267741528834, "loss": 1.0108, "step": 12345 }, { "epoch": 0.18, "grad_norm": 0.5234375, "learning_rate": 0.00019639601964266726, "loss": 1.112, "step": 12350 }, { "epoch": 0.18, "grad_norm": 0.625, "learning_rate": 0.00019638935582783385, "loss": 0.9948, "step": 12355 }, { "epoch": 0.18, "grad_norm": 0.55078125, "learning_rate": 0.00019638268597120585, "loss": 0.9937, "step": 12360 }, { "epoch": 0.18, "grad_norm": 0.53515625, "learning_rate": 0.0001963760100732013, "loss": 1.0482, "step": 12365 }, { "epoch": 0.18, "grad_norm": 0.58203125, "learning_rate": 0.00019636932813423875, "loss": 0.9995, "step": 12370 }, { "epoch": 0.18, "grad_norm": 0.478515625, "learning_rate": 0.0001963626401547369, "loss": 0.9609, "step": 12375 }, { "epoch": 0.18, "grad_norm": 0.5390625, "learning_rate": 0.000196355946135115, "loss": 1.0666, "step": 12380 }, { "epoch": 0.18, "grad_norm": 0.515625, "learning_rate": 0.0001963492460757927, "loss": 0.8948, "step": 12385 }, { "epoch": 0.18, "grad_norm": 0.66796875, "learning_rate": 0.0001963425399771899, "loss": 0.9701, "step": 12390 }, { "epoch": 0.18, "grad_norm": 0.66015625, "learning_rate": 0.00019633582783972698, "loss": 0.9144, "step": 12395 }, { "epoch": 0.18, "grad_norm": 0.58203125, "learning_rate": 0.00019632910966382465, "loss": 0.9507, "step": 12400 }, { "epoch": 0.18, "grad_norm": 0.6328125, "learning_rate": 0.00019632238544990403, "loss": 1.0839, "step": 12405 }, { "epoch": 0.18, "grad_norm": 0.578125, "learning_rate": 0.00019631565519838656, "loss": 1.1661, "step": 12410 }, { "epoch": 0.18, "grad_norm": 0.52734375, "learning_rate": 0.00019630891890969415, "loss": 1.0304, "step": 12415 }, { "epoch": 0.18, "grad_norm": 0.546875, "learning_rate": 0.000196302176584249, "loss": 0.8476, "step": 12420 }, { "epoch": 0.18, "grad_norm": 0.65625, "learning_rate": 0.00019629542822247373, "loss": 1.0206, "step": 12425 }, { "epoch": 0.18, "grad_norm": 0.57421875, "learning_rate": 0.00019628867382479138, "loss": 0.9953, "step": 12430 }, { "epoch": 0.18, "grad_norm": 0.5625, "learning_rate": 0.0001962819133916253, "loss": 0.9312, "step": 12435 }, { "epoch": 0.18, "grad_norm": 0.61328125, "learning_rate": 0.0001962751469233992, "loss": 1.1058, "step": 12440 }, { "epoch": 0.18, "grad_norm": 0.57421875, "learning_rate": 0.00019626837442053726, "loss": 0.9904, "step": 12445 }, { "epoch": 0.18, "grad_norm": 0.56640625, "learning_rate": 0.00019626159588346392, "loss": 0.8433, "step": 12450 }, { "epoch": 0.18, "grad_norm": 0.52734375, "learning_rate": 0.00019625481131260418, "loss": 0.9796, "step": 12455 }, { "epoch": 0.18, "grad_norm": 0.62890625, "learning_rate": 0.00019624802070838325, "loss": 0.9605, "step": 12460 }, { "epoch": 0.18, "grad_norm": 0.546875, "learning_rate": 0.0001962412240712267, "loss": 1.0074, "step": 12465 }, { "epoch": 0.18, "grad_norm": 0.546875, "learning_rate": 0.00019623442140156066, "loss": 0.9153, "step": 12470 }, { "epoch": 0.18, "grad_norm": 0.5859375, "learning_rate": 0.0001962276126998115, "loss": 0.9167, "step": 12475 }, { "epoch": 0.18, "grad_norm": 0.66796875, "learning_rate": 0.00019622079796640597, "loss": 1.0444, "step": 12480 }, { "epoch": 0.18, "grad_norm": 0.61328125, "learning_rate": 0.0001962139772017712, "loss": 1.0472, "step": 12485 }, { "epoch": 0.18, "grad_norm": 0.5, "learning_rate": 0.0001962071504063348, "loss": 0.9135, "step": 12490 }, { "epoch": 0.18, "grad_norm": 0.5546875, "learning_rate": 0.00019620031758052465, "loss": 1.1986, "step": 12495 }, { "epoch": 0.18, "grad_norm": 0.515625, "learning_rate": 0.000196193478724769, "loss": 0.8138, "step": 12500 }, { "epoch": 0.18, "grad_norm": 0.50390625, "learning_rate": 0.00019618663383949656, "loss": 0.8546, "step": 12505 }, { "epoch": 0.18, "grad_norm": 0.56640625, "learning_rate": 0.00019617978292513634, "loss": 1.0783, "step": 12510 }, { "epoch": 0.18, "grad_norm": 0.484375, "learning_rate": 0.00019617292598211782, "loss": 0.8674, "step": 12515 }, { "epoch": 0.18, "grad_norm": 0.546875, "learning_rate": 0.00019616606301087078, "loss": 0.9813, "step": 12520 }, { "epoch": 0.18, "grad_norm": 0.74609375, "learning_rate": 0.00019615919401182535, "loss": 0.9427, "step": 12525 }, { "epoch": 0.18, "grad_norm": 0.5390625, "learning_rate": 0.00019615231898541214, "loss": 1.0899, "step": 12530 }, { "epoch": 0.18, "grad_norm": 0.6171875, "learning_rate": 0.00019614543793206206, "loss": 1.0309, "step": 12535 }, { "epoch": 0.18, "grad_norm": 0.56640625, "learning_rate": 0.00019613855085220646, "loss": 1.0088, "step": 12540 }, { "epoch": 0.18, "grad_norm": 0.64453125, "learning_rate": 0.00019613165774627696, "loss": 0.92, "step": 12545 }, { "epoch": 0.18, "grad_norm": 0.58984375, "learning_rate": 0.0001961247586147057, "loss": 0.8939, "step": 12550 }, { "epoch": 0.18, "grad_norm": 0.55078125, "learning_rate": 0.00019611785345792507, "loss": 0.9448, "step": 12555 }, { "epoch": 0.18, "grad_norm": 0.5234375, "learning_rate": 0.0001961109422763679, "loss": 0.9925, "step": 12560 }, { "epoch": 0.18, "grad_norm": 0.5, "learning_rate": 0.00019610402507046744, "loss": 1.0034, "step": 12565 }, { "epoch": 0.18, "grad_norm": 0.62890625, "learning_rate": 0.00019609710184065722, "loss": 0.9819, "step": 12570 }, { "epoch": 0.18, "grad_norm": 0.5390625, "learning_rate": 0.0001960901725873712, "loss": 0.9709, "step": 12575 }, { "epoch": 0.18, "grad_norm": 0.6640625, "learning_rate": 0.00019608323731104375, "loss": 0.8864, "step": 12580 }, { "epoch": 0.18, "grad_norm": 0.5625, "learning_rate": 0.00019607629601210954, "loss": 0.8654, "step": 12585 }, { "epoch": 0.18, "grad_norm": 0.458984375, "learning_rate": 0.0001960693486910037, "loss": 0.9974, "step": 12590 }, { "epoch": 0.18, "grad_norm": 0.59375, "learning_rate": 0.00019606239534816165, "loss": 0.9497, "step": 12595 }, { "epoch": 0.18, "grad_norm": 0.6171875, "learning_rate": 0.00019605543598401925, "loss": 1.0166, "step": 12600 }, { "epoch": 0.18, "grad_norm": 0.58984375, "learning_rate": 0.0001960484705990127, "loss": 0.8977, "step": 12605 }, { "epoch": 0.18, "grad_norm": 0.61328125, "learning_rate": 0.0001960414991935787, "loss": 1.0628, "step": 12610 }, { "epoch": 0.18, "grad_norm": 0.80859375, "learning_rate": 0.00019603452176815406, "loss": 1.0787, "step": 12615 }, { "epoch": 0.18, "grad_norm": 0.62890625, "learning_rate": 0.00019602753832317628, "loss": 0.9722, "step": 12620 }, { "epoch": 0.18, "grad_norm": 0.59375, "learning_rate": 0.000196020548859083, "loss": 0.9783, "step": 12625 }, { "epoch": 0.18, "grad_norm": 0.62890625, "learning_rate": 0.00019601355337631237, "loss": 0.9863, "step": 12630 }, { "epoch": 0.18, "grad_norm": 0.515625, "learning_rate": 0.00019600655187530287, "loss": 0.9108, "step": 12635 }, { "epoch": 0.18, "grad_norm": 0.57421875, "learning_rate": 0.00019599954435649337, "loss": 0.925, "step": 12640 }, { "epoch": 0.18, "grad_norm": 0.58984375, "learning_rate": 0.00019599253082032308, "loss": 0.9331, "step": 12645 }, { "epoch": 0.18, "grad_norm": 0.66796875, "learning_rate": 0.00019598551126723162, "loss": 1.043, "step": 12650 }, { "epoch": 0.18, "grad_norm": 0.57421875, "learning_rate": 0.000195978485697659, "loss": 0.8841, "step": 12655 }, { "epoch": 0.18, "grad_norm": 0.58984375, "learning_rate": 0.0001959714541120456, "loss": 0.9505, "step": 12660 }, { "epoch": 0.18, "grad_norm": 0.546875, "learning_rate": 0.00019596441651083214, "loss": 0.9653, "step": 12665 }, { "epoch": 0.18, "grad_norm": 0.5625, "learning_rate": 0.00019595737289445977, "loss": 0.9831, "step": 12670 }, { "epoch": 0.18, "grad_norm": 0.59765625, "learning_rate": 0.00019595032326336998, "loss": 1.0024, "step": 12675 }, { "epoch": 0.18, "grad_norm": 0.58984375, "learning_rate": 0.00019594326761800462, "loss": 1.0317, "step": 12680 }, { "epoch": 0.18, "grad_norm": 0.50390625, "learning_rate": 0.00019593620595880602, "loss": 0.9532, "step": 12685 }, { "epoch": 0.18, "grad_norm": 0.62890625, "learning_rate": 0.00019592913828621672, "loss": 1.076, "step": 12690 }, { "epoch": 0.18, "grad_norm": 0.58984375, "learning_rate": 0.00019592206460067983, "loss": 0.862, "step": 12695 }, { "epoch": 0.18, "grad_norm": 0.578125, "learning_rate": 0.00019591498490263866, "loss": 0.9048, "step": 12700 }, { "epoch": 0.18, "grad_norm": 0.640625, "learning_rate": 0.000195907899192537, "loss": 0.9484, "step": 12705 }, { "epoch": 0.18, "grad_norm": 0.55078125, "learning_rate": 0.000195900807470819, "loss": 0.9225, "step": 12710 }, { "epoch": 0.18, "grad_norm": 0.5703125, "learning_rate": 0.00019589370973792914, "loss": 1.0204, "step": 12715 }, { "epoch": 0.18, "grad_norm": 0.6015625, "learning_rate": 0.00019588660599431234, "loss": 0.9455, "step": 12720 }, { "epoch": 0.18, "grad_norm": 0.5625, "learning_rate": 0.0001958794962404139, "loss": 0.8872, "step": 12725 }, { "epoch": 0.18, "grad_norm": 0.5859375, "learning_rate": 0.00019587238047667942, "loss": 0.9301, "step": 12730 }, { "epoch": 0.18, "grad_norm": 0.546875, "learning_rate": 0.00019586525870355495, "loss": 1.0429, "step": 12735 }, { "epoch": 0.18, "grad_norm": 0.625, "learning_rate": 0.00019585813092148688, "loss": 1.094, "step": 12740 }, { "epoch": 0.18, "grad_norm": 0.671875, "learning_rate": 0.00019585099713092198, "loss": 0.9153, "step": 12745 }, { "epoch": 0.18, "grad_norm": 0.59375, "learning_rate": 0.00019584385733230743, "loss": 0.9439, "step": 12750 }, { "epoch": 0.18, "grad_norm": 0.65234375, "learning_rate": 0.00019583671152609075, "loss": 1.0327, "step": 12755 }, { "epoch": 0.18, "grad_norm": 0.54296875, "learning_rate": 0.00019582955971271981, "loss": 1.1026, "step": 12760 }, { "epoch": 0.18, "grad_norm": 0.6640625, "learning_rate": 0.00019582240189264295, "loss": 1.0614, "step": 12765 }, { "epoch": 0.18, "grad_norm": 0.51953125, "learning_rate": 0.0001958152380663088, "loss": 0.9044, "step": 12770 }, { "epoch": 0.18, "grad_norm": 0.5078125, "learning_rate": 0.00019580806823416642, "loss": 0.9334, "step": 12775 }, { "epoch": 0.18, "grad_norm": 0.56640625, "learning_rate": 0.00019580089239666518, "loss": 0.9532, "step": 12780 }, { "epoch": 0.18, "grad_norm": 0.640625, "learning_rate": 0.00019579371055425493, "loss": 1.0215, "step": 12785 }, { "epoch": 0.18, "grad_norm": 0.55078125, "learning_rate": 0.00019578652270738577, "loss": 0.9541, "step": 12790 }, { "epoch": 0.18, "grad_norm": 0.60546875, "learning_rate": 0.0001957793288565083, "loss": 0.9159, "step": 12795 }, { "epoch": 0.18, "grad_norm": 0.58203125, "learning_rate": 0.0001957721290020734, "loss": 1.0008, "step": 12800 }, { "epoch": 0.18, "grad_norm": 0.60546875, "learning_rate": 0.00019576492314453238, "loss": 0.9768, "step": 12805 }, { "epoch": 0.18, "grad_norm": 0.53125, "learning_rate": 0.00019575771128433692, "loss": 0.8682, "step": 12810 }, { "epoch": 0.18, "grad_norm": 0.609375, "learning_rate": 0.00019575049342193906, "loss": 0.926, "step": 12815 }, { "epoch": 0.18, "grad_norm": 0.54296875, "learning_rate": 0.00019574326955779122, "loss": 1.1345, "step": 12820 }, { "epoch": 0.18, "grad_norm": 0.58203125, "learning_rate": 0.00019573603969234618, "loss": 0.8593, "step": 12825 }, { "epoch": 0.18, "grad_norm": 0.5, "learning_rate": 0.00019572880382605714, "loss": 1.0199, "step": 12830 }, { "epoch": 0.18, "grad_norm": 0.5703125, "learning_rate": 0.00019572156195937765, "loss": 1.0294, "step": 12835 }, { "epoch": 0.18, "grad_norm": 0.51953125, "learning_rate": 0.00019571431409276166, "loss": 0.9291, "step": 12840 }, { "epoch": 0.18, "grad_norm": 0.7109375, "learning_rate": 0.00019570706022666343, "loss": 0.8817, "step": 12845 }, { "epoch": 0.18, "grad_norm": 0.53125, "learning_rate": 0.0001956998003615377, "loss": 0.9355, "step": 12850 }, { "epoch": 0.18, "grad_norm": 0.515625, "learning_rate": 0.00019569253449783943, "loss": 0.8385, "step": 12855 }, { "epoch": 0.18, "grad_norm": 0.60546875, "learning_rate": 0.00019568526263602417, "loss": 1.161, "step": 12860 }, { "epoch": 0.18, "grad_norm": 0.578125, "learning_rate": 0.00019567798477654767, "loss": 1.0588, "step": 12865 }, { "epoch": 0.18, "grad_norm": 0.5234375, "learning_rate": 0.00019567070091986607, "loss": 0.8892, "step": 12870 }, { "epoch": 0.18, "grad_norm": 0.58203125, "learning_rate": 0.00019566341106643602, "loss": 0.9518, "step": 12875 }, { "epoch": 0.18, "grad_norm": 0.55078125, "learning_rate": 0.0001956561152167144, "loss": 0.9279, "step": 12880 }, { "epoch": 0.18, "grad_norm": 0.546875, "learning_rate": 0.00019564881337115852, "loss": 0.9089, "step": 12885 }, { "epoch": 0.18, "grad_norm": 0.5234375, "learning_rate": 0.0001956415055302261, "loss": 0.8285, "step": 12890 }, { "epoch": 0.18, "grad_norm": 0.6484375, "learning_rate": 0.00019563419169437517, "loss": 1.0355, "step": 12895 }, { "epoch": 0.19, "grad_norm": 0.51171875, "learning_rate": 0.00019562687186406422, "loss": 0.9374, "step": 12900 }, { "epoch": 0.19, "grad_norm": 0.55078125, "learning_rate": 0.000195619546039752, "loss": 0.9838, "step": 12905 }, { "epoch": 0.19, "grad_norm": 0.66015625, "learning_rate": 0.00019561221422189775, "loss": 1.0857, "step": 12910 }, { "epoch": 0.19, "grad_norm": 0.5078125, "learning_rate": 0.00019560487641096102, "loss": 1.0689, "step": 12915 }, { "epoch": 0.19, "grad_norm": 0.5859375, "learning_rate": 0.00019559753260740172, "loss": 0.9646, "step": 12920 }, { "epoch": 0.19, "grad_norm": 0.5078125, "learning_rate": 0.00019559018281168025, "loss": 0.9258, "step": 12925 }, { "epoch": 0.19, "grad_norm": 0.59375, "learning_rate": 0.00019558282702425724, "loss": 1.0102, "step": 12930 }, { "epoch": 0.19, "grad_norm": 0.515625, "learning_rate": 0.00019557546524559373, "loss": 1.1096, "step": 12935 }, { "epoch": 0.19, "grad_norm": 0.51171875, "learning_rate": 0.00019556809747615125, "loss": 0.8644, "step": 12940 }, { "epoch": 0.19, "grad_norm": 0.53515625, "learning_rate": 0.00019556072371639156, "loss": 0.8821, "step": 12945 }, { "epoch": 0.19, "grad_norm": 0.48046875, "learning_rate": 0.00019555334396677688, "loss": 0.9682, "step": 12950 }, { "epoch": 0.19, "grad_norm": 0.640625, "learning_rate": 0.00019554595822776977, "loss": 1.192, "step": 12955 }, { "epoch": 0.19, "grad_norm": 0.5234375, "learning_rate": 0.00019553856649983317, "loss": 1.0012, "step": 12960 }, { "epoch": 0.19, "grad_norm": 0.51953125, "learning_rate": 0.00019553116878343044, "loss": 1.0151, "step": 12965 }, { "epoch": 0.19, "grad_norm": 0.55078125, "learning_rate": 0.00019552376507902524, "loss": 0.904, "step": 12970 }, { "epoch": 0.19, "grad_norm": 0.55859375, "learning_rate": 0.00019551635538708162, "loss": 1.1135, "step": 12975 }, { "epoch": 0.19, "grad_norm": 0.55859375, "learning_rate": 0.0001955089397080641, "loss": 0.9702, "step": 12980 }, { "epoch": 0.19, "grad_norm": 0.73828125, "learning_rate": 0.00019550151804243745, "loss": 1.0448, "step": 12985 }, { "epoch": 0.19, "grad_norm": 0.53515625, "learning_rate": 0.00019549409039066687, "loss": 1.125, "step": 12990 }, { "epoch": 0.19, "grad_norm": 0.5703125, "learning_rate": 0.00019548665675321795, "loss": 0.9953, "step": 12995 }, { "epoch": 0.19, "grad_norm": 0.5546875, "learning_rate": 0.00019547921713055665, "loss": 1.0935, "step": 13000 }, { "epoch": 0.19, "grad_norm": 0.51953125, "learning_rate": 0.00019547177152314928, "loss": 0.9381, "step": 13005 }, { "epoch": 0.19, "grad_norm": 0.53515625, "learning_rate": 0.00019546431993146252, "loss": 1.1262, "step": 13010 }, { "epoch": 0.19, "grad_norm": 0.625, "learning_rate": 0.0001954568623559635, "loss": 0.9953, "step": 13015 }, { "epoch": 0.19, "grad_norm": 0.66015625, "learning_rate": 0.0001954493987971196, "loss": 1.2148, "step": 13020 }, { "epoch": 0.19, "grad_norm": 0.54296875, "learning_rate": 0.00019544192925539865, "loss": 1.1218, "step": 13025 }, { "epoch": 0.19, "grad_norm": 0.5625, "learning_rate": 0.0001954344537312689, "loss": 0.9282, "step": 13030 }, { "epoch": 0.19, "grad_norm": 0.4921875, "learning_rate": 0.0001954269722251989, "loss": 1.0104, "step": 13035 }, { "epoch": 0.19, "grad_norm": 0.56640625, "learning_rate": 0.0001954194847376576, "loss": 0.8732, "step": 13040 }, { "epoch": 0.19, "grad_norm": 0.51953125, "learning_rate": 0.00019541199126911433, "loss": 1.0397, "step": 13045 }, { "epoch": 0.19, "grad_norm": 0.58203125, "learning_rate": 0.00019540449182003878, "loss": 0.9709, "step": 13050 }, { "epoch": 0.19, "grad_norm": 0.5703125, "learning_rate": 0.000195396986390901, "loss": 0.9555, "step": 13055 }, { "epoch": 0.19, "grad_norm": 0.64453125, "learning_rate": 0.0001953894749821715, "loss": 0.9479, "step": 13060 }, { "epoch": 0.19, "grad_norm": 0.515625, "learning_rate": 0.00019538195759432106, "loss": 0.88, "step": 13065 }, { "epoch": 0.19, "grad_norm": 0.62109375, "learning_rate": 0.00019537443422782089, "loss": 0.985, "step": 13070 }, { "epoch": 0.19, "grad_norm": 0.546875, "learning_rate": 0.00019536690488314252, "loss": 0.8963, "step": 13075 }, { "epoch": 0.19, "grad_norm": 0.5703125, "learning_rate": 0.000195359369560758, "loss": 0.9127, "step": 13080 }, { "epoch": 0.19, "grad_norm": 0.5625, "learning_rate": 0.00019535182826113954, "loss": 1.0403, "step": 13085 }, { "epoch": 0.19, "grad_norm": 0.5859375, "learning_rate": 0.00019534428098475992, "loss": 0.8949, "step": 13090 }, { "epoch": 0.19, "grad_norm": 0.5703125, "learning_rate": 0.00019533672773209217, "loss": 0.8789, "step": 13095 }, { "epoch": 0.19, "grad_norm": 0.5546875, "learning_rate": 0.00019532916850360975, "loss": 1.138, "step": 13100 }, { "epoch": 0.19, "grad_norm": 0.55859375, "learning_rate": 0.0001953216032997865, "loss": 0.9711, "step": 13105 }, { "epoch": 0.19, "grad_norm": 0.53125, "learning_rate": 0.00019531403212109653, "loss": 1.0461, "step": 13110 }, { "epoch": 0.19, "grad_norm": 0.55078125, "learning_rate": 0.0001953064549680145, "loss": 0.8697, "step": 13115 }, { "epoch": 0.19, "grad_norm": 0.5390625, "learning_rate": 0.00019529887184101537, "loss": 0.8727, "step": 13120 }, { "epoch": 0.19, "grad_norm": 0.4921875, "learning_rate": 0.00019529128274057436, "loss": 0.905, "step": 13125 }, { "epoch": 0.19, "grad_norm": 0.59375, "learning_rate": 0.00019528368766716725, "loss": 1.0066, "step": 13130 }, { "epoch": 0.19, "grad_norm": 0.55078125, "learning_rate": 0.00019527608662127009, "loss": 0.9105, "step": 13135 }, { "epoch": 0.19, "grad_norm": 0.5859375, "learning_rate": 0.00019526847960335927, "loss": 0.8907, "step": 13140 }, { "epoch": 0.19, "grad_norm": 0.54296875, "learning_rate": 0.00019526086661391168, "loss": 1.1167, "step": 13145 }, { "epoch": 0.19, "grad_norm": 0.56640625, "learning_rate": 0.00019525324765340445, "loss": 0.9489, "step": 13150 }, { "epoch": 0.19, "grad_norm": 0.546875, "learning_rate": 0.00019524562272231518, "loss": 1.1006, "step": 13155 }, { "epoch": 0.19, "grad_norm": 0.6484375, "learning_rate": 0.00019523799182112183, "loss": 1.1097, "step": 13160 }, { "epoch": 0.19, "grad_norm": 0.54296875, "learning_rate": 0.00019523035495030267, "loss": 0.9017, "step": 13165 }, { "epoch": 0.19, "grad_norm": 0.546875, "learning_rate": 0.0001952227121103364, "loss": 0.8943, "step": 13170 }, { "epoch": 0.19, "grad_norm": 0.5390625, "learning_rate": 0.00019521506330170206, "loss": 1.0787, "step": 13175 }, { "epoch": 0.19, "grad_norm": 0.51953125, "learning_rate": 0.00019520740852487916, "loss": 0.9864, "step": 13180 }, { "epoch": 0.19, "grad_norm": 0.53515625, "learning_rate": 0.00019519974778034742, "loss": 1.0124, "step": 13185 }, { "epoch": 0.19, "grad_norm": 0.58984375, "learning_rate": 0.00019519208106858708, "loss": 1.1291, "step": 13190 }, { "epoch": 0.19, "grad_norm": 0.72265625, "learning_rate": 0.0001951844083900787, "loss": 1.0683, "step": 13195 }, { "epoch": 0.19, "grad_norm": 0.5234375, "learning_rate": 0.0001951767297453032, "loss": 0.9281, "step": 13200 }, { "epoch": 0.19, "grad_norm": 0.59375, "learning_rate": 0.00019516904513474187, "loss": 1.0935, "step": 13205 }, { "epoch": 0.19, "grad_norm": 0.55859375, "learning_rate": 0.00019516135455887638, "loss": 0.951, "step": 13210 }, { "epoch": 0.19, "grad_norm": 0.546875, "learning_rate": 0.00019515365801818884, "loss": 0.9578, "step": 13215 }, { "epoch": 0.19, "grad_norm": 0.55859375, "learning_rate": 0.00019514595551316162, "loss": 1.0304, "step": 13220 }, { "epoch": 0.19, "grad_norm": 0.5703125, "learning_rate": 0.00019513824704427756, "loss": 0.8505, "step": 13225 }, { "epoch": 0.19, "grad_norm": 0.50390625, "learning_rate": 0.00019513053261201982, "loss": 0.9522, "step": 13230 }, { "epoch": 0.19, "grad_norm": 0.52734375, "learning_rate": 0.00019512281221687193, "loss": 0.9674, "step": 13235 }, { "epoch": 0.19, "grad_norm": 0.56640625, "learning_rate": 0.00019511508585931786, "loss": 1.0524, "step": 13240 }, { "epoch": 0.19, "grad_norm": 0.53125, "learning_rate": 0.00019510735353984185, "loss": 0.874, "step": 13245 }, { "epoch": 0.19, "grad_norm": 0.546875, "learning_rate": 0.00019509961525892867, "loss": 1.0424, "step": 13250 }, { "epoch": 0.19, "grad_norm": 0.51953125, "learning_rate": 0.00019509187101706324, "loss": 0.7539, "step": 13255 }, { "epoch": 0.19, "grad_norm": 0.609375, "learning_rate": 0.00019508412081473104, "loss": 0.9879, "step": 13260 }, { "epoch": 0.19, "grad_norm": 0.62109375, "learning_rate": 0.00019507636465241785, "loss": 1.2274, "step": 13265 }, { "epoch": 0.19, "grad_norm": 0.56640625, "learning_rate": 0.0001950686025306099, "loss": 0.9844, "step": 13270 }, { "epoch": 0.19, "grad_norm": 0.55859375, "learning_rate": 0.0001950608344497936, "loss": 1.0499, "step": 13275 }, { "epoch": 0.19, "grad_norm": 0.58984375, "learning_rate": 0.00019505306041045595, "loss": 0.9718, "step": 13280 }, { "epoch": 0.19, "grad_norm": 0.51953125, "learning_rate": 0.00019504528041308422, "loss": 1.0062, "step": 13285 }, { "epoch": 0.19, "grad_norm": 0.55078125, "learning_rate": 0.00019503749445816607, "loss": 0.9244, "step": 13290 }, { "epoch": 0.19, "grad_norm": 0.53125, "learning_rate": 0.00019502970254618954, "loss": 0.875, "step": 13295 }, { "epoch": 0.19, "grad_norm": 0.625, "learning_rate": 0.000195021904677643, "loss": 0.9849, "step": 13300 }, { "epoch": 0.19, "grad_norm": 0.58984375, "learning_rate": 0.00019501410085301526, "loss": 0.9765, "step": 13305 }, { "epoch": 0.19, "grad_norm": 0.54296875, "learning_rate": 0.0001950062910727955, "loss": 0.8658, "step": 13310 }, { "epoch": 0.19, "grad_norm": 0.515625, "learning_rate": 0.00019499847533747315, "loss": 0.8656, "step": 13315 }, { "epoch": 0.19, "grad_norm": 0.60546875, "learning_rate": 0.00019499065364753822, "loss": 0.9422, "step": 13320 }, { "epoch": 0.19, "grad_norm": 0.578125, "learning_rate": 0.0001949828260034809, "loss": 1.0623, "step": 13325 }, { "epoch": 0.19, "grad_norm": 0.59375, "learning_rate": 0.0001949749924057919, "loss": 1.1, "step": 13330 }, { "epoch": 0.19, "grad_norm": 0.58203125, "learning_rate": 0.0001949671528549622, "loss": 0.8578, "step": 13335 }, { "epoch": 0.19, "grad_norm": 0.578125, "learning_rate": 0.0001949593073514832, "loss": 0.9376, "step": 13340 }, { "epoch": 0.19, "grad_norm": 0.546875, "learning_rate": 0.00019495145589584664, "loss": 0.9657, "step": 13345 }, { "epoch": 0.19, "grad_norm": 0.6171875, "learning_rate": 0.00019494359848854474, "loss": 1.0854, "step": 13350 }, { "epoch": 0.19, "grad_norm": 0.6171875, "learning_rate": 0.00019493573513006994, "loss": 0.8838, "step": 13355 }, { "epoch": 0.19, "grad_norm": 0.50390625, "learning_rate": 0.00019492786582091508, "loss": 0.9277, "step": 13360 }, { "epoch": 0.19, "grad_norm": 0.53125, "learning_rate": 0.00019491999056157352, "loss": 0.9805, "step": 13365 }, { "epoch": 0.19, "grad_norm": 0.52734375, "learning_rate": 0.00019491210935253887, "loss": 1.0989, "step": 13370 }, { "epoch": 0.19, "grad_norm": 0.59375, "learning_rate": 0.00019490422219430506, "loss": 1.0407, "step": 13375 }, { "epoch": 0.19, "grad_norm": 0.55859375, "learning_rate": 0.00019489632908736658, "loss": 0.9724, "step": 13380 }, { "epoch": 0.19, "grad_norm": 0.60546875, "learning_rate": 0.00019488843003221805, "loss": 0.9384, "step": 13385 }, { "epoch": 0.19, "grad_norm": 0.6015625, "learning_rate": 0.00019488052502935469, "loss": 1.0806, "step": 13390 }, { "epoch": 0.19, "grad_norm": 0.625, "learning_rate": 0.00019487261407927196, "loss": 0.9685, "step": 13395 }, { "epoch": 0.19, "grad_norm": 0.578125, "learning_rate": 0.00019486469718246573, "loss": 0.936, "step": 13400 }, { "epoch": 0.19, "grad_norm": 0.55078125, "learning_rate": 0.0001948567743394322, "loss": 0.9386, "step": 13405 }, { "epoch": 0.19, "grad_norm": 0.5234375, "learning_rate": 0.00019484884555066807, "loss": 0.9172, "step": 13410 }, { "epoch": 0.19, "grad_norm": 0.6171875, "learning_rate": 0.00019484091081667027, "loss": 1.1164, "step": 13415 }, { "epoch": 0.19, "grad_norm": 0.5546875, "learning_rate": 0.00019483297013793617, "loss": 1.0413, "step": 13420 }, { "epoch": 0.19, "grad_norm": 0.60546875, "learning_rate": 0.00019482502351496345, "loss": 1.0837, "step": 13425 }, { "epoch": 0.19, "grad_norm": 0.7109375, "learning_rate": 0.0001948170709482503, "loss": 0.9828, "step": 13430 }, { "epoch": 0.19, "grad_norm": 0.58984375, "learning_rate": 0.00019480911243829514, "loss": 1.0261, "step": 13435 }, { "epoch": 0.19, "grad_norm": 0.578125, "learning_rate": 0.00019480114798559685, "loss": 1.0415, "step": 13440 }, { "epoch": 0.19, "grad_norm": 0.5625, "learning_rate": 0.00019479317759065463, "loss": 0.8573, "step": 13445 }, { "epoch": 0.19, "grad_norm": 0.52734375, "learning_rate": 0.00019478520125396805, "loss": 0.9587, "step": 13450 }, { "epoch": 0.19, "grad_norm": 0.53515625, "learning_rate": 0.00019477721897603715, "loss": 1.146, "step": 13455 }, { "epoch": 0.19, "grad_norm": 0.54296875, "learning_rate": 0.00019476923075736218, "loss": 0.9267, "step": 13460 }, { "epoch": 0.19, "grad_norm": 0.578125, "learning_rate": 0.0001947612365984439, "loss": 0.979, "step": 13465 }, { "epoch": 0.19, "grad_norm": 0.5234375, "learning_rate": 0.00019475323649978344, "loss": 0.9616, "step": 13470 }, { "epoch": 0.19, "grad_norm": 0.578125, "learning_rate": 0.00019474523046188216, "loss": 0.9822, "step": 13475 }, { "epoch": 0.19, "grad_norm": 0.53515625, "learning_rate": 0.00019473721848524195, "loss": 0.897, "step": 13480 }, { "epoch": 0.19, "grad_norm": 0.5859375, "learning_rate": 0.00019472920057036493, "loss": 0.9629, "step": 13485 }, { "epoch": 0.19, "grad_norm": 0.6796875, "learning_rate": 0.0001947211767177538, "loss": 1.0021, "step": 13490 }, { "epoch": 0.19, "grad_norm": 0.57421875, "learning_rate": 0.00019471314692791138, "loss": 1.0638, "step": 13495 }, { "epoch": 0.19, "grad_norm": 0.5546875, "learning_rate": 0.00019470511120134106, "loss": 0.9434, "step": 13500 }, { "epoch": 0.19, "grad_norm": 0.66796875, "learning_rate": 0.00019469706953854652, "loss": 1.125, "step": 13505 }, { "epoch": 0.19, "grad_norm": 0.498046875, "learning_rate": 0.0001946890219400318, "loss": 0.7527, "step": 13510 }, { "epoch": 0.19, "grad_norm": 0.66015625, "learning_rate": 0.00019468096840630136, "loss": 0.971, "step": 13515 }, { "epoch": 0.19, "grad_norm": 0.5703125, "learning_rate": 0.00019467290893785996, "loss": 0.9305, "step": 13520 }, { "epoch": 0.19, "grad_norm": 0.5625, "learning_rate": 0.00019466484353521284, "loss": 0.9969, "step": 13525 }, { "epoch": 0.19, "grad_norm": 0.484375, "learning_rate": 0.00019465677219886548, "loss": 0.8256, "step": 13530 }, { "epoch": 0.19, "grad_norm": 0.66015625, "learning_rate": 0.00019464869492932384, "loss": 1.0183, "step": 13535 }, { "epoch": 0.19, "grad_norm": 0.61328125, "learning_rate": 0.0001946406117270942, "loss": 0.9527, "step": 13540 }, { "epoch": 0.19, "grad_norm": 0.515625, "learning_rate": 0.00019463252259268327, "loss": 0.9422, "step": 13545 }, { "epoch": 0.19, "grad_norm": 0.5078125, "learning_rate": 0.000194624427526598, "loss": 1.123, "step": 13550 }, { "epoch": 0.19, "grad_norm": 0.55078125, "learning_rate": 0.00019461632652934587, "loss": 0.9995, "step": 13555 }, { "epoch": 0.19, "grad_norm": 0.51171875, "learning_rate": 0.00019460821960143462, "loss": 0.9578, "step": 13560 }, { "epoch": 0.19, "grad_norm": 0.66015625, "learning_rate": 0.00019460010674337243, "loss": 0.9716, "step": 13565 }, { "epoch": 0.19, "grad_norm": 0.76171875, "learning_rate": 0.00019459198795566783, "loss": 1.0666, "step": 13570 }, { "epoch": 0.19, "grad_norm": 0.6015625, "learning_rate": 0.00019458386323882966, "loss": 0.9313, "step": 13575 }, { "epoch": 0.19, "grad_norm": 0.6015625, "learning_rate": 0.0001945757325933672, "loss": 0.8971, "step": 13580 }, { "epoch": 0.19, "grad_norm": 0.5234375, "learning_rate": 0.00019456759601979014, "loss": 0.9537, "step": 13585 }, { "epoch": 0.19, "grad_norm": 0.67578125, "learning_rate": 0.00019455945351860848, "loss": 0.9548, "step": 13590 }, { "epoch": 0.2, "grad_norm": 0.5625, "learning_rate": 0.00019455130509033253, "loss": 1.0053, "step": 13595 }, { "epoch": 0.2, "grad_norm": 0.59765625, "learning_rate": 0.00019454315073547313, "loss": 1.0101, "step": 13600 }, { "epoch": 0.2, "grad_norm": 0.52734375, "learning_rate": 0.00019453499045454136, "loss": 0.9598, "step": 13605 }, { "epoch": 0.2, "grad_norm": 0.50390625, "learning_rate": 0.00019452682424804868, "loss": 0.8437, "step": 13610 }, { "epoch": 0.2, "grad_norm": 0.56640625, "learning_rate": 0.00019451865211650703, "loss": 0.8778, "step": 13615 }, { "epoch": 0.2, "grad_norm": 0.5859375, "learning_rate": 0.00019451047406042864, "loss": 0.9147, "step": 13620 }, { "epoch": 0.2, "grad_norm": 0.63671875, "learning_rate": 0.00019450229008032607, "loss": 1.0566, "step": 13625 }, { "epoch": 0.2, "grad_norm": 0.58203125, "learning_rate": 0.00019449410017671236, "loss": 1.0564, "step": 13630 }, { "epoch": 0.2, "grad_norm": 0.59765625, "learning_rate": 0.0001944859043501008, "loss": 1.0702, "step": 13635 }, { "epoch": 0.2, "grad_norm": 0.52734375, "learning_rate": 0.00019447770260100513, "loss": 0.8292, "step": 13640 }, { "epoch": 0.2, "grad_norm": 0.53515625, "learning_rate": 0.0001944694949299395, "loss": 0.9116, "step": 13645 }, { "epoch": 0.2, "grad_norm": 0.5546875, "learning_rate": 0.0001944612813374183, "loss": 0.8663, "step": 13650 }, { "epoch": 0.2, "grad_norm": 0.5625, "learning_rate": 0.00019445306182395644, "loss": 1.0415, "step": 13655 }, { "epoch": 0.2, "grad_norm": 0.53515625, "learning_rate": 0.00019444483639006904, "loss": 0.8293, "step": 13660 }, { "epoch": 0.2, "grad_norm": 0.52734375, "learning_rate": 0.0001944366050362718, "loss": 0.9584, "step": 13665 }, { "epoch": 0.2, "grad_norm": 0.80859375, "learning_rate": 0.00019442836776308056, "loss": 1.0026, "step": 13670 }, { "epoch": 0.2, "grad_norm": 0.5234375, "learning_rate": 0.00019442012457101168, "loss": 0.8867, "step": 13675 }, { "epoch": 0.2, "grad_norm": 0.578125, "learning_rate": 0.00019441187546058187, "loss": 0.9783, "step": 13680 }, { "epoch": 0.2, "grad_norm": 0.609375, "learning_rate": 0.00019440362043230818, "loss": 0.871, "step": 13685 }, { "epoch": 0.2, "grad_norm": 0.56640625, "learning_rate": 0.00019439535948670805, "loss": 0.9344, "step": 13690 }, { "epoch": 0.2, "grad_norm": 0.640625, "learning_rate": 0.00019438709262429926, "loss": 1.0194, "step": 13695 }, { "epoch": 0.2, "grad_norm": 0.48046875, "learning_rate": 0.00019437881984560003, "loss": 0.9557, "step": 13700 }, { "epoch": 0.2, "grad_norm": 0.55078125, "learning_rate": 0.00019437054115112887, "loss": 0.9423, "step": 13705 }, { "epoch": 0.2, "grad_norm": 0.5859375, "learning_rate": 0.00019436225654140472, "loss": 1.0194, "step": 13710 }, { "epoch": 0.2, "grad_norm": 0.609375, "learning_rate": 0.00019435396601694686, "loss": 1.0522, "step": 13715 }, { "epoch": 0.2, "grad_norm": 0.59375, "learning_rate": 0.00019434566957827492, "loss": 0.9833, "step": 13720 }, { "epoch": 0.2, "grad_norm": 0.55859375, "learning_rate": 0.000194337367225909, "loss": 0.8905, "step": 13725 }, { "epoch": 0.2, "grad_norm": 0.5625, "learning_rate": 0.00019432905896036944, "loss": 1.0386, "step": 13730 }, { "epoch": 0.2, "grad_norm": 0.69140625, "learning_rate": 0.000194320744782177, "loss": 1.0965, "step": 13735 }, { "epoch": 0.2, "grad_norm": 0.57421875, "learning_rate": 0.00019431242469185293, "loss": 0.9553, "step": 13740 }, { "epoch": 0.2, "grad_norm": 0.56640625, "learning_rate": 0.00019430409868991864, "loss": 0.9008, "step": 13745 }, { "epoch": 0.2, "grad_norm": 0.5703125, "learning_rate": 0.000194295766776896, "loss": 0.9997, "step": 13750 }, { "epoch": 0.2, "grad_norm": 0.5625, "learning_rate": 0.00019428742895330733, "loss": 0.9672, "step": 13755 }, { "epoch": 0.2, "grad_norm": 0.60546875, "learning_rate": 0.00019427908521967523, "loss": 0.9669, "step": 13760 }, { "epoch": 0.2, "grad_norm": 0.609375, "learning_rate": 0.0001942707355765227, "loss": 1.1381, "step": 13765 }, { "epoch": 0.2, "grad_norm": 0.5625, "learning_rate": 0.0001942623800243731, "loss": 1.1075, "step": 13770 }, { "epoch": 0.2, "grad_norm": 0.59765625, "learning_rate": 0.00019425401856375016, "loss": 1.0386, "step": 13775 }, { "epoch": 0.2, "grad_norm": 0.5390625, "learning_rate": 0.000194245651195178, "loss": 0.9591, "step": 13780 }, { "epoch": 0.2, "grad_norm": 0.63671875, "learning_rate": 0.0001942372779191811, "loss": 0.9495, "step": 13785 }, { "epoch": 0.2, "grad_norm": 0.5546875, "learning_rate": 0.00019422889873628424, "loss": 0.8961, "step": 13790 }, { "epoch": 0.2, "grad_norm": 0.65234375, "learning_rate": 0.00019422051364701272, "loss": 0.9591, "step": 13795 }, { "epoch": 0.2, "grad_norm": 0.5703125, "learning_rate": 0.0001942121226518921, "loss": 0.887, "step": 13800 }, { "epoch": 0.2, "grad_norm": 0.546875, "learning_rate": 0.00019420372575144833, "loss": 1.0454, "step": 13805 }, { "epoch": 0.2, "grad_norm": 0.54296875, "learning_rate": 0.00019419532294620773, "loss": 0.9947, "step": 13810 }, { "epoch": 0.2, "grad_norm": 0.58203125, "learning_rate": 0.00019418691423669704, "loss": 0.9633, "step": 13815 }, { "epoch": 0.2, "grad_norm": 0.55859375, "learning_rate": 0.00019417849962344327, "loss": 1.0141, "step": 13820 }, { "epoch": 0.2, "grad_norm": 0.5, "learning_rate": 0.00019417007910697387, "loss": 0.9378, "step": 13825 }, { "epoch": 0.2, "grad_norm": 0.50390625, "learning_rate": 0.00019416165268781668, "loss": 0.9427, "step": 13830 }, { "epoch": 0.2, "grad_norm": 0.53515625, "learning_rate": 0.00019415322036649986, "loss": 0.8609, "step": 13835 }, { "epoch": 0.2, "grad_norm": 0.5859375, "learning_rate": 0.00019414478214355193, "loss": 0.8503, "step": 13840 }, { "epoch": 0.2, "grad_norm": 0.59765625, "learning_rate": 0.00019413633801950187, "loss": 1.0469, "step": 13845 }, { "epoch": 0.2, "grad_norm": 0.62890625, "learning_rate": 0.0001941278879948789, "loss": 1.0054, "step": 13850 }, { "epoch": 0.2, "grad_norm": 0.5703125, "learning_rate": 0.00019411943207021275, "loss": 1.0992, "step": 13855 }, { "epoch": 0.2, "grad_norm": 0.53515625, "learning_rate": 0.0001941109702460334, "loss": 1.0576, "step": 13860 }, { "epoch": 0.2, "grad_norm": 0.50390625, "learning_rate": 0.0001941025025228712, "loss": 0.9899, "step": 13865 }, { "epoch": 0.2, "grad_norm": 0.55859375, "learning_rate": 0.00019409402890125697, "loss": 1.023, "step": 13870 }, { "epoch": 0.2, "grad_norm": 0.5546875, "learning_rate": 0.0001940855493817219, "loss": 1.0024, "step": 13875 }, { "epoch": 0.2, "grad_norm": 0.5234375, "learning_rate": 0.0001940770639647974, "loss": 0.9097, "step": 13880 }, { "epoch": 0.2, "grad_norm": 0.55078125, "learning_rate": 0.0001940685726510154, "loss": 0.9621, "step": 13885 }, { "epoch": 0.2, "grad_norm": 0.53125, "learning_rate": 0.00019406007544090813, "loss": 0.9476, "step": 13890 }, { "epoch": 0.2, "grad_norm": 0.6171875, "learning_rate": 0.0001940515723350082, "loss": 0.957, "step": 13895 }, { "epoch": 0.2, "grad_norm": 0.5546875, "learning_rate": 0.00019404306333384864, "loss": 1.1225, "step": 13900 }, { "epoch": 0.2, "grad_norm": 0.640625, "learning_rate": 0.0001940345484379627, "loss": 0.9353, "step": 13905 }, { "epoch": 0.2, "grad_norm": 0.6328125, "learning_rate": 0.0001940260276478842, "loss": 0.9559, "step": 13910 }, { "epoch": 0.2, "grad_norm": 0.5078125, "learning_rate": 0.00019401750096414722, "loss": 1.0461, "step": 13915 }, { "epoch": 0.2, "grad_norm": 0.578125, "learning_rate": 0.00019400896838728618, "loss": 1.0111, "step": 13920 }, { "epoch": 0.2, "grad_norm": 0.578125, "learning_rate": 0.00019400042991783592, "loss": 0.9265, "step": 13925 }, { "epoch": 0.2, "grad_norm": 0.70703125, "learning_rate": 0.00019399188555633168, "loss": 1.1099, "step": 13930 }, { "epoch": 0.2, "grad_norm": 0.53515625, "learning_rate": 0.000193983335303309, "loss": 1.0102, "step": 13935 }, { "epoch": 0.2, "grad_norm": 0.6796875, "learning_rate": 0.0001939747791593038, "loss": 0.9348, "step": 13940 }, { "epoch": 0.2, "grad_norm": 0.609375, "learning_rate": 0.00019396621712485245, "loss": 0.846, "step": 13945 }, { "epoch": 0.2, "grad_norm": 0.59375, "learning_rate": 0.00019395764920049157, "loss": 0.994, "step": 13950 }, { "epoch": 0.2, "grad_norm": 0.609375, "learning_rate": 0.00019394907538675823, "loss": 1.009, "step": 13955 }, { "epoch": 0.2, "grad_norm": 0.54296875, "learning_rate": 0.00019394049568418985, "loss": 0.9882, "step": 13960 }, { "epoch": 0.2, "grad_norm": 0.50390625, "learning_rate": 0.0001939319100933242, "loss": 0.983, "step": 13965 }, { "epoch": 0.2, "grad_norm": 0.5390625, "learning_rate": 0.0001939233186146995, "loss": 0.9918, "step": 13970 }, { "epoch": 0.2, "grad_norm": 0.5703125, "learning_rate": 0.00019391472124885418, "loss": 0.9375, "step": 13975 }, { "epoch": 0.2, "grad_norm": 0.57421875, "learning_rate": 0.00019390611799632716, "loss": 0.9159, "step": 13980 }, { "epoch": 0.2, "grad_norm": 0.6328125, "learning_rate": 0.00019389750885765773, "loss": 1.0269, "step": 13985 }, { "epoch": 0.2, "grad_norm": 0.5859375, "learning_rate": 0.00019388889383338548, "loss": 0.9451, "step": 13990 }, { "epoch": 0.2, "grad_norm": 0.60546875, "learning_rate": 0.00019388027292405045, "loss": 1.1171, "step": 13995 }, { "epoch": 0.2, "grad_norm": 0.498046875, "learning_rate": 0.000193871646130193, "loss": 0.9076, "step": 14000 }, { "epoch": 0.2, "grad_norm": 0.53125, "learning_rate": 0.00019386301345235385, "loss": 0.9822, "step": 14005 }, { "epoch": 0.2, "grad_norm": 0.54296875, "learning_rate": 0.0001938543748910741, "loss": 1.1278, "step": 14010 }, { "epoch": 0.2, "grad_norm": 0.73046875, "learning_rate": 0.0001938457304468953, "loss": 1.0708, "step": 14015 }, { "epoch": 0.2, "grad_norm": 0.625, "learning_rate": 0.0001938370801203592, "loss": 0.9955, "step": 14020 }, { "epoch": 0.2, "grad_norm": 0.51953125, "learning_rate": 0.000193828423912008, "loss": 0.8778, "step": 14025 }, { "epoch": 0.2, "grad_norm": 0.61328125, "learning_rate": 0.0001938197618223844, "loss": 0.9365, "step": 14030 }, { "epoch": 0.2, "grad_norm": 0.55078125, "learning_rate": 0.00019381109385203124, "loss": 0.9629, "step": 14035 }, { "epoch": 0.2, "grad_norm": 0.640625, "learning_rate": 0.0001938024200014919, "loss": 0.9758, "step": 14040 }, { "epoch": 0.2, "grad_norm": 0.48828125, "learning_rate": 0.00019379374027131, "loss": 0.9081, "step": 14045 }, { "epoch": 0.2, "grad_norm": 0.64453125, "learning_rate": 0.00019378505466202968, "loss": 1.1199, "step": 14050 }, { "epoch": 0.2, "grad_norm": 0.57421875, "learning_rate": 0.0001937763631741953, "loss": 0.9182, "step": 14055 }, { "epoch": 0.2, "grad_norm": 0.55859375, "learning_rate": 0.0001937676658083517, "loss": 0.9741, "step": 14060 }, { "epoch": 0.2, "grad_norm": 0.59765625, "learning_rate": 0.00019375896256504399, "loss": 0.9724, "step": 14065 }, { "epoch": 0.2, "grad_norm": 0.51953125, "learning_rate": 0.00019375025344481772, "loss": 1.0913, "step": 14070 }, { "epoch": 0.2, "grad_norm": 0.53515625, "learning_rate": 0.0001937415384482188, "loss": 1.0246, "step": 14075 }, { "epoch": 0.2, "grad_norm": 0.61328125, "learning_rate": 0.00019373281757579348, "loss": 1.2072, "step": 14080 }, { "epoch": 0.2, "grad_norm": 0.61328125, "learning_rate": 0.0001937240908280884, "loss": 1.1545, "step": 14085 }, { "epoch": 0.2, "grad_norm": 0.63671875, "learning_rate": 0.00019371535820565056, "loss": 1.0993, "step": 14090 }, { "epoch": 0.2, "grad_norm": 0.55078125, "learning_rate": 0.00019370661970902737, "loss": 1.0449, "step": 14095 }, { "epoch": 0.2, "grad_norm": 0.62109375, "learning_rate": 0.0001936978753387665, "loss": 1.0995, "step": 14100 }, { "epoch": 0.2, "grad_norm": 0.51953125, "learning_rate": 0.0001936891250954161, "loss": 0.9807, "step": 14105 }, { "epoch": 0.2, "grad_norm": 0.609375, "learning_rate": 0.00019368036897952464, "loss": 1.0008, "step": 14110 }, { "epoch": 0.2, "grad_norm": 0.59765625, "learning_rate": 0.00019367160699164097, "loss": 0.99, "step": 14115 }, { "epoch": 0.2, "grad_norm": 0.59375, "learning_rate": 0.00019366283913231427, "loss": 1.0677, "step": 14120 }, { "epoch": 0.2, "grad_norm": 0.80078125, "learning_rate": 0.00019365406540209414, "loss": 1.0974, "step": 14125 }, { "epoch": 0.2, "grad_norm": 0.5859375, "learning_rate": 0.00019364528580153055, "loss": 0.7966, "step": 14130 }, { "epoch": 0.2, "grad_norm": 0.59765625, "learning_rate": 0.00019363650033117375, "loss": 0.8748, "step": 14135 }, { "epoch": 0.2, "grad_norm": 0.62109375, "learning_rate": 0.00019362770899157452, "loss": 0.9802, "step": 14140 }, { "epoch": 0.2, "grad_norm": 0.5546875, "learning_rate": 0.00019361891178328383, "loss": 1.0155, "step": 14145 }, { "epoch": 0.2, "grad_norm": 0.62890625, "learning_rate": 0.00019361010870685313, "loss": 1.0516, "step": 14150 }, { "epoch": 0.2, "grad_norm": 0.53515625, "learning_rate": 0.0001936012997628342, "loss": 0.9472, "step": 14155 }, { "epoch": 0.2, "grad_norm": 0.5859375, "learning_rate": 0.0001935924849517792, "loss": 0.8923, "step": 14160 }, { "epoch": 0.2, "grad_norm": 0.55078125, "learning_rate": 0.00019358366427424064, "loss": 0.9306, "step": 14165 }, { "epoch": 0.2, "grad_norm": 0.498046875, "learning_rate": 0.00019357483773077143, "loss": 0.9439, "step": 14170 }, { "epoch": 0.2, "grad_norm": 0.6171875, "learning_rate": 0.0001935660053219248, "loss": 0.9135, "step": 14175 }, { "epoch": 0.2, "grad_norm": 0.5, "learning_rate": 0.00019355716704825442, "loss": 0.9764, "step": 14180 }, { "epoch": 0.2, "grad_norm": 0.55078125, "learning_rate": 0.00019354832291031425, "loss": 0.8676, "step": 14185 }, { "epoch": 0.2, "grad_norm": 0.546875, "learning_rate": 0.0001935394729086587, "loss": 1.0203, "step": 14190 }, { "epoch": 0.2, "grad_norm": 0.55078125, "learning_rate": 0.00019353061704384237, "loss": 0.8991, "step": 14195 }, { "epoch": 0.2, "grad_norm": 0.57421875, "learning_rate": 0.00019352175531642049, "loss": 0.933, "step": 14200 }, { "epoch": 0.2, "grad_norm": 0.5390625, "learning_rate": 0.00019351288772694847, "loss": 0.9262, "step": 14205 }, { "epoch": 0.2, "grad_norm": 0.498046875, "learning_rate": 0.00019350401427598214, "loss": 0.9976, "step": 14210 }, { "epoch": 0.2, "grad_norm": 0.5, "learning_rate": 0.00019349513496407772, "loss": 0.8633, "step": 14215 }, { "epoch": 0.2, "grad_norm": 0.51953125, "learning_rate": 0.00019348624979179173, "loss": 1.0477, "step": 14220 }, { "epoch": 0.2, "grad_norm": 0.6328125, "learning_rate": 0.00019347735875968115, "loss": 1.0083, "step": 14225 }, { "epoch": 0.2, "grad_norm": 0.5078125, "learning_rate": 0.00019346846186830326, "loss": 0.9553, "step": 14230 }, { "epoch": 0.2, "grad_norm": 0.59375, "learning_rate": 0.00019345955911821572, "loss": 0.9713, "step": 14235 }, { "epoch": 0.2, "grad_norm": 0.51953125, "learning_rate": 0.00019345065050997657, "loss": 0.9801, "step": 14240 }, { "epoch": 0.2, "grad_norm": 0.6953125, "learning_rate": 0.0001934417360441442, "loss": 1.0721, "step": 14245 }, { "epoch": 0.2, "grad_norm": 0.55859375, "learning_rate": 0.00019343281572127742, "loss": 1.081, "step": 14250 }, { "epoch": 0.2, "grad_norm": 0.56640625, "learning_rate": 0.00019342388954193536, "loss": 0.8543, "step": 14255 }, { "epoch": 0.2, "grad_norm": 0.55859375, "learning_rate": 0.00019341495750667748, "loss": 0.878, "step": 14260 }, { "epoch": 0.2, "grad_norm": 0.61328125, "learning_rate": 0.00019340601961606365, "loss": 1.0285, "step": 14265 }, { "epoch": 0.2, "grad_norm": 0.5390625, "learning_rate": 0.00019339707587065415, "loss": 0.9378, "step": 14270 }, { "epoch": 0.2, "grad_norm": 0.57421875, "learning_rate": 0.00019338812627100958, "loss": 1.1077, "step": 14275 }, { "epoch": 0.2, "grad_norm": 0.51171875, "learning_rate": 0.0001933791708176909, "loss": 1.0227, "step": 14280 }, { "epoch": 0.2, "grad_norm": 0.6171875, "learning_rate": 0.00019337020951125942, "loss": 1.2616, "step": 14285 }, { "epoch": 0.2, "grad_norm": 0.56640625, "learning_rate": 0.00019336124235227686, "loss": 0.9189, "step": 14290 }, { "epoch": 0.21, "grad_norm": 0.5, "learning_rate": 0.0001933522693413053, "loss": 0.9835, "step": 14295 }, { "epoch": 0.21, "grad_norm": 0.59375, "learning_rate": 0.0001933432904789072, "loss": 1.0314, "step": 14300 }, { "epoch": 0.21, "grad_norm": 0.53515625, "learning_rate": 0.00019333430576564534, "loss": 1.0991, "step": 14305 }, { "epoch": 0.21, "grad_norm": 0.51953125, "learning_rate": 0.00019332531520208286, "loss": 1.0224, "step": 14310 }, { "epoch": 0.21, "grad_norm": 0.50390625, "learning_rate": 0.00019331631878878337, "loss": 0.9544, "step": 14315 }, { "epoch": 0.21, "grad_norm": 0.53125, "learning_rate": 0.00019330731652631073, "loss": 0.9824, "step": 14320 }, { "epoch": 0.21, "grad_norm": 0.55859375, "learning_rate": 0.00019329830841522923, "loss": 1.0508, "step": 14325 }, { "epoch": 0.21, "grad_norm": 0.60546875, "learning_rate": 0.00019328929445610351, "loss": 0.8179, "step": 14330 }, { "epoch": 0.21, "grad_norm": 0.490234375, "learning_rate": 0.00019328027464949853, "loss": 1.1035, "step": 14335 }, { "epoch": 0.21, "grad_norm": 0.58203125, "learning_rate": 0.00019327124899597972, "loss": 0.9759, "step": 14340 }, { "epoch": 0.21, "grad_norm": 0.625, "learning_rate": 0.00019326221749611282, "loss": 1.1146, "step": 14345 }, { "epoch": 0.21, "grad_norm": 0.52734375, "learning_rate": 0.00019325318015046388, "loss": 0.9521, "step": 14350 }, { "epoch": 0.21, "grad_norm": 0.62109375, "learning_rate": 0.0001932441369595994, "loss": 0.9631, "step": 14355 }, { "epoch": 0.21, "grad_norm": 0.5625, "learning_rate": 0.0001932350879240862, "loss": 0.879, "step": 14360 }, { "epoch": 0.21, "grad_norm": 0.51171875, "learning_rate": 0.00019322603304449155, "loss": 0.8715, "step": 14365 }, { "epoch": 0.21, "grad_norm": 1.2421875, "learning_rate": 0.00019321697232138296, "loss": 0.9968, "step": 14370 }, { "epoch": 0.21, "grad_norm": 0.5546875, "learning_rate": 0.00019320790575532836, "loss": 0.8983, "step": 14375 }, { "epoch": 0.21, "grad_norm": 0.5625, "learning_rate": 0.0001931988333468961, "loss": 0.9373, "step": 14380 }, { "epoch": 0.21, "grad_norm": 0.5078125, "learning_rate": 0.0001931897550966548, "loss": 0.9609, "step": 14385 }, { "epoch": 0.21, "grad_norm": 0.6328125, "learning_rate": 0.0001931806710051735, "loss": 0.8751, "step": 14390 }, { "epoch": 0.21, "grad_norm": 0.56640625, "learning_rate": 0.00019317158107302166, "loss": 0.9652, "step": 14395 }, { "epoch": 0.21, "grad_norm": 0.53515625, "learning_rate": 0.000193162485300769, "loss": 0.9237, "step": 14400 }, { "epoch": 0.21, "grad_norm": 0.5078125, "learning_rate": 0.00019315338368898564, "loss": 0.932, "step": 14405 }, { "epoch": 0.21, "grad_norm": 0.59375, "learning_rate": 0.0001931442762382421, "loss": 0.9976, "step": 14410 }, { "epoch": 0.21, "grad_norm": 0.5859375, "learning_rate": 0.00019313516294910924, "loss": 0.9154, "step": 14415 }, { "epoch": 0.21, "grad_norm": 0.55078125, "learning_rate": 0.0001931260438221583, "loss": 1.094, "step": 14420 }, { "epoch": 0.21, "grad_norm": 0.70703125, "learning_rate": 0.00019311691885796087, "loss": 1.0531, "step": 14425 }, { "epoch": 0.21, "grad_norm": 0.578125, "learning_rate": 0.00019310778805708888, "loss": 1.0205, "step": 14430 }, { "epoch": 0.21, "grad_norm": 0.56640625, "learning_rate": 0.00019309865142011474, "loss": 1.0082, "step": 14435 }, { "epoch": 0.21, "grad_norm": 0.6171875, "learning_rate": 0.0001930895089476111, "loss": 1.0266, "step": 14440 }, { "epoch": 0.21, "grad_norm": 0.52734375, "learning_rate": 0.000193080360640151, "loss": 0.9064, "step": 14445 }, { "epoch": 0.21, "grad_norm": 0.58203125, "learning_rate": 0.0001930712064983079, "loss": 0.8931, "step": 14450 }, { "epoch": 0.21, "grad_norm": 0.5546875, "learning_rate": 0.00019306204652265558, "loss": 0.907, "step": 14455 }, { "epoch": 0.21, "grad_norm": 0.58203125, "learning_rate": 0.00019305288071376817, "loss": 0.9252, "step": 14460 }, { "epoch": 0.21, "grad_norm": 0.85546875, "learning_rate": 0.0001930437090722202, "loss": 0.946, "step": 14465 }, { "epoch": 0.21, "grad_norm": 0.61328125, "learning_rate": 0.00019303453159858665, "loss": 1.1208, "step": 14470 }, { "epoch": 0.21, "grad_norm": 0.58203125, "learning_rate": 0.00019302534829344266, "loss": 0.9683, "step": 14475 }, { "epoch": 0.21, "grad_norm": 0.5703125, "learning_rate": 0.0001930161591573639, "loss": 0.8724, "step": 14480 }, { "epoch": 0.21, "grad_norm": 0.5390625, "learning_rate": 0.0001930069641909263, "loss": 1.0984, "step": 14485 }, { "epoch": 0.21, "grad_norm": 0.55859375, "learning_rate": 0.00019299776339470632, "loss": 0.9404, "step": 14490 }, { "epoch": 0.21, "grad_norm": 0.59375, "learning_rate": 0.0001929885567692806, "loss": 0.9694, "step": 14495 }, { "epoch": 0.21, "grad_norm": 0.51953125, "learning_rate": 0.00019297934431522623, "loss": 1.0963, "step": 14500 }, { "epoch": 0.21, "grad_norm": 0.6015625, "learning_rate": 0.00019297012603312067, "loss": 0.9967, "step": 14505 }, { "epoch": 0.21, "grad_norm": 0.578125, "learning_rate": 0.0001929609019235417, "loss": 0.8824, "step": 14510 }, { "epoch": 0.21, "grad_norm": 0.58203125, "learning_rate": 0.00019295167198706757, "loss": 0.9929, "step": 14515 }, { "epoch": 0.21, "grad_norm": 0.58203125, "learning_rate": 0.00019294243622427674, "loss": 1.0092, "step": 14520 }, { "epoch": 0.21, "grad_norm": 0.546875, "learning_rate": 0.00019293319463574817, "loss": 1.0737, "step": 14525 }, { "epoch": 0.21, "grad_norm": 0.546875, "learning_rate": 0.0001929239472220611, "loss": 0.932, "step": 14530 }, { "epoch": 0.21, "grad_norm": 0.61328125, "learning_rate": 0.00019291469398379524, "loss": 0.906, "step": 14535 }, { "epoch": 0.21, "grad_norm": 1.046875, "learning_rate": 0.00019290543492153045, "loss": 1.0696, "step": 14540 }, { "epoch": 0.21, "grad_norm": 0.5703125, "learning_rate": 0.0001928961700358473, "loss": 0.8614, "step": 14545 }, { "epoch": 0.21, "grad_norm": 0.5625, "learning_rate": 0.00019288689932732634, "loss": 1.0545, "step": 14550 }, { "epoch": 0.21, "grad_norm": 0.640625, "learning_rate": 0.00019287762279654872, "loss": 0.9849, "step": 14555 }, { "epoch": 0.21, "grad_norm": 0.55078125, "learning_rate": 0.00019286834044409597, "loss": 0.9447, "step": 14560 }, { "epoch": 0.21, "grad_norm": 0.56640625, "learning_rate": 0.00019285905227054985, "loss": 0.9369, "step": 14565 }, { "epoch": 0.21, "grad_norm": 0.578125, "learning_rate": 0.00019284975827649258, "loss": 0.9404, "step": 14570 }, { "epoch": 0.21, "grad_norm": 0.58203125, "learning_rate": 0.00019284045846250673, "loss": 0.899, "step": 14575 }, { "epoch": 0.21, "grad_norm": 0.58203125, "learning_rate": 0.0001928311528291752, "loss": 1.0397, "step": 14580 }, { "epoch": 0.21, "grad_norm": 0.66796875, "learning_rate": 0.0001928218413770813, "loss": 1.0044, "step": 14585 }, { "epoch": 0.21, "grad_norm": 0.51171875, "learning_rate": 0.00019281252410680862, "loss": 0.9415, "step": 14590 }, { "epoch": 0.21, "grad_norm": 0.6171875, "learning_rate": 0.00019280320101894128, "loss": 0.9335, "step": 14595 }, { "epoch": 0.21, "grad_norm": 0.54296875, "learning_rate": 0.00019279387211406358, "loss": 0.8984, "step": 14600 }, { "epoch": 0.21, "grad_norm": 0.53125, "learning_rate": 0.00019278453739276027, "loss": 1.1, "step": 14605 }, { "epoch": 0.21, "grad_norm": 0.58984375, "learning_rate": 0.0001927751968556165, "loss": 1.0613, "step": 14610 }, { "epoch": 0.21, "grad_norm": 0.61328125, "learning_rate": 0.00019276585050321775, "loss": 1.1709, "step": 14615 }, { "epoch": 0.21, "grad_norm": 0.71875, "learning_rate": 0.00019275649833614983, "loss": 0.8661, "step": 14620 }, { "epoch": 0.21, "grad_norm": 0.61328125, "learning_rate": 0.00019274714035499895, "loss": 0.8943, "step": 14625 }, { "epoch": 0.21, "grad_norm": 0.58203125, "learning_rate": 0.00019273777656035168, "loss": 0.9973, "step": 14630 }, { "epoch": 0.21, "grad_norm": 0.625, "learning_rate": 0.00019272840695279495, "loss": 0.9958, "step": 14635 }, { "epoch": 0.21, "grad_norm": 0.65234375, "learning_rate": 0.00019271903153291606, "loss": 0.953, "step": 14640 }, { "epoch": 0.21, "grad_norm": 1.1484375, "learning_rate": 0.0001927096503013027, "loss": 0.8613, "step": 14645 }, { "epoch": 0.21, "grad_norm": 0.5234375, "learning_rate": 0.00019270026325854284, "loss": 0.8887, "step": 14650 }, { "epoch": 0.21, "grad_norm": 0.60546875, "learning_rate": 0.00019269087040522495, "loss": 0.9941, "step": 14655 }, { "epoch": 0.21, "grad_norm": 0.50390625, "learning_rate": 0.00019268147174193766, "loss": 0.8934, "step": 14660 }, { "epoch": 0.21, "grad_norm": 0.5859375, "learning_rate": 0.00019267206726927025, "loss": 0.959, "step": 14665 }, { "epoch": 0.21, "grad_norm": 0.54296875, "learning_rate": 0.00019266265698781205, "loss": 0.9928, "step": 14670 }, { "epoch": 0.21, "grad_norm": 0.58203125, "learning_rate": 0.00019265324089815303, "loss": 0.8441, "step": 14675 }, { "epoch": 0.21, "grad_norm": 0.6640625, "learning_rate": 0.00019264381900088333, "loss": 0.9877, "step": 14680 }, { "epoch": 0.21, "grad_norm": 0.5390625, "learning_rate": 0.0001926343912965935, "loss": 0.8847, "step": 14685 }, { "epoch": 0.21, "grad_norm": 0.609375, "learning_rate": 0.00019262495778587458, "loss": 1.1266, "step": 14690 }, { "epoch": 0.21, "grad_norm": 0.53515625, "learning_rate": 0.00019261551846931778, "loss": 1.1062, "step": 14695 }, { "epoch": 0.21, "grad_norm": 0.5390625, "learning_rate": 0.00019260607334751483, "loss": 1.1259, "step": 14700 }, { "epoch": 0.21, "grad_norm": 0.62890625, "learning_rate": 0.0001925966224210577, "loss": 0.9801, "step": 14705 }, { "epoch": 0.21, "grad_norm": 0.55859375, "learning_rate": 0.00019258716569053885, "loss": 0.9701, "step": 14710 }, { "epoch": 0.21, "grad_norm": 0.625, "learning_rate": 0.00019257770315655098, "loss": 1.0589, "step": 14715 }, { "epoch": 0.21, "grad_norm": 0.5546875, "learning_rate": 0.00019256823481968727, "loss": 0.9431, "step": 14720 }, { "epoch": 0.21, "grad_norm": 0.59765625, "learning_rate": 0.00019255876068054116, "loss": 0.8557, "step": 14725 }, { "epoch": 0.21, "grad_norm": 0.640625, "learning_rate": 0.0001925492807397065, "loss": 1.1344, "step": 14730 }, { "epoch": 0.21, "grad_norm": 0.58203125, "learning_rate": 0.00019253979499777755, "loss": 0.9122, "step": 14735 }, { "epoch": 0.21, "grad_norm": 0.58984375, "learning_rate": 0.00019253030345534882, "loss": 1.0495, "step": 14740 }, { "epoch": 0.21, "grad_norm": 0.59375, "learning_rate": 0.0001925208061130153, "loss": 0.97, "step": 14745 }, { "epoch": 0.21, "grad_norm": 0.5703125, "learning_rate": 0.00019251130297137227, "loss": 0.9526, "step": 14750 }, { "epoch": 0.21, "grad_norm": 0.56640625, "learning_rate": 0.00019250179403101542, "loss": 1.0612, "step": 14755 }, { "epoch": 0.21, "grad_norm": 0.5546875, "learning_rate": 0.00019249227929254078, "loss": 0.9857, "step": 14760 }, { "epoch": 0.21, "grad_norm": 0.5390625, "learning_rate": 0.00019248275875654474, "loss": 0.9431, "step": 14765 }, { "epoch": 0.21, "grad_norm": 0.46875, "learning_rate": 0.00019247323242362402, "loss": 0.9341, "step": 14770 }, { "epoch": 0.21, "grad_norm": 0.52734375, "learning_rate": 0.0001924637002943758, "loss": 0.9805, "step": 14775 }, { "epoch": 0.21, "grad_norm": 0.5625, "learning_rate": 0.00019245416236939752, "loss": 1.0725, "step": 14780 }, { "epoch": 0.21, "grad_norm": 0.5859375, "learning_rate": 0.00019244461864928707, "loss": 1.0164, "step": 14785 }, { "epoch": 0.21, "grad_norm": 0.62890625, "learning_rate": 0.00019243506913464261, "loss": 1.2681, "step": 14790 }, { "epoch": 0.21, "grad_norm": 0.61328125, "learning_rate": 0.00019242551382606275, "loss": 1.1246, "step": 14795 }, { "epoch": 0.21, "grad_norm": 0.609375, "learning_rate": 0.00019241595272414643, "loss": 1.091, "step": 14800 }, { "epoch": 0.21, "grad_norm": 0.515625, "learning_rate": 0.00019240638582949292, "loss": 0.8939, "step": 14805 }, { "epoch": 0.21, "grad_norm": 0.765625, "learning_rate": 0.00019239681314270194, "loss": 1.2496, "step": 14810 }, { "epoch": 0.21, "grad_norm": 0.53125, "learning_rate": 0.00019238723466437346, "loss": 0.9049, "step": 14815 }, { "epoch": 0.21, "grad_norm": 0.6015625, "learning_rate": 0.0001923776503951079, "loss": 0.8829, "step": 14820 }, { "epoch": 0.21, "grad_norm": 0.53515625, "learning_rate": 0.00019236806033550599, "loss": 0.901, "step": 14825 }, { "epoch": 0.21, "grad_norm": 0.55859375, "learning_rate": 0.00019235846448616885, "loss": 1.0361, "step": 14830 }, { "epoch": 0.21, "grad_norm": 0.62109375, "learning_rate": 0.00019234886284769798, "loss": 0.9696, "step": 14835 }, { "epoch": 0.21, "grad_norm": 0.5859375, "learning_rate": 0.00019233925542069523, "loss": 0.9323, "step": 14840 }, { "epoch": 0.21, "grad_norm": 0.6015625, "learning_rate": 0.00019232964220576275, "loss": 0.9367, "step": 14845 }, { "epoch": 0.21, "grad_norm": 0.6171875, "learning_rate": 0.00019232002320350317, "loss": 1.1198, "step": 14850 }, { "epoch": 0.21, "grad_norm": 0.5546875, "learning_rate": 0.00019231039841451937, "loss": 0.9123, "step": 14855 }, { "epoch": 0.21, "grad_norm": 0.58203125, "learning_rate": 0.00019230076783941468, "loss": 0.8982, "step": 14860 }, { "epoch": 0.21, "grad_norm": 0.47265625, "learning_rate": 0.00019229113147879273, "loss": 0.9017, "step": 14865 }, { "epoch": 0.21, "grad_norm": 0.57421875, "learning_rate": 0.00019228148933325755, "loss": 0.9551, "step": 14870 }, { "epoch": 0.21, "grad_norm": 0.59765625, "learning_rate": 0.0001922718414034135, "loss": 0.8955, "step": 14875 }, { "epoch": 0.21, "grad_norm": 0.44921875, "learning_rate": 0.0001922621876898654, "loss": 0.9203, "step": 14880 }, { "epoch": 0.21, "grad_norm": 0.54296875, "learning_rate": 0.00019225252819321826, "loss": 0.9607, "step": 14885 }, { "epoch": 0.21, "grad_norm": 0.57421875, "learning_rate": 0.0001922428629140776, "loss": 0.8713, "step": 14890 }, { "epoch": 0.21, "grad_norm": 0.62890625, "learning_rate": 0.0001922331918530492, "loss": 0.9595, "step": 14895 }, { "epoch": 0.21, "grad_norm": 0.5546875, "learning_rate": 0.00019222351501073934, "loss": 0.9485, "step": 14900 }, { "epoch": 0.21, "grad_norm": 0.5625, "learning_rate": 0.00019221383238775448, "loss": 0.9874, "step": 14905 }, { "epoch": 0.21, "grad_norm": 0.55859375, "learning_rate": 0.00019220414398470162, "loss": 0.9396, "step": 14910 }, { "epoch": 0.21, "grad_norm": 0.515625, "learning_rate": 0.00019219444980218798, "loss": 0.9336, "step": 14915 }, { "epoch": 0.21, "grad_norm": 0.703125, "learning_rate": 0.00019218474984082122, "loss": 0.9949, "step": 14920 }, { "epoch": 0.21, "grad_norm": 0.55859375, "learning_rate": 0.00019217504410120936, "loss": 1.0587, "step": 14925 }, { "epoch": 0.21, "grad_norm": 0.57421875, "learning_rate": 0.00019216533258396078, "loss": 1.0473, "step": 14930 }, { "epoch": 0.21, "grad_norm": 0.5390625, "learning_rate": 0.00019215561528968414, "loss": 0.9306, "step": 14935 }, { "epoch": 0.21, "grad_norm": 0.65625, "learning_rate": 0.00019214589221898862, "loss": 0.8904, "step": 14940 }, { "epoch": 0.21, "grad_norm": 0.55859375, "learning_rate": 0.00019213616337248362, "loss": 0.9354, "step": 14945 }, { "epoch": 0.21, "grad_norm": 0.62109375, "learning_rate": 0.00019212642875077895, "loss": 0.9908, "step": 14950 }, { "epoch": 0.21, "grad_norm": 0.453125, "learning_rate": 0.0001921166883544848, "loss": 1.0877, "step": 14955 }, { "epoch": 0.21, "grad_norm": 0.625, "learning_rate": 0.00019210694218421175, "loss": 1.0058, "step": 14960 }, { "epoch": 0.21, "grad_norm": 0.55078125, "learning_rate": 0.00019209719024057063, "loss": 0.8071, "step": 14965 }, { "epoch": 0.21, "grad_norm": 0.51953125, "learning_rate": 0.00019208743252417275, "loss": 0.9298, "step": 14970 }, { "epoch": 0.21, "grad_norm": 0.53125, "learning_rate": 0.0001920776690356297, "loss": 0.853, "step": 14975 }, { "epoch": 0.21, "grad_norm": 0.5625, "learning_rate": 0.0001920678997755535, "loss": 1.0874, "step": 14980 }, { "epoch": 0.21, "grad_norm": 0.6015625, "learning_rate": 0.00019205812474455648, "loss": 1.062, "step": 14985 }, { "epoch": 0.22, "grad_norm": 0.58203125, "learning_rate": 0.00019204834394325135, "loss": 1.0068, "step": 14990 }, { "epoch": 0.22, "grad_norm": 0.57421875, "learning_rate": 0.00019203855737225122, "loss": 0.8639, "step": 14995 }, { "epoch": 0.22, "grad_norm": 0.671875, "learning_rate": 0.00019202876503216946, "loss": 1.007, "step": 15000 }, { "epoch": 0.22, "grad_norm": 0.58203125, "learning_rate": 0.0001920189669236199, "loss": 1.0242, "step": 15005 }, { "epoch": 0.22, "grad_norm": 0.5234375, "learning_rate": 0.00019200916304721672, "loss": 1.0116, "step": 15010 }, { "epoch": 0.22, "grad_norm": 0.58203125, "learning_rate": 0.00019199935340357438, "loss": 1.0486, "step": 15015 }, { "epoch": 0.22, "grad_norm": 0.66015625, "learning_rate": 0.0001919895379933078, "loss": 1.1622, "step": 15020 }, { "epoch": 0.22, "grad_norm": 0.52734375, "learning_rate": 0.0001919797168170322, "loss": 1.0576, "step": 15025 }, { "epoch": 0.22, "grad_norm": 0.5390625, "learning_rate": 0.0001919698898753632, "loss": 0.9873, "step": 15030 }, { "epoch": 0.22, "grad_norm": 0.6015625, "learning_rate": 0.00019196005716891676, "loss": 1.0213, "step": 15035 }, { "epoch": 0.22, "grad_norm": 0.51953125, "learning_rate": 0.00019195021869830922, "loss": 1.0241, "step": 15040 }, { "epoch": 0.22, "grad_norm": 0.62890625, "learning_rate": 0.00019194037446415723, "loss": 0.8688, "step": 15045 }, { "epoch": 0.22, "grad_norm": 0.55859375, "learning_rate": 0.00019193052446707785, "loss": 0.8734, "step": 15050 }, { "epoch": 0.22, "grad_norm": 0.5546875, "learning_rate": 0.00019192066870768853, "loss": 0.8426, "step": 15055 }, { "epoch": 0.22, "grad_norm": 0.55078125, "learning_rate": 0.00019191080718660697, "loss": 0.949, "step": 15060 }, { "epoch": 0.22, "grad_norm": 0.5390625, "learning_rate": 0.00019190093990445134, "loss": 0.9425, "step": 15065 }, { "epoch": 0.22, "grad_norm": 0.5703125, "learning_rate": 0.00019189106686184014, "loss": 0.8756, "step": 15070 }, { "epoch": 0.22, "grad_norm": 0.5859375, "learning_rate": 0.00019188118805939222, "loss": 0.9335, "step": 15075 }, { "epoch": 0.22, "grad_norm": 0.546875, "learning_rate": 0.0001918713034977268, "loss": 0.9527, "step": 15080 }, { "epoch": 0.22, "grad_norm": 0.66796875, "learning_rate": 0.00019186141317746342, "loss": 0.9751, "step": 15085 }, { "epoch": 0.22, "grad_norm": 0.5390625, "learning_rate": 0.00019185151709922205, "loss": 0.9198, "step": 15090 }, { "epoch": 0.22, "grad_norm": 0.5703125, "learning_rate": 0.00019184161526362298, "loss": 0.8982, "step": 15095 }, { "epoch": 0.22, "grad_norm": 0.435546875, "learning_rate": 0.00019183170767128686, "loss": 0.9553, "step": 15100 }, { "epoch": 0.22, "grad_norm": 0.671875, "learning_rate": 0.00019182179432283473, "loss": 0.9782, "step": 15105 }, { "epoch": 0.22, "grad_norm": 0.5078125, "learning_rate": 0.00019181187521888796, "loss": 0.8864, "step": 15110 }, { "epoch": 0.22, "grad_norm": 0.515625, "learning_rate": 0.00019180195036006825, "loss": 0.9765, "step": 15115 }, { "epoch": 0.22, "grad_norm": 0.61328125, "learning_rate": 0.00019179201974699775, "loss": 0.9345, "step": 15120 }, { "epoch": 0.22, "grad_norm": 0.5546875, "learning_rate": 0.00019178208338029894, "loss": 1.0356, "step": 15125 }, { "epoch": 0.22, "grad_norm": 0.625, "learning_rate": 0.00019177214126059458, "loss": 1.0129, "step": 15130 }, { "epoch": 0.22, "grad_norm": 0.57421875, "learning_rate": 0.0001917621933885079, "loss": 0.9623, "step": 15135 }, { "epoch": 0.22, "grad_norm": 0.53125, "learning_rate": 0.00019175223976466242, "loss": 0.8772, "step": 15140 }, { "epoch": 0.22, "grad_norm": 0.53515625, "learning_rate": 0.00019174228038968205, "loss": 1.0285, "step": 15145 }, { "epoch": 0.22, "grad_norm": 0.4296875, "learning_rate": 0.0001917323152641911, "loss": 1.0615, "step": 15150 }, { "epoch": 0.22, "grad_norm": 0.5546875, "learning_rate": 0.00019172234438881412, "loss": 0.952, "step": 15155 }, { "epoch": 0.22, "grad_norm": 0.68359375, "learning_rate": 0.00019171236776417612, "loss": 1.0457, "step": 15160 }, { "epoch": 0.22, "grad_norm": 0.49609375, "learning_rate": 0.0001917023853909025, "loss": 1.1488, "step": 15165 }, { "epoch": 0.22, "grad_norm": 0.5625, "learning_rate": 0.00019169239726961887, "loss": 0.9128, "step": 15170 }, { "epoch": 0.22, "grad_norm": 0.63671875, "learning_rate": 0.0001916824034009514, "loss": 1.0244, "step": 15175 }, { "epoch": 0.22, "grad_norm": 0.6328125, "learning_rate": 0.00019167240378552644, "loss": 0.9686, "step": 15180 }, { "epoch": 0.22, "grad_norm": 0.5546875, "learning_rate": 0.0001916623984239708, "loss": 0.9202, "step": 15185 }, { "epoch": 0.22, "grad_norm": 0.51953125, "learning_rate": 0.00019165238731691164, "loss": 0.8916, "step": 15190 }, { "epoch": 0.22, "grad_norm": 0.578125, "learning_rate": 0.00019164237046497646, "loss": 0.8949, "step": 15195 }, { "epoch": 0.22, "grad_norm": 0.53515625, "learning_rate": 0.00019163234786879314, "loss": 0.962, "step": 15200 }, { "epoch": 0.22, "grad_norm": 0.51953125, "learning_rate": 0.00019162231952898987, "loss": 0.85, "step": 15205 }, { "epoch": 0.22, "grad_norm": 0.65625, "learning_rate": 0.0001916122854461953, "loss": 0.9933, "step": 15210 }, { "epoch": 0.22, "grad_norm": 0.59375, "learning_rate": 0.00019160224562103832, "loss": 0.9851, "step": 15215 }, { "epoch": 0.22, "grad_norm": 0.58984375, "learning_rate": 0.00019159220005414825, "loss": 0.9937, "step": 15220 }, { "epoch": 0.22, "grad_norm": 0.52734375, "learning_rate": 0.00019158214874615475, "loss": 0.9975, "step": 15225 }, { "epoch": 0.22, "grad_norm": 0.69140625, "learning_rate": 0.0001915720916976879, "loss": 1.0316, "step": 15230 }, { "epoch": 0.22, "grad_norm": 0.9140625, "learning_rate": 0.00019156202890937803, "loss": 1.1671, "step": 15235 }, { "epoch": 0.22, "grad_norm": 0.63671875, "learning_rate": 0.00019155196038185592, "loss": 1.0737, "step": 15240 }, { "epoch": 0.22, "grad_norm": 0.6171875, "learning_rate": 0.00019154188611575265, "loss": 1.0276, "step": 15245 }, { "epoch": 0.22, "grad_norm": 0.5546875, "learning_rate": 0.0001915318061116997, "loss": 0.9565, "step": 15250 }, { "epoch": 0.22, "grad_norm": 0.625, "learning_rate": 0.00019152172037032892, "loss": 1.0703, "step": 15255 }, { "epoch": 0.22, "grad_norm": 0.58203125, "learning_rate": 0.00019151162889227246, "loss": 0.8682, "step": 15260 }, { "epoch": 0.22, "grad_norm": 0.59375, "learning_rate": 0.00019150153167816288, "loss": 1.1064, "step": 15265 }, { "epoch": 0.22, "grad_norm": 0.51171875, "learning_rate": 0.0001914914287286331, "loss": 0.9682, "step": 15270 }, { "epoch": 0.22, "grad_norm": 0.671875, "learning_rate": 0.00019148132004431632, "loss": 0.9411, "step": 15275 }, { "epoch": 0.22, "grad_norm": 0.6171875, "learning_rate": 0.00019147120562584624, "loss": 0.8581, "step": 15280 }, { "epoch": 0.22, "grad_norm": 0.609375, "learning_rate": 0.0001914610854738568, "loss": 0.9584, "step": 15285 }, { "epoch": 0.22, "grad_norm": 0.56640625, "learning_rate": 0.00019145095958898235, "loss": 0.8806, "step": 15290 }, { "epoch": 0.22, "grad_norm": 0.5625, "learning_rate": 0.00019144082797185763, "loss": 1.026, "step": 15295 }, { "epoch": 0.22, "grad_norm": 0.578125, "learning_rate": 0.00019143069062311767, "loss": 0.9719, "step": 15300 }, { "epoch": 0.22, "grad_norm": 0.60546875, "learning_rate": 0.0001914205475433979, "loss": 0.891, "step": 15305 }, { "epoch": 0.22, "grad_norm": 0.482421875, "learning_rate": 0.00019141039873333404, "loss": 0.9281, "step": 15310 }, { "epoch": 0.22, "grad_norm": 0.52734375, "learning_rate": 0.00019140024419356233, "loss": 1.0256, "step": 15315 }, { "epoch": 0.22, "grad_norm": 0.546875, "learning_rate": 0.00019139008392471917, "loss": 1.1613, "step": 15320 }, { "epoch": 0.22, "grad_norm": 0.5, "learning_rate": 0.0001913799179274415, "loss": 0.969, "step": 15325 }, { "epoch": 0.22, "grad_norm": 0.65234375, "learning_rate": 0.00019136974620236652, "loss": 0.8954, "step": 15330 }, { "epoch": 0.22, "grad_norm": 0.68359375, "learning_rate": 0.00019135956875013176, "loss": 1.04, "step": 15335 }, { "epoch": 0.22, "grad_norm": 0.58203125, "learning_rate": 0.00019134938557137515, "loss": 1.0477, "step": 15340 }, { "epoch": 0.22, "grad_norm": 0.55859375, "learning_rate": 0.00019133919666673507, "loss": 0.9627, "step": 15345 }, { "epoch": 0.22, "grad_norm": 0.6171875, "learning_rate": 0.00019132900203685006, "loss": 0.8964, "step": 15350 }, { "epoch": 0.22, "grad_norm": 0.5234375, "learning_rate": 0.00019131880168235922, "loss": 0.9475, "step": 15355 }, { "epoch": 0.22, "grad_norm": 0.58203125, "learning_rate": 0.0001913085956039019, "loss": 1.0046, "step": 15360 }, { "epoch": 0.22, "grad_norm": 0.54296875, "learning_rate": 0.0001912983838021178, "loss": 0.8238, "step": 15365 }, { "epoch": 0.22, "grad_norm": 0.546875, "learning_rate": 0.00019128816627764702, "loss": 0.8787, "step": 15370 }, { "epoch": 0.22, "grad_norm": 0.625, "learning_rate": 0.00019127794303113002, "loss": 0.9763, "step": 15375 }, { "epoch": 0.22, "grad_norm": 0.5234375, "learning_rate": 0.00019126771406320758, "loss": 0.8963, "step": 15380 }, { "epoch": 0.22, "grad_norm": 0.59375, "learning_rate": 0.0001912574793745209, "loss": 0.9862, "step": 15385 }, { "epoch": 0.22, "grad_norm": 0.55078125, "learning_rate": 0.00019124723896571147, "loss": 1.0727, "step": 15390 }, { "epoch": 0.22, "grad_norm": 0.56640625, "learning_rate": 0.00019123699283742117, "loss": 0.9808, "step": 15395 }, { "epoch": 0.22, "grad_norm": 0.671875, "learning_rate": 0.00019122674099029225, "loss": 1.028, "step": 15400 }, { "epoch": 0.22, "grad_norm": 0.56640625, "learning_rate": 0.00019121648342496731, "loss": 0.9697, "step": 15405 }, { "epoch": 0.22, "grad_norm": 0.51953125, "learning_rate": 0.00019120622014208932, "loss": 0.8534, "step": 15410 }, { "epoch": 0.22, "grad_norm": 0.7890625, "learning_rate": 0.00019119595114230154, "loss": 1.0241, "step": 15415 }, { "epoch": 0.22, "grad_norm": 0.734375, "learning_rate": 0.0001911856764262477, "loss": 1.088, "step": 15420 }, { "epoch": 0.22, "grad_norm": 0.65625, "learning_rate": 0.00019117539599457182, "loss": 1.0458, "step": 15425 }, { "epoch": 0.22, "grad_norm": 0.609375, "learning_rate": 0.00019116510984791825, "loss": 0.9081, "step": 15430 }, { "epoch": 0.22, "grad_norm": 0.56640625, "learning_rate": 0.0001911548179869318, "loss": 0.9618, "step": 15435 }, { "epoch": 0.22, "grad_norm": 0.58984375, "learning_rate": 0.0001911445204122575, "loss": 0.9968, "step": 15440 }, { "epoch": 0.22, "grad_norm": 0.49609375, "learning_rate": 0.0001911342171245409, "loss": 0.9958, "step": 15445 }, { "epoch": 0.22, "grad_norm": 0.5703125, "learning_rate": 0.00019112390812442773, "loss": 0.984, "step": 15450 }, { "epoch": 0.22, "grad_norm": 0.6015625, "learning_rate": 0.00019111359341256426, "loss": 1.0099, "step": 15455 }, { "epoch": 0.22, "grad_norm": 0.62890625, "learning_rate": 0.00019110327298959697, "loss": 0.879, "step": 15460 }, { "epoch": 0.22, "grad_norm": 0.5234375, "learning_rate": 0.00019109294685617275, "loss": 1.2221, "step": 15465 }, { "epoch": 0.22, "grad_norm": 0.53125, "learning_rate": 0.0001910826150129389, "loss": 0.8186, "step": 15470 }, { "epoch": 0.22, "grad_norm": 0.58203125, "learning_rate": 0.00019107227746054302, "loss": 0.8354, "step": 15475 }, { "epoch": 0.22, "grad_norm": 0.484375, "learning_rate": 0.00019106193419963304, "loss": 1.0459, "step": 15480 }, { "epoch": 0.22, "grad_norm": 0.62890625, "learning_rate": 0.00019105158523085734, "loss": 1.156, "step": 15485 }, { "epoch": 0.22, "grad_norm": 0.54296875, "learning_rate": 0.00019104123055486454, "loss": 0.9457, "step": 15490 }, { "epoch": 0.22, "grad_norm": 0.625, "learning_rate": 0.00019103087017230375, "loss": 0.8631, "step": 15495 }, { "epoch": 0.22, "grad_norm": 0.58984375, "learning_rate": 0.00019102050408382432, "loss": 1.0076, "step": 15500 }, { "epoch": 0.22, "grad_norm": 0.51953125, "learning_rate": 0.00019101013229007606, "loss": 0.8997, "step": 15505 }, { "epoch": 0.22, "grad_norm": 0.5859375, "learning_rate": 0.00019099975479170903, "loss": 1.0633, "step": 15510 }, { "epoch": 0.22, "grad_norm": 0.51953125, "learning_rate": 0.00019098937158937374, "loss": 0.8956, "step": 15515 }, { "epoch": 0.22, "grad_norm": 0.62109375, "learning_rate": 0.000190978982683721, "loss": 1.0573, "step": 15520 }, { "epoch": 0.22, "grad_norm": 0.55859375, "learning_rate": 0.000190968588075402, "loss": 1.0133, "step": 15525 }, { "epoch": 0.22, "grad_norm": 0.546875, "learning_rate": 0.00019095818776506836, "loss": 0.9309, "step": 15530 }, { "epoch": 0.22, "grad_norm": 0.6484375, "learning_rate": 0.00019094778175337185, "loss": 1.1443, "step": 15535 }, { "epoch": 0.22, "grad_norm": 0.65625, "learning_rate": 0.00019093737004096485, "loss": 1.0728, "step": 15540 }, { "epoch": 0.22, "grad_norm": 0.625, "learning_rate": 0.00019092695262849988, "loss": 0.9081, "step": 15545 }, { "epoch": 0.22, "grad_norm": 0.71484375, "learning_rate": 0.00019091652951662997, "loss": 0.9224, "step": 15550 }, { "epoch": 0.22, "grad_norm": 0.62109375, "learning_rate": 0.00019090610070600844, "loss": 0.9895, "step": 15555 }, { "epoch": 0.22, "grad_norm": 0.5390625, "learning_rate": 0.00019089566619728902, "loss": 0.8279, "step": 15560 }, { "epoch": 0.22, "grad_norm": 0.58984375, "learning_rate": 0.0001908852259911257, "loss": 0.8762, "step": 15565 }, { "epoch": 0.22, "grad_norm": 0.53125, "learning_rate": 0.0001908747800881729, "loss": 0.9472, "step": 15570 }, { "epoch": 0.22, "grad_norm": 0.5078125, "learning_rate": 0.0001908643284890854, "loss": 0.9305, "step": 15575 }, { "epoch": 0.22, "grad_norm": 0.59765625, "learning_rate": 0.0001908538711945183, "loss": 0.9279, "step": 15580 }, { "epoch": 0.22, "grad_norm": 0.609375, "learning_rate": 0.00019084340820512706, "loss": 1.154, "step": 15585 }, { "epoch": 0.22, "grad_norm": 0.56640625, "learning_rate": 0.00019083293952156755, "loss": 1.019, "step": 15590 }, { "epoch": 0.22, "grad_norm": 0.59765625, "learning_rate": 0.00019082246514449594, "loss": 1.0037, "step": 15595 }, { "epoch": 0.22, "grad_norm": 0.671875, "learning_rate": 0.00019081198507456878, "loss": 0.8323, "step": 15600 }, { "epoch": 0.22, "grad_norm": 0.55078125, "learning_rate": 0.00019080149931244297, "loss": 0.9139, "step": 15605 }, { "epoch": 0.22, "grad_norm": 0.5546875, "learning_rate": 0.00019079100785877577, "loss": 0.7942, "step": 15610 }, { "epoch": 0.22, "grad_norm": 0.51953125, "learning_rate": 0.00019078051071422478, "loss": 0.9099, "step": 15615 }, { "epoch": 0.22, "grad_norm": 0.55859375, "learning_rate": 0.000190770007879448, "loss": 1.0347, "step": 15620 }, { "epoch": 0.22, "grad_norm": 0.5234375, "learning_rate": 0.00019075949935510374, "loss": 0.9075, "step": 15625 }, { "epoch": 0.22, "grad_norm": 0.451171875, "learning_rate": 0.00019074898514185072, "loss": 0.8973, "step": 15630 }, { "epoch": 0.22, "grad_norm": 0.6640625, "learning_rate": 0.00019073846524034793, "loss": 1.0093, "step": 15635 }, { "epoch": 0.22, "grad_norm": 0.6171875, "learning_rate": 0.0001907279396512548, "loss": 1.025, "step": 15640 }, { "epoch": 0.22, "grad_norm": 0.609375, "learning_rate": 0.0001907174083752311, "loss": 1.1046, "step": 15645 }, { "epoch": 0.22, "grad_norm": 0.52734375, "learning_rate": 0.00019070687141293689, "loss": 0.807, "step": 15650 }, { "epoch": 0.22, "grad_norm": 0.5859375, "learning_rate": 0.00019069632876503269, "loss": 0.9024, "step": 15655 }, { "epoch": 0.22, "grad_norm": 0.5390625, "learning_rate": 0.00019068578043217934, "loss": 0.8765, "step": 15660 }, { "epoch": 0.22, "grad_norm": 0.66796875, "learning_rate": 0.00019067522641503794, "loss": 0.976, "step": 15665 }, { "epoch": 0.22, "grad_norm": 0.6328125, "learning_rate": 0.0001906646667142701, "loss": 1.0139, "step": 15670 }, { "epoch": 0.22, "grad_norm": 0.498046875, "learning_rate": 0.00019065410133053766, "loss": 0.979, "step": 15675 }, { "epoch": 0.22, "grad_norm": 0.625, "learning_rate": 0.00019064353026450296, "loss": 1.1048, "step": 15680 }, { "epoch": 0.22, "grad_norm": 0.55859375, "learning_rate": 0.00019063295351682852, "loss": 0.975, "step": 15685 }, { "epoch": 0.23, "grad_norm": 0.5, "learning_rate": 0.00019062237108817732, "loss": 0.9718, "step": 15690 }, { "epoch": 0.23, "grad_norm": 0.43359375, "learning_rate": 0.00019061178297921272, "loss": 0.962, "step": 15695 }, { "epoch": 0.23, "grad_norm": 0.5390625, "learning_rate": 0.00019060118919059834, "loss": 1.074, "step": 15700 }, { "epoch": 0.23, "grad_norm": 0.62890625, "learning_rate": 0.00019059058972299825, "loss": 1.0041, "step": 15705 }, { "epoch": 0.23, "grad_norm": 0.59375, "learning_rate": 0.00019057998457707682, "loss": 0.9185, "step": 15710 }, { "epoch": 0.23, "grad_norm": 0.60546875, "learning_rate": 0.00019056937375349877, "loss": 0.9434, "step": 15715 }, { "epoch": 0.23, "grad_norm": 0.8203125, "learning_rate": 0.00019055875725292927, "loss": 0.7916, "step": 15720 }, { "epoch": 0.23, "grad_norm": 0.5625, "learning_rate": 0.00019054813507603368, "loss": 0.9599, "step": 15725 }, { "epoch": 0.23, "grad_norm": 0.53125, "learning_rate": 0.0001905375072234779, "loss": 0.8582, "step": 15730 }, { "epoch": 0.23, "grad_norm": 0.55078125, "learning_rate": 0.00019052687369592802, "loss": 0.8423, "step": 15735 }, { "epoch": 0.23, "grad_norm": 0.466796875, "learning_rate": 0.00019051623449405062, "loss": 0.9381, "step": 15740 }, { "epoch": 0.23, "grad_norm": 0.5703125, "learning_rate": 0.00019050558961851254, "loss": 0.8004, "step": 15745 }, { "epoch": 0.23, "grad_norm": 0.59375, "learning_rate": 0.00019049493906998102, "loss": 0.9623, "step": 15750 }, { "epoch": 0.23, "grad_norm": 0.578125, "learning_rate": 0.00019048428284912364, "loss": 1.1378, "step": 15755 }, { "epoch": 0.23, "grad_norm": 0.5703125, "learning_rate": 0.0001904736209566084, "loss": 1.0624, "step": 15760 }, { "epoch": 0.23, "grad_norm": 0.4921875, "learning_rate": 0.00019046295339310353, "loss": 0.8383, "step": 15765 }, { "epoch": 0.23, "grad_norm": 0.546875, "learning_rate": 0.00019045228015927772, "loss": 0.9157, "step": 15770 }, { "epoch": 0.23, "grad_norm": 0.5546875, "learning_rate": 0.00019044160125579994, "loss": 0.9132, "step": 15775 }, { "epoch": 0.23, "grad_norm": 0.59375, "learning_rate": 0.00019043091668333965, "loss": 1.1574, "step": 15780 }, { "epoch": 0.23, "grad_norm": 0.59765625, "learning_rate": 0.00019042022644256648, "loss": 1.047, "step": 15785 }, { "epoch": 0.23, "grad_norm": 0.73046875, "learning_rate": 0.00019040953053415055, "loss": 0.919, "step": 15790 }, { "epoch": 0.23, "grad_norm": 0.6171875, "learning_rate": 0.00019039882895876224, "loss": 0.9386, "step": 15795 }, { "epoch": 0.23, "grad_norm": 0.58203125, "learning_rate": 0.00019038812171707242, "loss": 0.9384, "step": 15800 }, { "epoch": 0.23, "grad_norm": 0.59765625, "learning_rate": 0.00019037740880975217, "loss": 1.1885, "step": 15805 }, { "epoch": 0.23, "grad_norm": 0.59375, "learning_rate": 0.000190366690237473, "loss": 0.9901, "step": 15810 }, { "epoch": 0.23, "grad_norm": 0.49609375, "learning_rate": 0.00019035596600090675, "loss": 0.9708, "step": 15815 }, { "epoch": 0.23, "grad_norm": 0.5390625, "learning_rate": 0.0001903452361007257, "loss": 1.0002, "step": 15820 }, { "epoch": 0.23, "grad_norm": 0.55859375, "learning_rate": 0.0001903345005376023, "loss": 1.0308, "step": 15825 }, { "epoch": 0.23, "grad_norm": 0.578125, "learning_rate": 0.00019032375931220954, "loss": 0.9574, "step": 15830 }, { "epoch": 0.23, "grad_norm": 0.52734375, "learning_rate": 0.0001903130124252207, "loss": 0.972, "step": 15835 }, { "epoch": 0.23, "grad_norm": 0.58203125, "learning_rate": 0.00019030225987730934, "loss": 1.0213, "step": 15840 }, { "epoch": 0.23, "grad_norm": 0.55078125, "learning_rate": 0.0001902915016691495, "loss": 1.0051, "step": 15845 }, { "epoch": 0.23, "grad_norm": 0.51171875, "learning_rate": 0.00019028073780141553, "loss": 1.1086, "step": 15850 }, { "epoch": 0.23, "grad_norm": 0.54296875, "learning_rate": 0.00019026996827478207, "loss": 0.8926, "step": 15855 }, { "epoch": 0.23, "grad_norm": 0.6328125, "learning_rate": 0.0001902591930899242, "loss": 1.1078, "step": 15860 }, { "epoch": 0.23, "grad_norm": 0.60546875, "learning_rate": 0.00019024841224751728, "loss": 0.9868, "step": 15865 }, { "epoch": 0.23, "grad_norm": 0.71875, "learning_rate": 0.00019023762574823714, "loss": 1.1652, "step": 15870 }, { "epoch": 0.23, "grad_norm": 0.56640625, "learning_rate": 0.0001902268335927598, "loss": 0.849, "step": 15875 }, { "epoch": 0.23, "grad_norm": 0.6015625, "learning_rate": 0.00019021603578176183, "loss": 1.0021, "step": 15880 }, { "epoch": 0.23, "grad_norm": 0.546875, "learning_rate": 0.00019020523231591993, "loss": 0.8994, "step": 15885 }, { "epoch": 0.23, "grad_norm": 0.578125, "learning_rate": 0.00019019442319591137, "loss": 1.0758, "step": 15890 }, { "epoch": 0.23, "grad_norm": 0.6015625, "learning_rate": 0.00019018360842241362, "loss": 0.9965, "step": 15895 }, { "epoch": 0.23, "grad_norm": 0.58203125, "learning_rate": 0.0001901727879961046, "loss": 0.8621, "step": 15900 }, { "epoch": 0.23, "grad_norm": 0.65234375, "learning_rate": 0.00019016196191766255, "loss": 1.0158, "step": 15905 }, { "epoch": 0.23, "grad_norm": 0.455078125, "learning_rate": 0.000190151130187766, "loss": 1.0215, "step": 15910 }, { "epoch": 0.23, "grad_norm": 0.51171875, "learning_rate": 0.00019014029280709397, "loss": 0.9089, "step": 15915 }, { "epoch": 0.23, "grad_norm": 0.65234375, "learning_rate": 0.0001901294497763257, "loss": 0.8874, "step": 15920 }, { "epoch": 0.23, "grad_norm": 0.49609375, "learning_rate": 0.00019011860109614088, "loss": 0.908, "step": 15925 }, { "epoch": 0.23, "grad_norm": 0.54296875, "learning_rate": 0.00019010774676721947, "loss": 0.9505, "step": 15930 }, { "epoch": 0.23, "grad_norm": 0.5859375, "learning_rate": 0.0001900968867902419, "loss": 0.8472, "step": 15935 }, { "epoch": 0.23, "grad_norm": 0.59375, "learning_rate": 0.0001900860211658889, "loss": 0.9841, "step": 15940 }, { "epoch": 0.23, "grad_norm": 0.69140625, "learning_rate": 0.00019007514989484144, "loss": 1.1091, "step": 15945 }, { "epoch": 0.23, "grad_norm": 0.486328125, "learning_rate": 0.000190064272977781, "loss": 0.9715, "step": 15950 }, { "epoch": 0.23, "grad_norm": 0.6796875, "learning_rate": 0.00019005339041538937, "loss": 1.041, "step": 15955 }, { "epoch": 0.23, "grad_norm": 0.5546875, "learning_rate": 0.00019004250220834866, "loss": 0.941, "step": 15960 }, { "epoch": 0.23, "grad_norm": 0.62890625, "learning_rate": 0.00019003160835734135, "loss": 1.0787, "step": 15965 }, { "epoch": 0.23, "grad_norm": 0.53125, "learning_rate": 0.0001900207088630503, "loss": 0.8941, "step": 15970 }, { "epoch": 0.23, "grad_norm": 0.64453125, "learning_rate": 0.0001900098037261587, "loss": 1.229, "step": 15975 }, { "epoch": 0.23, "grad_norm": 0.55078125, "learning_rate": 0.0001899988929473501, "loss": 1.0056, "step": 15980 }, { "epoch": 0.23, "grad_norm": 0.50390625, "learning_rate": 0.00018998797652730837, "loss": 0.9708, "step": 15985 }, { "epoch": 0.23, "grad_norm": 0.54296875, "learning_rate": 0.00018997705446671778, "loss": 0.8722, "step": 15990 }, { "epoch": 0.23, "grad_norm": 0.6484375, "learning_rate": 0.00018996612676626295, "loss": 1.0668, "step": 15995 }, { "epoch": 0.23, "grad_norm": 0.486328125, "learning_rate": 0.00018995519342662883, "loss": 1.0248, "step": 16000 }, { "epoch": 0.23, "grad_norm": 0.5078125, "learning_rate": 0.00018994425444850076, "loss": 0.9662, "step": 16005 }, { "epoch": 0.23, "grad_norm": 0.578125, "learning_rate": 0.00018993330983256435, "loss": 0.9792, "step": 16010 }, { "epoch": 0.23, "grad_norm": 0.6484375, "learning_rate": 0.00018992235957950566, "loss": 1.0946, "step": 16015 }, { "epoch": 0.23, "grad_norm": 0.55078125, "learning_rate": 0.00018991140369001107, "loss": 0.8866, "step": 16020 }, { "epoch": 0.23, "grad_norm": 0.54296875, "learning_rate": 0.0001899004421647673, "loss": 0.9569, "step": 16025 }, { "epoch": 0.23, "grad_norm": 0.60546875, "learning_rate": 0.00018988947500446138, "loss": 1.0165, "step": 16030 }, { "epoch": 0.23, "grad_norm": 0.515625, "learning_rate": 0.00018987850220978086, "loss": 0.9958, "step": 16035 }, { "epoch": 0.23, "grad_norm": 0.62890625, "learning_rate": 0.00018986752378141343, "loss": 1.1076, "step": 16040 }, { "epoch": 0.23, "grad_norm": 0.5546875, "learning_rate": 0.0001898565397200472, "loss": 0.9612, "step": 16045 }, { "epoch": 0.23, "grad_norm": 0.5546875, "learning_rate": 0.0001898455500263708, "loss": 0.9617, "step": 16050 }, { "epoch": 0.23, "grad_norm": 0.58203125, "learning_rate": 0.000189834554701073, "loss": 1.0497, "step": 16055 }, { "epoch": 0.23, "grad_norm": 0.56640625, "learning_rate": 0.00018982355374484296, "loss": 0.9259, "step": 16060 }, { "epoch": 0.23, "grad_norm": 0.66796875, "learning_rate": 0.0001898125471583703, "loss": 0.955, "step": 16065 }, { "epoch": 0.23, "grad_norm": 0.5859375, "learning_rate": 0.00018980153494234486, "loss": 0.8977, "step": 16070 }, { "epoch": 0.23, "grad_norm": 0.515625, "learning_rate": 0.000189790517097457, "loss": 0.8006, "step": 16075 }, { "epoch": 0.23, "grad_norm": 0.5625, "learning_rate": 0.0001897794936243972, "loss": 1.0832, "step": 16080 }, { "epoch": 0.23, "grad_norm": 0.6875, "learning_rate": 0.00018976846452385652, "loss": 1.0089, "step": 16085 }, { "epoch": 0.23, "grad_norm": 0.64453125, "learning_rate": 0.00018975742979652623, "loss": 0.9711, "step": 16090 }, { "epoch": 0.23, "grad_norm": 0.59765625, "learning_rate": 0.00018974638944309802, "loss": 0.8898, "step": 16095 }, { "epoch": 0.23, "grad_norm": 0.5703125, "learning_rate": 0.0001897353434642639, "loss": 0.8701, "step": 16100 }, { "epoch": 0.23, "grad_norm": 0.5390625, "learning_rate": 0.0001897242918607163, "loss": 0.9969, "step": 16105 }, { "epoch": 0.23, "grad_norm": 0.546875, "learning_rate": 0.00018971323463314784, "loss": 0.9325, "step": 16110 }, { "epoch": 0.23, "grad_norm": 0.57421875, "learning_rate": 0.00018970217178225168, "loss": 0.9804, "step": 16115 }, { "epoch": 0.23, "grad_norm": 0.546875, "learning_rate": 0.0001896911033087212, "loss": 0.8709, "step": 16120 }, { "epoch": 0.23, "grad_norm": 0.52734375, "learning_rate": 0.00018968002921325027, "loss": 1.0715, "step": 16125 }, { "epoch": 0.23, "grad_norm": 0.609375, "learning_rate": 0.00018966894949653293, "loss": 1.1704, "step": 16130 }, { "epoch": 0.23, "grad_norm": 0.50390625, "learning_rate": 0.0001896578641592637, "loss": 0.8445, "step": 16135 }, { "epoch": 0.23, "grad_norm": 0.56640625, "learning_rate": 0.00018964677320213748, "loss": 0.8603, "step": 16140 }, { "epoch": 0.23, "grad_norm": 0.6015625, "learning_rate": 0.0001896356766258494, "loss": 0.9586, "step": 16145 }, { "epoch": 0.23, "grad_norm": 0.5859375, "learning_rate": 0.00018962457443109503, "loss": 0.9594, "step": 16150 }, { "epoch": 0.23, "grad_norm": 0.54296875, "learning_rate": 0.00018961346661857021, "loss": 0.9027, "step": 16155 }, { "epoch": 0.23, "grad_norm": 0.7265625, "learning_rate": 0.00018960235318897132, "loss": 0.9021, "step": 16160 }, { "epoch": 0.23, "grad_norm": 0.83203125, "learning_rate": 0.00018959123414299484, "loss": 1.0627, "step": 16165 }, { "epoch": 0.23, "grad_norm": 0.6171875, "learning_rate": 0.00018958010948133777, "loss": 1.0311, "step": 16170 }, { "epoch": 0.23, "grad_norm": 0.59765625, "learning_rate": 0.00018956897920469743, "loss": 0.9209, "step": 16175 }, { "epoch": 0.23, "grad_norm": 0.61328125, "learning_rate": 0.0001895578433137714, "loss": 0.9624, "step": 16180 }, { "epoch": 0.23, "grad_norm": 0.6171875, "learning_rate": 0.00018954670180925783, "loss": 1.0426, "step": 16185 }, { "epoch": 0.23, "grad_norm": 0.640625, "learning_rate": 0.000189535554691855, "loss": 0.9737, "step": 16190 }, { "epoch": 0.23, "grad_norm": 0.59375, "learning_rate": 0.0001895244019622616, "loss": 1.0148, "step": 16195 }, { "epoch": 0.23, "grad_norm": 0.5703125, "learning_rate": 0.00018951324362117674, "loss": 0.9849, "step": 16200 }, { "epoch": 0.23, "grad_norm": 0.5625, "learning_rate": 0.0001895020796692998, "loss": 0.985, "step": 16205 }, { "epoch": 0.23, "grad_norm": 0.57421875, "learning_rate": 0.0001894909101073306, "loss": 0.7954, "step": 16210 }, { "epoch": 0.23, "grad_norm": 0.60546875, "learning_rate": 0.0001894797349359692, "loss": 0.9957, "step": 16215 }, { "epoch": 0.23, "grad_norm": 0.63671875, "learning_rate": 0.00018946855415591615, "loss": 1.074, "step": 16220 }, { "epoch": 0.23, "grad_norm": 0.5234375, "learning_rate": 0.0001894573677678722, "loss": 0.9898, "step": 16225 }, { "epoch": 0.23, "grad_norm": 0.5625, "learning_rate": 0.00018944617577253855, "loss": 0.8775, "step": 16230 }, { "epoch": 0.23, "grad_norm": 0.640625, "learning_rate": 0.00018943497817061676, "loss": 1.0057, "step": 16235 }, { "epoch": 0.23, "grad_norm": 0.68359375, "learning_rate": 0.00018942377496280867, "loss": 0.8942, "step": 16240 }, { "epoch": 0.23, "grad_norm": 0.61328125, "learning_rate": 0.0001894125661498165, "loss": 1.0112, "step": 16245 }, { "epoch": 0.23, "grad_norm": 0.478515625, "learning_rate": 0.00018940135173234286, "loss": 0.7383, "step": 16250 }, { "epoch": 0.23, "grad_norm": 0.63671875, "learning_rate": 0.00018939013171109068, "loss": 0.965, "step": 16255 }, { "epoch": 0.23, "grad_norm": 0.59765625, "learning_rate": 0.00018937890608676324, "loss": 0.8931, "step": 16260 }, { "epoch": 0.23, "grad_norm": 0.5, "learning_rate": 0.00018936767486006417, "loss": 0.9322, "step": 16265 }, { "epoch": 0.23, "grad_norm": 0.60546875, "learning_rate": 0.00018935643803169746, "loss": 1.1366, "step": 16270 }, { "epoch": 0.23, "grad_norm": 0.57421875, "learning_rate": 0.00018934519560236744, "loss": 0.8809, "step": 16275 }, { "epoch": 0.23, "grad_norm": 0.484375, "learning_rate": 0.0001893339475727788, "loss": 0.9132, "step": 16280 }, { "epoch": 0.23, "grad_norm": 0.5703125, "learning_rate": 0.0001893226939436366, "loss": 0.8929, "step": 16285 }, { "epoch": 0.23, "grad_norm": 0.54296875, "learning_rate": 0.00018931143471564622, "loss": 0.919, "step": 16290 }, { "epoch": 0.23, "grad_norm": 0.52734375, "learning_rate": 0.00018930016988951334, "loss": 0.8347, "step": 16295 }, { "epoch": 0.23, "grad_norm": 0.51953125, "learning_rate": 0.00018928889946594416, "loss": 0.8911, "step": 16300 }, { "epoch": 0.23, "grad_norm": 0.55859375, "learning_rate": 0.00018927762344564503, "loss": 1.0486, "step": 16305 }, { "epoch": 0.23, "grad_norm": 0.59375, "learning_rate": 0.00018926634182932282, "loss": 0.9989, "step": 16310 }, { "epoch": 0.23, "grad_norm": 0.65234375, "learning_rate": 0.00018925505461768466, "loss": 0.949, "step": 16315 }, { "epoch": 0.23, "grad_norm": 0.6328125, "learning_rate": 0.00018924376181143798, "loss": 1.0124, "step": 16320 }, { "epoch": 0.23, "grad_norm": 0.61328125, "learning_rate": 0.00018923246341129066, "loss": 0.9844, "step": 16325 }, { "epoch": 0.23, "grad_norm": 0.54296875, "learning_rate": 0.00018922115941795092, "loss": 0.9543, "step": 16330 }, { "epoch": 0.23, "grad_norm": 0.55078125, "learning_rate": 0.0001892098498321273, "loss": 1.0143, "step": 16335 }, { "epoch": 0.23, "grad_norm": 0.4921875, "learning_rate": 0.0001891985346545287, "loss": 0.997, "step": 16340 }, { "epoch": 0.23, "grad_norm": 0.55859375, "learning_rate": 0.00018918721388586435, "loss": 1.0011, "step": 16345 }, { "epoch": 0.23, "grad_norm": 0.5859375, "learning_rate": 0.00018917588752684385, "loss": 0.9518, "step": 16350 }, { "epoch": 0.23, "grad_norm": 0.5078125, "learning_rate": 0.00018916455557817718, "loss": 0.9175, "step": 16355 }, { "epoch": 0.23, "grad_norm": 0.58984375, "learning_rate": 0.0001891532180405746, "loss": 0.9749, "step": 16360 }, { "epoch": 0.23, "grad_norm": 0.4921875, "learning_rate": 0.00018914187491474678, "loss": 0.9813, "step": 16365 }, { "epoch": 0.23, "grad_norm": 0.5078125, "learning_rate": 0.0001891305262014047, "loss": 0.9044, "step": 16370 }, { "epoch": 0.23, "grad_norm": 0.56640625, "learning_rate": 0.00018911917190125976, "loss": 1.0183, "step": 16375 }, { "epoch": 0.23, "grad_norm": 0.578125, "learning_rate": 0.0001891078120150236, "loss": 1.0059, "step": 16380 }, { "epoch": 0.24, "grad_norm": 0.6484375, "learning_rate": 0.0001890964465434083, "loss": 0.9395, "step": 16385 }, { "epoch": 0.24, "grad_norm": 0.486328125, "learning_rate": 0.00018908507548712626, "loss": 0.9566, "step": 16390 }, { "epoch": 0.24, "grad_norm": 0.5703125, "learning_rate": 0.00018907369884689024, "loss": 0.9328, "step": 16395 }, { "epoch": 0.24, "grad_norm": 0.7578125, "learning_rate": 0.00018906231662341332, "loss": 1.021, "step": 16400 }, { "epoch": 0.24, "grad_norm": 0.5625, "learning_rate": 0.00018905092881740899, "loss": 0.9493, "step": 16405 }, { "epoch": 0.24, "grad_norm": 0.6171875, "learning_rate": 0.00018903953542959097, "loss": 0.9246, "step": 16410 }, { "epoch": 0.24, "grad_norm": 0.6328125, "learning_rate": 0.0001890281364606735, "loss": 1.0011, "step": 16415 }, { "epoch": 0.24, "grad_norm": 0.58984375, "learning_rate": 0.00018901673191137102, "loss": 0.9221, "step": 16420 }, { "epoch": 0.24, "grad_norm": 0.62890625, "learning_rate": 0.0001890053217823984, "loss": 0.9183, "step": 16425 }, { "epoch": 0.24, "grad_norm": 0.55078125, "learning_rate": 0.00018899390607447086, "loss": 1.0647, "step": 16430 }, { "epoch": 0.24, "grad_norm": 0.54296875, "learning_rate": 0.0001889824847883039, "loss": 1.0372, "step": 16435 }, { "epoch": 0.24, "grad_norm": 0.578125, "learning_rate": 0.0001889710579246135, "loss": 0.9747, "step": 16440 }, { "epoch": 0.24, "grad_norm": 0.625, "learning_rate": 0.00018895962548411583, "loss": 0.9561, "step": 16445 }, { "epoch": 0.24, "grad_norm": 0.6171875, "learning_rate": 0.0001889481874675275, "loss": 0.9192, "step": 16450 }, { "epoch": 0.24, "grad_norm": 0.5859375, "learning_rate": 0.0001889367438755655, "loss": 0.9859, "step": 16455 }, { "epoch": 0.24, "grad_norm": 0.703125, "learning_rate": 0.00018892529470894713, "loss": 1.0249, "step": 16460 }, { "epoch": 0.24, "grad_norm": 0.5859375, "learning_rate": 0.00018891383996838998, "loss": 0.9751, "step": 16465 }, { "epoch": 0.24, "grad_norm": 0.5390625, "learning_rate": 0.00018890237965461207, "loss": 0.8279, "step": 16470 }, { "epoch": 0.24, "grad_norm": 0.5546875, "learning_rate": 0.00018889091376833177, "loss": 1.1476, "step": 16475 }, { "epoch": 0.24, "grad_norm": 0.5234375, "learning_rate": 0.00018887944231026774, "loss": 0.9666, "step": 16480 }, { "epoch": 0.24, "grad_norm": 0.50390625, "learning_rate": 0.00018886796528113907, "loss": 0.9175, "step": 16485 }, { "epoch": 0.24, "grad_norm": 0.55078125, "learning_rate": 0.00018885648268166509, "loss": 0.8682, "step": 16490 }, { "epoch": 0.24, "grad_norm": 0.5078125, "learning_rate": 0.00018884499451256556, "loss": 0.8868, "step": 16495 }, { "epoch": 0.24, "grad_norm": 0.66015625, "learning_rate": 0.00018883350077456064, "loss": 0.9363, "step": 16500 }, { "epoch": 0.24, "grad_norm": 0.51171875, "learning_rate": 0.00018882200146837067, "loss": 0.9751, "step": 16505 }, { "epoch": 0.24, "grad_norm": 0.5078125, "learning_rate": 0.00018881049659471652, "loss": 1.0708, "step": 16510 }, { "epoch": 0.24, "grad_norm": 0.57421875, "learning_rate": 0.00018879898615431931, "loss": 1.0396, "step": 16515 }, { "epoch": 0.24, "grad_norm": 0.53125, "learning_rate": 0.00018878747014790048, "loss": 0.7451, "step": 16520 }, { "epoch": 0.24, "grad_norm": 0.5390625, "learning_rate": 0.00018877594857618193, "loss": 0.9338, "step": 16525 }, { "epoch": 0.24, "grad_norm": 0.63671875, "learning_rate": 0.0001887644214398858, "loss": 1.0115, "step": 16530 }, { "epoch": 0.24, "grad_norm": 0.478515625, "learning_rate": 0.00018875288873973465, "loss": 0.8998, "step": 16535 }, { "epoch": 0.24, "grad_norm": 0.65234375, "learning_rate": 0.00018874135047645133, "loss": 1.0624, "step": 16540 }, { "epoch": 0.24, "grad_norm": 0.546875, "learning_rate": 0.0001887298066507591, "loss": 0.9733, "step": 16545 }, { "epoch": 0.24, "grad_norm": 0.59765625, "learning_rate": 0.00018871825726338157, "loss": 0.9463, "step": 16550 }, { "epoch": 0.24, "grad_norm": 0.59765625, "learning_rate": 0.00018870670231504257, "loss": 1.0434, "step": 16555 }, { "epoch": 0.24, "grad_norm": 0.59765625, "learning_rate": 0.0001886951418064665, "loss": 0.9086, "step": 16560 }, { "epoch": 0.24, "grad_norm": 0.48046875, "learning_rate": 0.00018868357573837788, "loss": 1.0508, "step": 16565 }, { "epoch": 0.24, "grad_norm": 0.55078125, "learning_rate": 0.00018867200411150177, "loss": 1.0226, "step": 16570 }, { "epoch": 0.24, "grad_norm": 0.53515625, "learning_rate": 0.00018866042692656344, "loss": 0.8664, "step": 16575 }, { "epoch": 0.24, "grad_norm": 0.6171875, "learning_rate": 0.00018864884418428855, "loss": 0.908, "step": 16580 }, { "epoch": 0.24, "grad_norm": 0.68359375, "learning_rate": 0.00018863725588540316, "loss": 0.9478, "step": 16585 }, { "epoch": 0.24, "grad_norm": 0.59375, "learning_rate": 0.00018862566203063365, "loss": 1.0568, "step": 16590 }, { "epoch": 0.24, "grad_norm": 0.609375, "learning_rate": 0.0001886140626207067, "loss": 1.0956, "step": 16595 }, { "epoch": 0.24, "grad_norm": 0.58203125, "learning_rate": 0.00018860245765634935, "loss": 0.9001, "step": 16600 }, { "epoch": 0.24, "grad_norm": 0.54296875, "learning_rate": 0.00018859084713828908, "loss": 1.122, "step": 16605 }, { "epoch": 0.24, "grad_norm": 0.56640625, "learning_rate": 0.00018857923106725357, "loss": 0.9613, "step": 16610 }, { "epoch": 0.24, "grad_norm": 0.5078125, "learning_rate": 0.000188567609443971, "loss": 1.0178, "step": 16615 }, { "epoch": 0.24, "grad_norm": 0.69921875, "learning_rate": 0.0001885559822691698, "loss": 1.1274, "step": 16620 }, { "epoch": 0.24, "grad_norm": 0.61328125, "learning_rate": 0.00018854434954357875, "loss": 0.9865, "step": 16625 }, { "epoch": 0.24, "grad_norm": 0.546875, "learning_rate": 0.00018853271126792706, "loss": 0.9709, "step": 16630 }, { "epoch": 0.24, "grad_norm": 0.578125, "learning_rate": 0.0001885210674429442, "loss": 0.9059, "step": 16635 }, { "epoch": 0.24, "grad_norm": 0.58984375, "learning_rate": 0.00018850941806936004, "loss": 0.7779, "step": 16640 }, { "epoch": 0.24, "grad_norm": 0.5625, "learning_rate": 0.0001884977631479047, "loss": 0.9693, "step": 16645 }, { "epoch": 0.24, "grad_norm": 0.6484375, "learning_rate": 0.00018848610267930877, "loss": 0.9123, "step": 16650 }, { "epoch": 0.24, "grad_norm": 0.5859375, "learning_rate": 0.00018847443666430316, "loss": 1.0178, "step": 16655 }, { "epoch": 0.24, "grad_norm": 0.5703125, "learning_rate": 0.0001884627651036191, "loss": 0.8877, "step": 16660 }, { "epoch": 0.24, "grad_norm": 0.5859375, "learning_rate": 0.0001884510879979882, "loss": 1.0474, "step": 16665 }, { "epoch": 0.24, "grad_norm": 0.53125, "learning_rate": 0.00018843940534814233, "loss": 0.9245, "step": 16670 }, { "epoch": 0.24, "grad_norm": 0.59765625, "learning_rate": 0.00018842771715481382, "loss": 0.9171, "step": 16675 }, { "epoch": 0.24, "grad_norm": 0.494140625, "learning_rate": 0.00018841602341873527, "loss": 0.8864, "step": 16680 }, { "epoch": 0.24, "grad_norm": 0.6328125, "learning_rate": 0.00018840432414063974, "loss": 0.922, "step": 16685 }, { "epoch": 0.24, "grad_norm": 0.52734375, "learning_rate": 0.00018839261932126044, "loss": 0.8619, "step": 16690 }, { "epoch": 0.24, "grad_norm": 0.54296875, "learning_rate": 0.00018838090896133107, "loss": 0.9576, "step": 16695 }, { "epoch": 0.24, "grad_norm": 0.6328125, "learning_rate": 0.00018836919306158568, "loss": 1.0071, "step": 16700 }, { "epoch": 0.24, "grad_norm": 0.67578125, "learning_rate": 0.00018835747162275864, "loss": 1.0803, "step": 16705 }, { "epoch": 0.24, "grad_norm": 0.5390625, "learning_rate": 0.00018834574464558464, "loss": 0.8814, "step": 16710 }, { "epoch": 0.24, "grad_norm": 0.5546875, "learning_rate": 0.00018833401213079877, "loss": 0.938, "step": 16715 }, { "epoch": 0.24, "grad_norm": 0.5859375, "learning_rate": 0.00018832227407913638, "loss": 1.0113, "step": 16720 }, { "epoch": 0.24, "grad_norm": 0.59765625, "learning_rate": 0.00018831053049133328, "loss": 0.9646, "step": 16725 }, { "epoch": 0.24, "grad_norm": 0.58203125, "learning_rate": 0.00018829878136812558, "loss": 1.1012, "step": 16730 }, { "epoch": 0.24, "grad_norm": 0.57421875, "learning_rate": 0.00018828702671024966, "loss": 1.0641, "step": 16735 }, { "epoch": 0.24, "grad_norm": 0.6328125, "learning_rate": 0.00018827526651844236, "loss": 0.8878, "step": 16740 }, { "epoch": 0.24, "grad_norm": 0.58984375, "learning_rate": 0.00018826350079344085, "loss": 0.9392, "step": 16745 }, { "epoch": 0.24, "grad_norm": 0.5625, "learning_rate": 0.00018825172953598252, "loss": 0.9466, "step": 16750 }, { "epoch": 0.24, "grad_norm": 0.5078125, "learning_rate": 0.00018823995274680532, "loss": 0.9176, "step": 16755 }, { "epoch": 0.24, "grad_norm": 0.6328125, "learning_rate": 0.0001882281704266474, "loss": 0.8922, "step": 16760 }, { "epoch": 0.24, "grad_norm": 0.54296875, "learning_rate": 0.00018821638257624724, "loss": 0.95, "step": 16765 }, { "epoch": 0.24, "grad_norm": 0.62109375, "learning_rate": 0.00018820458919634376, "loss": 0.9833, "step": 16770 }, { "epoch": 0.24, "grad_norm": 0.5625, "learning_rate": 0.00018819279028767618, "loss": 0.9822, "step": 16775 }, { "epoch": 0.24, "grad_norm": 0.609375, "learning_rate": 0.00018818098585098405, "loss": 1.0689, "step": 16780 }, { "epoch": 0.24, "grad_norm": 0.58984375, "learning_rate": 0.00018816917588700732, "loss": 0.8965, "step": 16785 }, { "epoch": 0.24, "grad_norm": 0.56640625, "learning_rate": 0.0001881573603964862, "loss": 0.9886, "step": 16790 }, { "epoch": 0.24, "grad_norm": 0.625, "learning_rate": 0.00018814553938016135, "loss": 1.0189, "step": 16795 }, { "epoch": 0.24, "grad_norm": 0.60546875, "learning_rate": 0.00018813371283877368, "loss": 1.0355, "step": 16800 }, { "epoch": 0.24, "grad_norm": 0.53125, "learning_rate": 0.0001881218807730645, "loss": 0.9595, "step": 16805 }, { "epoch": 0.24, "grad_norm": 0.59375, "learning_rate": 0.00018811004318377552, "loss": 0.9699, "step": 16810 }, { "epoch": 0.24, "grad_norm": 0.5625, "learning_rate": 0.00018809820007164863, "loss": 0.9936, "step": 16815 }, { "epoch": 0.24, "grad_norm": 0.5234375, "learning_rate": 0.00018808635143742627, "loss": 0.9886, "step": 16820 }, { "epoch": 0.24, "grad_norm": 0.59375, "learning_rate": 0.00018807449728185108, "loss": 0.9061, "step": 16825 }, { "epoch": 0.24, "grad_norm": 0.6640625, "learning_rate": 0.00018806263760566606, "loss": 0.9264, "step": 16830 }, { "epoch": 0.24, "grad_norm": 0.57421875, "learning_rate": 0.00018805077240961464, "loss": 1.0603, "step": 16835 }, { "epoch": 0.24, "grad_norm": 0.51171875, "learning_rate": 0.0001880389016944405, "loss": 0.8612, "step": 16840 }, { "epoch": 0.24, "grad_norm": 0.5390625, "learning_rate": 0.0001880270254608878, "loss": 0.983, "step": 16845 }, { "epoch": 0.24, "grad_norm": 0.640625, "learning_rate": 0.0001880151437097008, "loss": 0.9251, "step": 16850 }, { "epoch": 0.24, "grad_norm": 0.58203125, "learning_rate": 0.00018800325644162443, "loss": 1.1613, "step": 16855 }, { "epoch": 0.24, "grad_norm": 0.57421875, "learning_rate": 0.00018799136365740368, "loss": 0.9665, "step": 16860 }, { "epoch": 0.24, "grad_norm": 0.5703125, "learning_rate": 0.00018797946535778403, "loss": 1.066, "step": 16865 }, { "epoch": 0.24, "grad_norm": 0.5390625, "learning_rate": 0.00018796756154351133, "loss": 0.9028, "step": 16870 }, { "epoch": 0.24, "grad_norm": 0.5234375, "learning_rate": 0.00018795565221533167, "loss": 0.9527, "step": 16875 }, { "epoch": 0.24, "grad_norm": 0.6328125, "learning_rate": 0.00018794373737399152, "loss": 1.0282, "step": 16880 }, { "epoch": 0.24, "grad_norm": 0.51953125, "learning_rate": 0.0001879318170202378, "loss": 0.8708, "step": 16885 }, { "epoch": 0.24, "grad_norm": 0.54296875, "learning_rate": 0.00018791989115481762, "loss": 0.9773, "step": 16890 }, { "epoch": 0.24, "grad_norm": 0.51171875, "learning_rate": 0.00018790795977847852, "loss": 0.9057, "step": 16895 }, { "epoch": 0.24, "grad_norm": 0.56640625, "learning_rate": 0.00018789602289196838, "loss": 1.0618, "step": 16900 }, { "epoch": 0.24, "grad_norm": 0.5703125, "learning_rate": 0.0001878840804960355, "loss": 0.8619, "step": 16905 }, { "epoch": 0.24, "grad_norm": 0.55859375, "learning_rate": 0.00018787213259142827, "loss": 0.9153, "step": 16910 }, { "epoch": 0.24, "grad_norm": 0.578125, "learning_rate": 0.0001878601791788957, "loss": 1.0297, "step": 16915 }, { "epoch": 0.24, "grad_norm": 0.578125, "learning_rate": 0.00018784822025918706, "loss": 0.9308, "step": 16920 }, { "epoch": 0.24, "grad_norm": 0.6796875, "learning_rate": 0.00018783625583305194, "loss": 1.0167, "step": 16925 }, { "epoch": 0.24, "grad_norm": 0.66796875, "learning_rate": 0.00018782428590124025, "loss": 0.984, "step": 16930 }, { "epoch": 0.24, "grad_norm": 0.61328125, "learning_rate": 0.0001878123104645023, "loss": 0.9254, "step": 16935 }, { "epoch": 0.24, "grad_norm": 0.66796875, "learning_rate": 0.0001878003295235887, "loss": 1.0324, "step": 16940 }, { "epoch": 0.24, "grad_norm": 0.5625, "learning_rate": 0.00018778834307925046, "loss": 1.0301, "step": 16945 }, { "epoch": 0.24, "grad_norm": 0.490234375, "learning_rate": 0.00018777635113223888, "loss": 0.8193, "step": 16950 }, { "epoch": 0.24, "grad_norm": 0.65234375, "learning_rate": 0.00018776435368330567, "loss": 1.0801, "step": 16955 }, { "epoch": 0.24, "grad_norm": 0.515625, "learning_rate": 0.0001877523507332028, "loss": 0.9553, "step": 16960 }, { "epoch": 0.24, "grad_norm": 0.5625, "learning_rate": 0.00018774034228268265, "loss": 0.8491, "step": 16965 }, { "epoch": 0.24, "grad_norm": 0.5859375, "learning_rate": 0.00018772832833249792, "loss": 0.9802, "step": 16970 }, { "epoch": 0.24, "grad_norm": 0.6328125, "learning_rate": 0.00018771630888340165, "loss": 0.9014, "step": 16975 }, { "epoch": 0.24, "grad_norm": 0.5546875, "learning_rate": 0.00018770428393614724, "loss": 0.9266, "step": 16980 }, { "epoch": 0.24, "grad_norm": 0.59375, "learning_rate": 0.00018769225349148842, "loss": 1.0798, "step": 16985 }, { "epoch": 0.24, "grad_norm": 0.54296875, "learning_rate": 0.00018768021755017928, "loss": 0.9488, "step": 16990 }, { "epoch": 0.24, "grad_norm": 0.5859375, "learning_rate": 0.00018766817611297424, "loss": 0.9043, "step": 16995 }, { "epoch": 0.24, "grad_norm": 0.66015625, "learning_rate": 0.0001876561291806281, "loss": 1.104, "step": 17000 }, { "epoch": 0.24, "grad_norm": 0.60546875, "learning_rate": 0.00018764407675389593, "loss": 1.1461, "step": 17005 }, { "epoch": 0.24, "grad_norm": 0.56640625, "learning_rate": 0.00018763201883353323, "loss": 1.035, "step": 17010 }, { "epoch": 0.24, "grad_norm": 0.53515625, "learning_rate": 0.00018761995542029576, "loss": 0.8472, "step": 17015 }, { "epoch": 0.24, "grad_norm": 0.57421875, "learning_rate": 0.00018760788651493973, "loss": 0.9361, "step": 17020 }, { "epoch": 0.24, "grad_norm": 0.5546875, "learning_rate": 0.00018759581211822156, "loss": 1.0741, "step": 17025 }, { "epoch": 0.24, "grad_norm": 0.59765625, "learning_rate": 0.0001875837322308981, "loss": 1.028, "step": 17030 }, { "epoch": 0.24, "grad_norm": 0.515625, "learning_rate": 0.00018757164685372663, "loss": 1.028, "step": 17035 }, { "epoch": 0.24, "grad_norm": 0.57421875, "learning_rate": 0.00018755955598746455, "loss": 1.055, "step": 17040 }, { "epoch": 0.24, "grad_norm": 0.52734375, "learning_rate": 0.00018754745963286978, "loss": 0.8644, "step": 17045 }, { "epoch": 0.24, "grad_norm": 0.609375, "learning_rate": 0.00018753535779070056, "loss": 0.9841, "step": 17050 }, { "epoch": 0.24, "grad_norm": 0.5625, "learning_rate": 0.00018752325046171537, "loss": 0.908, "step": 17055 }, { "epoch": 0.24, "grad_norm": 0.6015625, "learning_rate": 0.0001875111376466732, "loss": 1.0285, "step": 17060 }, { "epoch": 0.24, "grad_norm": 0.55859375, "learning_rate": 0.00018749901934633325, "loss": 0.9899, "step": 17065 }, { "epoch": 0.24, "grad_norm": 0.625, "learning_rate": 0.0001874868955614551, "loss": 1.1264, "step": 17070 }, { "epoch": 0.24, "grad_norm": 0.59375, "learning_rate": 0.0001874747662927987, "loss": 1.0565, "step": 17075 }, { "epoch": 0.25, "grad_norm": 0.6015625, "learning_rate": 0.00018746263154112433, "loss": 0.9554, "step": 17080 }, { "epoch": 0.25, "grad_norm": 0.54296875, "learning_rate": 0.0001874504913071926, "loss": 1.0431, "step": 17085 }, { "epoch": 0.25, "grad_norm": 0.578125, "learning_rate": 0.0001874383455917645, "loss": 0.9707, "step": 17090 }, { "epoch": 0.25, "grad_norm": 0.60546875, "learning_rate": 0.00018742619439560126, "loss": 1.0394, "step": 17095 }, { "epoch": 0.25, "grad_norm": 0.515625, "learning_rate": 0.00018741403771946463, "loss": 0.9724, "step": 17100 }, { "epoch": 0.25, "grad_norm": 0.5546875, "learning_rate": 0.00018740187556411653, "loss": 1.0729, "step": 17105 }, { "epoch": 0.25, "grad_norm": 0.56640625, "learning_rate": 0.0001873897079303194, "loss": 0.9667, "step": 17110 }, { "epoch": 0.25, "grad_norm": 0.5703125, "learning_rate": 0.00018737753481883575, "loss": 1.1016, "step": 17115 }, { "epoch": 0.25, "grad_norm": 0.6015625, "learning_rate": 0.00018736535623042877, "loss": 0.9476, "step": 17120 }, { "epoch": 0.25, "grad_norm": 0.55859375, "learning_rate": 0.00018735317216586172, "loss": 1.0506, "step": 17125 }, { "epoch": 0.25, "grad_norm": 0.640625, "learning_rate": 0.00018734098262589835, "loss": 0.9753, "step": 17130 }, { "epoch": 0.25, "grad_norm": 0.609375, "learning_rate": 0.00018732878761130274, "loss": 0.9667, "step": 17135 }, { "epoch": 0.25, "grad_norm": 0.494140625, "learning_rate": 0.00018731658712283929, "loss": 0.9733, "step": 17140 }, { "epoch": 0.25, "grad_norm": 0.6015625, "learning_rate": 0.00018730438116127266, "loss": 0.9274, "step": 17145 }, { "epoch": 0.25, "grad_norm": 0.56640625, "learning_rate": 0.000187292169727368, "loss": 1.0296, "step": 17150 }, { "epoch": 0.25, "grad_norm": 0.55859375, "learning_rate": 0.00018727995282189074, "loss": 0.7776, "step": 17155 }, { "epoch": 0.25, "grad_norm": 0.48046875, "learning_rate": 0.00018726773044560664, "loss": 0.8341, "step": 17160 }, { "epoch": 0.25, "grad_norm": 0.58984375, "learning_rate": 0.0001872555025992818, "loss": 0.7679, "step": 17165 }, { "epoch": 0.25, "grad_norm": 0.53515625, "learning_rate": 0.00018724326928368267, "loss": 0.9864, "step": 17170 }, { "epoch": 0.25, "grad_norm": 0.62890625, "learning_rate": 0.00018723103049957606, "loss": 1.0016, "step": 17175 }, { "epoch": 0.25, "grad_norm": 0.625, "learning_rate": 0.00018721878624772912, "loss": 0.958, "step": 17180 }, { "epoch": 0.25, "grad_norm": 0.55078125, "learning_rate": 0.00018720653652890934, "loss": 0.8523, "step": 17185 }, { "epoch": 0.25, "grad_norm": 0.57421875, "learning_rate": 0.0001871942813438845, "loss": 0.8542, "step": 17190 }, { "epoch": 0.25, "grad_norm": 0.54296875, "learning_rate": 0.00018718202069342282, "loss": 0.8759, "step": 17195 }, { "epoch": 0.25, "grad_norm": 0.546875, "learning_rate": 0.00018716975457829275, "loss": 0.9566, "step": 17200 }, { "epoch": 0.25, "grad_norm": 0.65625, "learning_rate": 0.00018715748299926322, "loss": 1.0393, "step": 17205 }, { "epoch": 0.25, "grad_norm": 0.59375, "learning_rate": 0.0001871452059571034, "loss": 1.0312, "step": 17210 }, { "epoch": 0.25, "grad_norm": 0.48046875, "learning_rate": 0.0001871329234525828, "loss": 1.0642, "step": 17215 }, { "epoch": 0.25, "grad_norm": 0.69921875, "learning_rate": 0.00018712063548647132, "loss": 1.0684, "step": 17220 }, { "epoch": 0.25, "grad_norm": 0.61328125, "learning_rate": 0.0001871083420595392, "loss": 0.9215, "step": 17225 }, { "epoch": 0.25, "grad_norm": 0.62109375, "learning_rate": 0.00018709604317255699, "loss": 0.9932, "step": 17230 }, { "epoch": 0.25, "grad_norm": 0.5234375, "learning_rate": 0.0001870837388262956, "loss": 0.8693, "step": 17235 }, { "epoch": 0.25, "grad_norm": 0.58203125, "learning_rate": 0.00018707142902152626, "loss": 0.9005, "step": 17240 }, { "epoch": 0.25, "grad_norm": 0.65234375, "learning_rate": 0.00018705911375902062, "loss": 1.029, "step": 17245 }, { "epoch": 0.25, "grad_norm": 0.5546875, "learning_rate": 0.00018704679303955054, "loss": 1.0987, "step": 17250 }, { "epoch": 0.25, "grad_norm": 0.52734375, "learning_rate": 0.00018703446686388838, "loss": 0.9246, "step": 17255 }, { "epoch": 0.25, "grad_norm": 0.8203125, "learning_rate": 0.00018702213523280668, "loss": 0.9038, "step": 17260 }, { "epoch": 0.25, "grad_norm": 0.546875, "learning_rate": 0.00018700979814707843, "loss": 1.0012, "step": 17265 }, { "epoch": 0.25, "grad_norm": 0.498046875, "learning_rate": 0.00018699745560747696, "loss": 0.8978, "step": 17270 }, { "epoch": 0.25, "grad_norm": 0.6328125, "learning_rate": 0.00018698510761477587, "loss": 0.9811, "step": 17275 }, { "epoch": 0.25, "grad_norm": 0.640625, "learning_rate": 0.00018697275416974915, "loss": 0.9977, "step": 17280 }, { "epoch": 0.25, "grad_norm": 0.609375, "learning_rate": 0.00018696039527317117, "loss": 0.9776, "step": 17285 }, { "epoch": 0.25, "grad_norm": 0.703125, "learning_rate": 0.00018694803092581655, "loss": 1.0117, "step": 17290 }, { "epoch": 0.25, "grad_norm": 0.578125, "learning_rate": 0.00018693566112846038, "loss": 0.9689, "step": 17295 }, { "epoch": 0.25, "grad_norm": 0.5234375, "learning_rate": 0.0001869232858818779, "loss": 0.8599, "step": 17300 }, { "epoch": 0.25, "grad_norm": 0.62890625, "learning_rate": 0.0001869109051868449, "loss": 0.9914, "step": 17305 }, { "epoch": 0.25, "grad_norm": 0.51953125, "learning_rate": 0.00018689851904413738, "loss": 1.0214, "step": 17310 }, { "epoch": 0.25, "grad_norm": 0.5703125, "learning_rate": 0.00018688612745453172, "loss": 0.9126, "step": 17315 }, { "epoch": 0.25, "grad_norm": 0.55859375, "learning_rate": 0.0001868737304188046, "loss": 0.972, "step": 17320 }, { "epoch": 0.25, "grad_norm": 0.51171875, "learning_rate": 0.00018686132793773315, "loss": 0.9786, "step": 17325 }, { "epoch": 0.25, "grad_norm": 0.5234375, "learning_rate": 0.00018684892001209473, "loss": 0.8262, "step": 17330 }, { "epoch": 0.25, "grad_norm": 0.578125, "learning_rate": 0.00018683650664266707, "loss": 0.9764, "step": 17335 }, { "epoch": 0.25, "grad_norm": 0.578125, "learning_rate": 0.00018682408783022832, "loss": 1.0312, "step": 17340 }, { "epoch": 0.25, "grad_norm": 0.62109375, "learning_rate": 0.0001868116635755568, "loss": 0.8887, "step": 17345 }, { "epoch": 0.25, "grad_norm": 0.5234375, "learning_rate": 0.0001867992338794314, "loss": 0.9327, "step": 17350 }, { "epoch": 0.25, "grad_norm": 0.6640625, "learning_rate": 0.00018678679874263113, "loss": 1.0066, "step": 17355 }, { "epoch": 0.25, "grad_norm": 0.5390625, "learning_rate": 0.00018677435816593548, "loss": 1.0451, "step": 17360 }, { "epoch": 0.25, "grad_norm": 0.48828125, "learning_rate": 0.00018676191215012423, "loss": 0.8877, "step": 17365 }, { "epoch": 0.25, "grad_norm": 0.5234375, "learning_rate": 0.0001867494606959775, "loss": 0.951, "step": 17370 }, { "epoch": 0.25, "grad_norm": 0.55078125, "learning_rate": 0.00018673700380427582, "loss": 1.0618, "step": 17375 }, { "epoch": 0.25, "grad_norm": 0.5703125, "learning_rate": 0.00018672454147579992, "loss": 1.0022, "step": 17380 }, { "epoch": 0.25, "grad_norm": 0.55859375, "learning_rate": 0.00018671207371133097, "loss": 0.85, "step": 17385 }, { "epoch": 0.25, "grad_norm": 0.50390625, "learning_rate": 0.00018669960051165052, "loss": 0.8242, "step": 17390 }, { "epoch": 0.25, "grad_norm": 0.53515625, "learning_rate": 0.00018668712187754034, "loss": 0.9412, "step": 17395 }, { "epoch": 0.25, "grad_norm": 0.5859375, "learning_rate": 0.00018667463780978265, "loss": 0.9668, "step": 17400 }, { "epoch": 0.25, "grad_norm": 0.4765625, "learning_rate": 0.00018666214830915997, "loss": 0.9269, "step": 17405 }, { "epoch": 0.25, "grad_norm": 0.71875, "learning_rate": 0.0001866496533764551, "loss": 1.0226, "step": 17410 }, { "epoch": 0.25, "grad_norm": 0.546875, "learning_rate": 0.00018663715301245128, "loss": 0.9235, "step": 17415 }, { "epoch": 0.25, "grad_norm": 0.5546875, "learning_rate": 0.00018662464721793203, "loss": 1.1844, "step": 17420 }, { "epoch": 0.25, "grad_norm": 0.65234375, "learning_rate": 0.00018661213599368125, "loss": 1.0601, "step": 17425 }, { "epoch": 0.25, "grad_norm": 0.578125, "learning_rate": 0.00018659961934048313, "loss": 1.0451, "step": 17430 }, { "epoch": 0.25, "grad_norm": 0.62890625, "learning_rate": 0.00018658709725912225, "loss": 0.9832, "step": 17435 }, { "epoch": 0.25, "grad_norm": 0.56640625, "learning_rate": 0.0001865745697503835, "loss": 1.0443, "step": 17440 }, { "epoch": 0.25, "grad_norm": 0.66796875, "learning_rate": 0.0001865620368150521, "loss": 0.8875, "step": 17445 }, { "epoch": 0.25, "grad_norm": 0.58203125, "learning_rate": 0.00018654949845391366, "loss": 0.8692, "step": 17450 }, { "epoch": 0.25, "grad_norm": 0.59765625, "learning_rate": 0.0001865369546677541, "loss": 1.0209, "step": 17455 }, { "epoch": 0.25, "grad_norm": 0.416015625, "learning_rate": 0.00018652440545735964, "loss": 0.7376, "step": 17460 }, { "epoch": 0.25, "grad_norm": 0.494140625, "learning_rate": 0.0001865118508235169, "loss": 0.8804, "step": 17465 }, { "epoch": 0.25, "grad_norm": 0.62109375, "learning_rate": 0.00018649929076701283, "loss": 1.0329, "step": 17470 }, { "epoch": 0.25, "grad_norm": 0.66796875, "learning_rate": 0.0001864867252886347, "loss": 0.9949, "step": 17475 }, { "epoch": 0.25, "grad_norm": 0.51953125, "learning_rate": 0.00018647415438917015, "loss": 0.9137, "step": 17480 }, { "epoch": 0.25, "grad_norm": 0.578125, "learning_rate": 0.00018646157806940708, "loss": 0.9676, "step": 17485 }, { "epoch": 0.25, "grad_norm": 0.546875, "learning_rate": 0.00018644899633013388, "loss": 1.0912, "step": 17490 }, { "epoch": 0.25, "grad_norm": 0.5625, "learning_rate": 0.00018643640917213907, "loss": 0.9975, "step": 17495 }, { "epoch": 0.25, "grad_norm": 0.55078125, "learning_rate": 0.00018642381659621173, "loss": 0.932, "step": 17500 }, { "epoch": 0.25, "grad_norm": 0.57421875, "learning_rate": 0.00018641121860314114, "loss": 1.0452, "step": 17505 }, { "epoch": 0.25, "grad_norm": 0.5, "learning_rate": 0.00018639861519371693, "loss": 0.9322, "step": 17510 }, { "epoch": 0.25, "grad_norm": 0.5859375, "learning_rate": 0.00018638600636872914, "loss": 0.8564, "step": 17515 }, { "epoch": 0.25, "grad_norm": 0.5234375, "learning_rate": 0.00018637339212896806, "loss": 0.9579, "step": 17520 }, { "epoch": 0.25, "grad_norm": 0.578125, "learning_rate": 0.0001863607724752244, "loss": 0.9324, "step": 17525 }, { "epoch": 0.25, "grad_norm": 0.49609375, "learning_rate": 0.0001863481474082892, "loss": 0.9017, "step": 17530 }, { "epoch": 0.25, "grad_norm": 0.6796875, "learning_rate": 0.00018633551692895373, "loss": 0.9417, "step": 17535 }, { "epoch": 0.25, "grad_norm": 0.8203125, "learning_rate": 0.00018632288103800974, "loss": 0.8638, "step": 17540 }, { "epoch": 0.25, "grad_norm": 0.5, "learning_rate": 0.00018631023973624927, "loss": 0.9747, "step": 17545 }, { "epoch": 0.25, "grad_norm": 0.63671875, "learning_rate": 0.00018629759302446467, "loss": 0.8661, "step": 17550 }, { "epoch": 0.25, "grad_norm": 0.6171875, "learning_rate": 0.00018628494090344863, "loss": 1.106, "step": 17555 }, { "epoch": 0.25, "grad_norm": 0.67578125, "learning_rate": 0.00018627228337399427, "loss": 1.0067, "step": 17560 }, { "epoch": 0.25, "grad_norm": 0.609375, "learning_rate": 0.0001862596204368949, "loss": 1.1062, "step": 17565 }, { "epoch": 0.25, "grad_norm": 0.58203125, "learning_rate": 0.00018624695209294427, "loss": 0.8607, "step": 17570 }, { "epoch": 0.25, "grad_norm": 0.53515625, "learning_rate": 0.0001862342783429365, "loss": 1.0601, "step": 17575 }, { "epoch": 0.25, "grad_norm": 0.54296875, "learning_rate": 0.0001862215991876659, "loss": 0.8794, "step": 17580 }, { "epoch": 0.25, "grad_norm": 0.5859375, "learning_rate": 0.00018620891462792729, "loss": 0.9462, "step": 17585 }, { "epoch": 0.25, "grad_norm": 0.5859375, "learning_rate": 0.00018619622466451574, "loss": 1.0127, "step": 17590 }, { "epoch": 0.25, "grad_norm": 0.498046875, "learning_rate": 0.00018618352929822662, "loss": 1.0018, "step": 17595 }, { "epoch": 0.25, "grad_norm": 0.54296875, "learning_rate": 0.00018617082852985577, "loss": 1.0369, "step": 17600 }, { "epoch": 0.25, "grad_norm": 0.609375, "learning_rate": 0.00018615812236019922, "loss": 0.9432, "step": 17605 }, { "epoch": 0.25, "grad_norm": 0.5390625, "learning_rate": 0.00018614541079005345, "loss": 0.8503, "step": 17610 }, { "epoch": 0.25, "grad_norm": 0.51953125, "learning_rate": 0.00018613269382021522, "loss": 0.9858, "step": 17615 }, { "epoch": 0.25, "grad_norm": 0.578125, "learning_rate": 0.00018611997145148165, "loss": 0.9059, "step": 17620 }, { "epoch": 0.25, "grad_norm": 0.609375, "learning_rate": 0.00018610724368465015, "loss": 1.0486, "step": 17625 }, { "epoch": 0.25, "grad_norm": 0.60546875, "learning_rate": 0.00018609451052051858, "loss": 0.9702, "step": 17630 }, { "epoch": 0.25, "grad_norm": 0.70703125, "learning_rate": 0.00018608177195988504, "loss": 1.2176, "step": 17635 }, { "epoch": 0.25, "grad_norm": 0.55859375, "learning_rate": 0.00018606902800354796, "loss": 1.0093, "step": 17640 }, { "epoch": 0.25, "grad_norm": 0.6171875, "learning_rate": 0.0001860562786523062, "loss": 0.9306, "step": 17645 }, { "epoch": 0.25, "grad_norm": 0.6953125, "learning_rate": 0.0001860435239069589, "loss": 1.1033, "step": 17650 }, { "epoch": 0.25, "grad_norm": 0.64453125, "learning_rate": 0.00018603076376830555, "loss": 0.9792, "step": 17655 }, { "epoch": 0.25, "grad_norm": 0.6953125, "learning_rate": 0.0001860179982371459, "loss": 0.9309, "step": 17660 }, { "epoch": 0.25, "grad_norm": 0.65625, "learning_rate": 0.00018600522731428016, "loss": 1.0316, "step": 17665 }, { "epoch": 0.25, "grad_norm": 0.5859375, "learning_rate": 0.00018599245100050883, "loss": 1.0418, "step": 17670 }, { "epoch": 0.25, "grad_norm": 0.57421875, "learning_rate": 0.00018597966929663273, "loss": 1.02, "step": 17675 }, { "epoch": 0.25, "grad_norm": 0.546875, "learning_rate": 0.00018596688220345303, "loss": 0.9424, "step": 17680 }, { "epoch": 0.25, "grad_norm": 0.59765625, "learning_rate": 0.00018595408972177127, "loss": 1.0338, "step": 17685 }, { "epoch": 0.25, "grad_norm": 0.61328125, "learning_rate": 0.00018594129185238925, "loss": 0.9645, "step": 17690 }, { "epoch": 0.25, "grad_norm": 0.578125, "learning_rate": 0.00018592848859610918, "loss": 0.9934, "step": 17695 }, { "epoch": 0.25, "grad_norm": 0.59375, "learning_rate": 0.0001859156799537336, "loss": 1.063, "step": 17700 }, { "epoch": 0.25, "grad_norm": 0.5, "learning_rate": 0.0001859028659260653, "loss": 0.9458, "step": 17705 }, { "epoch": 0.25, "grad_norm": 0.578125, "learning_rate": 0.00018589004651390758, "loss": 0.9579, "step": 17710 }, { "epoch": 0.25, "grad_norm": 0.71875, "learning_rate": 0.0001858772217180639, "loss": 1.0698, "step": 17715 }, { "epoch": 0.25, "grad_norm": 0.59765625, "learning_rate": 0.0001858643915393382, "loss": 0.9331, "step": 17720 }, { "epoch": 0.25, "grad_norm": 0.55078125, "learning_rate": 0.0001858515559785346, "loss": 0.8905, "step": 17725 }, { "epoch": 0.25, "grad_norm": 0.58203125, "learning_rate": 0.0001858387150364577, "loss": 0.9107, "step": 17730 }, { "epoch": 0.25, "grad_norm": 0.5546875, "learning_rate": 0.00018582586871391236, "loss": 1.063, "step": 17735 }, { "epoch": 0.25, "grad_norm": 0.5625, "learning_rate": 0.00018581301701170387, "loss": 1.0574, "step": 17740 }, { "epoch": 0.25, "grad_norm": 0.6171875, "learning_rate": 0.0001858001599306377, "loss": 1.01, "step": 17745 }, { "epoch": 0.25, "grad_norm": 0.6015625, "learning_rate": 0.0001857872974715198, "loss": 0.9275, "step": 17750 }, { "epoch": 0.25, "grad_norm": 0.5234375, "learning_rate": 0.0001857744296351564, "loss": 0.9735, "step": 17755 }, { "epoch": 0.25, "grad_norm": 0.52734375, "learning_rate": 0.00018576155642235407, "loss": 1.0272, "step": 17760 }, { "epoch": 0.25, "grad_norm": 0.58203125, "learning_rate": 0.0001857486778339197, "loss": 1.0457, "step": 17765 }, { "epoch": 0.25, "grad_norm": 0.609375, "learning_rate": 0.00018573579387066053, "loss": 0.9561, "step": 17770 }, { "epoch": 0.25, "grad_norm": 0.59375, "learning_rate": 0.00018572290453338416, "loss": 0.9612, "step": 17775 }, { "epoch": 0.26, "grad_norm": 0.57421875, "learning_rate": 0.0001857100098228985, "loss": 0.8423, "step": 17780 }, { "epoch": 0.26, "grad_norm": 0.515625, "learning_rate": 0.00018569710974001183, "loss": 1.0384, "step": 17785 }, { "epoch": 0.26, "grad_norm": 0.54296875, "learning_rate": 0.00018568420428553272, "loss": 1.1032, "step": 17790 }, { "epoch": 0.26, "grad_norm": 0.5078125, "learning_rate": 0.00018567129346027007, "loss": 1.1377, "step": 17795 }, { "epoch": 0.26, "grad_norm": 0.58203125, "learning_rate": 0.00018565837726503318, "loss": 0.9317, "step": 17800 }, { "epoch": 0.26, "grad_norm": 0.53515625, "learning_rate": 0.00018564545570063168, "loss": 0.9791, "step": 17805 }, { "epoch": 0.26, "grad_norm": 0.6484375, "learning_rate": 0.00018563252876787546, "loss": 0.9285, "step": 17810 }, { "epoch": 0.26, "grad_norm": 0.53125, "learning_rate": 0.00018561959646757478, "loss": 0.8951, "step": 17815 }, { "epoch": 0.26, "grad_norm": 0.57421875, "learning_rate": 0.00018560665880054032, "loss": 0.9693, "step": 17820 }, { "epoch": 0.26, "grad_norm": 0.54296875, "learning_rate": 0.00018559371576758296, "loss": 0.8104, "step": 17825 }, { "epoch": 0.26, "grad_norm": 0.5703125, "learning_rate": 0.00018558076736951404, "loss": 0.9264, "step": 17830 }, { "epoch": 0.26, "grad_norm": 0.490234375, "learning_rate": 0.00018556781360714512, "loss": 1.1434, "step": 17835 }, { "epoch": 0.26, "grad_norm": 0.53125, "learning_rate": 0.0001855548544812882, "loss": 0.9131, "step": 17840 }, { "epoch": 0.26, "grad_norm": 0.59765625, "learning_rate": 0.0001855418899927556, "loss": 1.0054, "step": 17845 }, { "epoch": 0.26, "grad_norm": 0.6171875, "learning_rate": 0.00018552892014235986, "loss": 0.9281, "step": 17850 }, { "epoch": 0.26, "grad_norm": 0.625, "learning_rate": 0.000185515944930914, "loss": 1.033, "step": 17855 }, { "epoch": 0.26, "grad_norm": 0.5859375, "learning_rate": 0.00018550296435923135, "loss": 0.9832, "step": 17860 }, { "epoch": 0.26, "grad_norm": 0.69921875, "learning_rate": 0.00018548997842812552, "loss": 1.0676, "step": 17865 }, { "epoch": 0.26, "grad_norm": 0.55859375, "learning_rate": 0.00018547698713841047, "loss": 0.847, "step": 17870 }, { "epoch": 0.26, "grad_norm": 0.5859375, "learning_rate": 0.00018546399049090048, "loss": 0.8618, "step": 17875 }, { "epoch": 0.26, "grad_norm": 0.59765625, "learning_rate": 0.00018545098848641025, "loss": 0.9026, "step": 17880 }, { "epoch": 0.26, "grad_norm": 0.578125, "learning_rate": 0.00018543798112575474, "loss": 1.1252, "step": 17885 }, { "epoch": 0.26, "grad_norm": 0.55078125, "learning_rate": 0.0001854249684097493, "loss": 0.8721, "step": 17890 }, { "epoch": 0.26, "grad_norm": 0.5546875, "learning_rate": 0.0001854119503392095, "loss": 0.9944, "step": 17895 }, { "epoch": 0.26, "grad_norm": 0.56640625, "learning_rate": 0.0001853989269149514, "loss": 0.9431, "step": 17900 }, { "epoch": 0.26, "grad_norm": 0.62890625, "learning_rate": 0.0001853858981377913, "loss": 1.072, "step": 17905 }, { "epoch": 0.26, "grad_norm": 0.62109375, "learning_rate": 0.00018537286400854583, "loss": 0.9626, "step": 17910 }, { "epoch": 0.26, "grad_norm": 0.55859375, "learning_rate": 0.00018535982452803204, "loss": 0.9894, "step": 17915 }, { "epoch": 0.26, "grad_norm": 0.62109375, "learning_rate": 0.00018534677969706724, "loss": 0.9762, "step": 17920 }, { "epoch": 0.26, "grad_norm": 0.53515625, "learning_rate": 0.00018533372951646908, "loss": 0.9588, "step": 17925 }, { "epoch": 0.26, "grad_norm": 0.52734375, "learning_rate": 0.00018532067398705556, "loss": 0.828, "step": 17930 }, { "epoch": 0.26, "grad_norm": 0.625, "learning_rate": 0.00018530761310964504, "loss": 1.1494, "step": 17935 }, { "epoch": 0.26, "grad_norm": 0.6875, "learning_rate": 0.00018529454688505614, "loss": 1.1224, "step": 17940 }, { "epoch": 0.26, "grad_norm": 0.63671875, "learning_rate": 0.00018528147531410793, "loss": 0.931, "step": 17945 }, { "epoch": 0.26, "grad_norm": 0.625, "learning_rate": 0.00018526839839761968, "loss": 0.9902, "step": 17950 }, { "epoch": 0.26, "grad_norm": 0.58984375, "learning_rate": 0.00018525531613641113, "loss": 0.9354, "step": 17955 }, { "epoch": 0.26, "grad_norm": 0.5859375, "learning_rate": 0.0001852422285313023, "loss": 0.9686, "step": 17960 }, { "epoch": 0.26, "grad_norm": 0.6953125, "learning_rate": 0.00018522913558311345, "loss": 0.9746, "step": 17965 }, { "epoch": 0.26, "grad_norm": 0.61328125, "learning_rate": 0.00018521603729266536, "loss": 0.9197, "step": 17970 }, { "epoch": 0.26, "grad_norm": 0.58203125, "learning_rate": 0.00018520293366077896, "loss": 1.0712, "step": 17975 }, { "epoch": 0.26, "grad_norm": 0.56640625, "learning_rate": 0.00018518982468827567, "loss": 0.9947, "step": 17980 }, { "epoch": 0.26, "grad_norm": 0.5625, "learning_rate": 0.00018517671037597712, "loss": 0.8395, "step": 17985 }, { "epoch": 0.26, "grad_norm": 0.6171875, "learning_rate": 0.00018516359072470536, "loss": 0.9635, "step": 17990 }, { "epoch": 0.26, "grad_norm": 0.578125, "learning_rate": 0.00018515046573528275, "loss": 0.9441, "step": 17995 }, { "epoch": 0.26, "grad_norm": 0.5859375, "learning_rate": 0.00018513733540853196, "loss": 0.9905, "step": 18000 }, { "epoch": 0.26, "grad_norm": 0.58203125, "learning_rate": 0.00018512419974527604, "loss": 0.9265, "step": 18005 }, { "epoch": 0.26, "grad_norm": 0.625, "learning_rate": 0.00018511105874633832, "loss": 0.9096, "step": 18010 }, { "epoch": 0.26, "grad_norm": 0.53515625, "learning_rate": 0.00018509791241254253, "loss": 0.9061, "step": 18015 }, { "epoch": 0.26, "grad_norm": 0.703125, "learning_rate": 0.00018508476074471263, "loss": 1.0156, "step": 18020 }, { "epoch": 0.26, "grad_norm": 0.609375, "learning_rate": 0.00018507160374367306, "loss": 1.0067, "step": 18025 }, { "epoch": 0.26, "grad_norm": 0.5390625, "learning_rate": 0.00018505844141024843, "loss": 0.9401, "step": 18030 }, { "epoch": 0.26, "grad_norm": 0.6328125, "learning_rate": 0.00018504527374526387, "loss": 1.0436, "step": 18035 }, { "epoch": 0.26, "grad_norm": 0.58203125, "learning_rate": 0.00018503210074954468, "loss": 1.0336, "step": 18040 }, { "epoch": 0.26, "grad_norm": 0.625, "learning_rate": 0.00018501892242391653, "loss": 0.9606, "step": 18045 }, { "epoch": 0.26, "grad_norm": 0.5703125, "learning_rate": 0.00018500573876920555, "loss": 0.9777, "step": 18050 }, { "epoch": 0.26, "grad_norm": 0.5390625, "learning_rate": 0.000184992549786238, "loss": 0.9084, "step": 18055 }, { "epoch": 0.26, "grad_norm": 0.59375, "learning_rate": 0.00018497935547584068, "loss": 1.0103, "step": 18060 }, { "epoch": 0.26, "grad_norm": 0.56640625, "learning_rate": 0.00018496615583884056, "loss": 0.9744, "step": 18065 }, { "epoch": 0.26, "grad_norm": 0.54296875, "learning_rate": 0.000184952950876065, "loss": 0.9016, "step": 18070 }, { "epoch": 0.26, "grad_norm": 0.54296875, "learning_rate": 0.00018493974058834178, "loss": 0.8987, "step": 18075 }, { "epoch": 0.26, "grad_norm": 0.55078125, "learning_rate": 0.00018492652497649882, "loss": 0.8471, "step": 18080 }, { "epoch": 0.26, "grad_norm": 0.734375, "learning_rate": 0.00018491330404136458, "loss": 1.0032, "step": 18085 }, { "epoch": 0.26, "grad_norm": 0.54296875, "learning_rate": 0.00018490007778376776, "loss": 0.8814, "step": 18090 }, { "epoch": 0.26, "grad_norm": 0.59765625, "learning_rate": 0.00018488684620453731, "loss": 0.9529, "step": 18095 }, { "epoch": 0.26, "grad_norm": 0.52734375, "learning_rate": 0.00018487360930450272, "loss": 1.0356, "step": 18100 }, { "epoch": 0.26, "grad_norm": 0.62890625, "learning_rate": 0.0001848603670844936, "loss": 1.0452, "step": 18105 }, { "epoch": 0.26, "grad_norm": 0.427734375, "learning_rate": 0.00018484711954534002, "loss": 0.723, "step": 18110 }, { "epoch": 0.26, "grad_norm": 0.54296875, "learning_rate": 0.0001848338666878724, "loss": 1.0693, "step": 18115 }, { "epoch": 0.26, "grad_norm": 0.5234375, "learning_rate": 0.00018482060851292132, "loss": 0.9527, "step": 18120 }, { "epoch": 0.26, "grad_norm": 0.6015625, "learning_rate": 0.00018480734502131796, "loss": 0.9394, "step": 18125 }, { "epoch": 0.26, "grad_norm": 0.50390625, "learning_rate": 0.00018479407621389363, "loss": 0.8132, "step": 18130 }, { "epoch": 0.26, "grad_norm": 0.55859375, "learning_rate": 0.00018478080209148, "loss": 1.0694, "step": 18135 }, { "epoch": 0.26, "grad_norm": 0.6015625, "learning_rate": 0.00018476752265490914, "loss": 0.9333, "step": 18140 }, { "epoch": 0.26, "grad_norm": 0.6796875, "learning_rate": 0.00018475423790501342, "loss": 1.0483, "step": 18145 }, { "epoch": 0.26, "grad_norm": 0.55078125, "learning_rate": 0.00018474094784262554, "loss": 0.9722, "step": 18150 }, { "epoch": 0.26, "grad_norm": 0.625, "learning_rate": 0.00018472765246857855, "loss": 1.1281, "step": 18155 }, { "epoch": 0.26, "grad_norm": 0.6875, "learning_rate": 0.0001847143517837058, "loss": 1.0055, "step": 18160 }, { "epoch": 0.26, "grad_norm": 0.5546875, "learning_rate": 0.00018470104578884099, "loss": 0.8147, "step": 18165 }, { "epoch": 0.26, "grad_norm": 0.59375, "learning_rate": 0.00018468773448481818, "loss": 1.1144, "step": 18170 }, { "epoch": 0.26, "grad_norm": 0.59765625, "learning_rate": 0.0001846744178724717, "loss": 0.9699, "step": 18175 }, { "epoch": 0.26, "grad_norm": 0.53125, "learning_rate": 0.0001846610959526363, "loss": 1.0632, "step": 18180 }, { "epoch": 0.26, "grad_norm": 0.5625, "learning_rate": 0.00018464776872614697, "loss": 0.8508, "step": 18185 }, { "epoch": 0.26, "grad_norm": 0.66015625, "learning_rate": 0.0001846344361938391, "loss": 1.0169, "step": 18190 }, { "epoch": 0.26, "grad_norm": 0.62109375, "learning_rate": 0.00018462109835654838, "loss": 1.0482, "step": 18195 }, { "epoch": 0.26, "grad_norm": 0.57421875, "learning_rate": 0.00018460775521511082, "loss": 0.9038, "step": 18200 }, { "epoch": 0.26, "grad_norm": 0.65625, "learning_rate": 0.0001845944067703628, "loss": 0.9428, "step": 18205 }, { "epoch": 0.26, "grad_norm": 0.5546875, "learning_rate": 0.00018458105302314104, "loss": 0.8876, "step": 18210 }, { "epoch": 0.26, "grad_norm": 0.53125, "learning_rate": 0.00018456769397428254, "loss": 0.8324, "step": 18215 }, { "epoch": 0.26, "grad_norm": 0.6640625, "learning_rate": 0.00018455432962462466, "loss": 1.1117, "step": 18220 }, { "epoch": 0.26, "grad_norm": 0.546875, "learning_rate": 0.0001845409599750051, "loss": 0.87, "step": 18225 }, { "epoch": 0.26, "grad_norm": 0.6484375, "learning_rate": 0.0001845275850262619, "loss": 0.8693, "step": 18230 }, { "epoch": 0.26, "grad_norm": 0.64453125, "learning_rate": 0.00018451420477923338, "loss": 1.0202, "step": 18235 }, { "epoch": 0.26, "grad_norm": 0.5, "learning_rate": 0.00018450081923475828, "loss": 1.0089, "step": 18240 }, { "epoch": 0.26, "grad_norm": 0.609375, "learning_rate": 0.00018448742839367557, "loss": 1.0084, "step": 18245 }, { "epoch": 0.26, "grad_norm": 0.58984375, "learning_rate": 0.00018447403225682464, "loss": 0.797, "step": 18250 }, { "epoch": 0.26, "grad_norm": 0.56640625, "learning_rate": 0.00018446063082504512, "loss": 1.0342, "step": 18255 }, { "epoch": 0.26, "grad_norm": 0.578125, "learning_rate": 0.0001844472240991771, "loss": 1.0519, "step": 18260 }, { "epoch": 0.26, "grad_norm": 0.5859375, "learning_rate": 0.0001844338120800609, "loss": 0.9145, "step": 18265 }, { "epoch": 0.26, "grad_norm": 0.515625, "learning_rate": 0.0001844203947685372, "loss": 0.8796, "step": 18270 }, { "epoch": 0.26, "grad_norm": 0.5546875, "learning_rate": 0.000184406972165447, "loss": 0.8601, "step": 18275 }, { "epoch": 0.26, "grad_norm": 0.58984375, "learning_rate": 0.00018439354427163162, "loss": 0.9921, "step": 18280 }, { "epoch": 0.26, "grad_norm": 0.70703125, "learning_rate": 0.00018438011108793282, "loss": 0.91, "step": 18285 }, { "epoch": 0.26, "grad_norm": 0.5703125, "learning_rate": 0.00018436667261519254, "loss": 1.024, "step": 18290 }, { "epoch": 0.26, "grad_norm": 0.59765625, "learning_rate": 0.00018435322885425312, "loss": 0.9442, "step": 18295 }, { "epoch": 0.26, "grad_norm": 0.69921875, "learning_rate": 0.00018433977980595727, "loss": 0.9701, "step": 18300 }, { "epoch": 0.26, "grad_norm": 0.54296875, "learning_rate": 0.00018432632547114795, "loss": 1.0441, "step": 18305 }, { "epoch": 0.26, "grad_norm": 0.609375, "learning_rate": 0.00018431286585066851, "loss": 0.8904, "step": 18310 }, { "epoch": 0.26, "grad_norm": 0.53515625, "learning_rate": 0.0001842994009453626, "loss": 0.913, "step": 18315 }, { "epoch": 0.26, "grad_norm": 0.53515625, "learning_rate": 0.00018428593075607425, "loss": 0.8808, "step": 18320 }, { "epoch": 0.26, "grad_norm": 0.5234375, "learning_rate": 0.00018427245528364778, "loss": 0.9081, "step": 18325 }, { "epoch": 0.26, "grad_norm": 0.55078125, "learning_rate": 0.00018425897452892782, "loss": 0.9171, "step": 18330 }, { "epoch": 0.26, "grad_norm": 0.57421875, "learning_rate": 0.00018424548849275935, "loss": 0.8542, "step": 18335 }, { "epoch": 0.26, "grad_norm": 0.5859375, "learning_rate": 0.00018423199717598776, "loss": 0.9848, "step": 18340 }, { "epoch": 0.26, "grad_norm": 0.56640625, "learning_rate": 0.00018421850057945863, "loss": 0.8988, "step": 18345 }, { "epoch": 0.26, "grad_norm": 0.5390625, "learning_rate": 0.00018420499870401796, "loss": 0.9853, "step": 18350 }, { "epoch": 0.26, "grad_norm": 0.63671875, "learning_rate": 0.00018419149155051207, "loss": 0.9578, "step": 18355 }, { "epoch": 0.26, "grad_norm": 0.56640625, "learning_rate": 0.0001841779791197876, "loss": 0.8141, "step": 18360 }, { "epoch": 0.26, "grad_norm": 0.53125, "learning_rate": 0.00018416446141269156, "loss": 0.8497, "step": 18365 }, { "epoch": 0.26, "grad_norm": 0.5546875, "learning_rate": 0.0001841509384300712, "loss": 0.9357, "step": 18370 }, { "epoch": 0.26, "grad_norm": 0.6171875, "learning_rate": 0.0001841374101727742, "loss": 1.0044, "step": 18375 }, { "epoch": 0.26, "grad_norm": 0.60546875, "learning_rate": 0.00018412387664164847, "loss": 0.9814, "step": 18380 }, { "epoch": 0.26, "grad_norm": 0.59765625, "learning_rate": 0.00018411033783754234, "loss": 0.9315, "step": 18385 }, { "epoch": 0.26, "grad_norm": 0.4921875, "learning_rate": 0.00018409679376130445, "loss": 0.9981, "step": 18390 }, { "epoch": 0.26, "grad_norm": 0.5703125, "learning_rate": 0.0001840832444137838, "loss": 1.0124, "step": 18395 }, { "epoch": 0.26, "grad_norm": 0.58203125, "learning_rate": 0.00018406968979582956, "loss": 0.9862, "step": 18400 }, { "epoch": 0.26, "grad_norm": 0.5234375, "learning_rate": 0.00018405612990829147, "loss": 0.9314, "step": 18405 }, { "epoch": 0.26, "grad_norm": 0.58984375, "learning_rate": 0.00018404256475201938, "loss": 0.9507, "step": 18410 }, { "epoch": 0.26, "grad_norm": 0.6015625, "learning_rate": 0.00018402899432786365, "loss": 0.9749, "step": 18415 }, { "epoch": 0.26, "grad_norm": 0.61328125, "learning_rate": 0.00018401541863667485, "loss": 0.9224, "step": 18420 }, { "epoch": 0.26, "grad_norm": 0.59765625, "learning_rate": 0.00018400183767930387, "loss": 1.1241, "step": 18425 }, { "epoch": 0.26, "grad_norm": 0.59375, "learning_rate": 0.00018398825145660212, "loss": 0.8608, "step": 18430 }, { "epoch": 0.26, "grad_norm": 0.76953125, "learning_rate": 0.00018397465996942107, "loss": 1.0383, "step": 18435 }, { "epoch": 0.26, "grad_norm": 0.6953125, "learning_rate": 0.00018396106321861267, "loss": 0.915, "step": 18440 }, { "epoch": 0.26, "grad_norm": 0.56640625, "learning_rate": 0.00018394746120502922, "loss": 0.95, "step": 18445 }, { "epoch": 0.26, "grad_norm": 0.58203125, "learning_rate": 0.0001839338539295233, "loss": 0.9485, "step": 18450 }, { "epoch": 0.26, "grad_norm": 0.609375, "learning_rate": 0.00018392024139294785, "loss": 0.8437, "step": 18455 }, { "epoch": 0.26, "grad_norm": 0.56640625, "learning_rate": 0.00018390662359615603, "loss": 1.0354, "step": 18460 }, { "epoch": 0.26, "grad_norm": 0.609375, "learning_rate": 0.00018389300054000155, "loss": 1.0424, "step": 18465 }, { "epoch": 0.26, "grad_norm": 0.54296875, "learning_rate": 0.00018387937222533825, "loss": 1.1084, "step": 18470 }, { "epoch": 0.27, "grad_norm": 0.5234375, "learning_rate": 0.0001838657386530203, "loss": 0.909, "step": 18475 }, { "epoch": 0.27, "grad_norm": 0.5625, "learning_rate": 0.0001838520998239024, "loss": 1.0199, "step": 18480 }, { "epoch": 0.27, "grad_norm": 0.59375, "learning_rate": 0.00018383845573883932, "loss": 0.8707, "step": 18485 }, { "epoch": 0.27, "grad_norm": 0.609375, "learning_rate": 0.0001838248063986864, "loss": 1.0918, "step": 18490 }, { "epoch": 0.27, "grad_norm": 0.55859375, "learning_rate": 0.00018381115180429912, "loss": 0.9805, "step": 18495 }, { "epoch": 0.27, "grad_norm": 0.6328125, "learning_rate": 0.00018379749195653343, "loss": 0.9804, "step": 18500 }, { "epoch": 0.27, "grad_norm": 0.57421875, "learning_rate": 0.00018378382685624547, "loss": 0.9463, "step": 18505 }, { "epoch": 0.27, "grad_norm": 0.5234375, "learning_rate": 0.00018377015650429182, "loss": 1.065, "step": 18510 }, { "epoch": 0.27, "grad_norm": 0.55078125, "learning_rate": 0.00018375648090152938, "loss": 0.9703, "step": 18515 }, { "epoch": 0.27, "grad_norm": 0.58203125, "learning_rate": 0.00018374280004881531, "loss": 0.8268, "step": 18520 }, { "epoch": 0.27, "grad_norm": 0.609375, "learning_rate": 0.00018372911394700717, "loss": 1.0015, "step": 18525 }, { "epoch": 0.27, "grad_norm": 0.515625, "learning_rate": 0.0001837154225969628, "loss": 0.8507, "step": 18530 }, { "epoch": 0.27, "grad_norm": 0.60546875, "learning_rate": 0.00018370172599954041, "loss": 1.0905, "step": 18535 }, { "epoch": 0.27, "grad_norm": 0.5546875, "learning_rate": 0.0001836880241555985, "loss": 0.9969, "step": 18540 }, { "epoch": 0.27, "grad_norm": 0.5859375, "learning_rate": 0.0001836743170659959, "loss": 0.9468, "step": 18545 }, { "epoch": 0.27, "grad_norm": 0.53515625, "learning_rate": 0.00018366060473159183, "loss": 0.9021, "step": 18550 }, { "epoch": 0.27, "grad_norm": 0.5625, "learning_rate": 0.0001836468871532458, "loss": 0.8949, "step": 18555 }, { "epoch": 0.27, "grad_norm": 0.45703125, "learning_rate": 0.00018363316433181757, "loss": 1.0498, "step": 18560 }, { "epoch": 0.27, "grad_norm": 0.671875, "learning_rate": 0.00018361943626816736, "loss": 1.1729, "step": 18565 }, { "epoch": 0.27, "grad_norm": 0.5625, "learning_rate": 0.00018360570296315566, "loss": 1.0258, "step": 18570 }, { "epoch": 0.27, "grad_norm": 0.57421875, "learning_rate": 0.00018359196441764328, "loss": 0.9827, "step": 18575 }, { "epoch": 0.27, "grad_norm": 0.59375, "learning_rate": 0.00018357822063249136, "loss": 0.9098, "step": 18580 }, { "epoch": 0.27, "grad_norm": 0.55078125, "learning_rate": 0.0001835644716085614, "loss": 0.9919, "step": 18585 }, { "epoch": 0.27, "grad_norm": 0.671875, "learning_rate": 0.00018355071734671517, "loss": 1.004, "step": 18590 }, { "epoch": 0.27, "grad_norm": 0.546875, "learning_rate": 0.0001835369578478148, "loss": 0.9617, "step": 18595 }, { "epoch": 0.27, "grad_norm": 0.5703125, "learning_rate": 0.0001835231931127228, "loss": 0.9761, "step": 18600 }, { "epoch": 0.27, "grad_norm": 0.44140625, "learning_rate": 0.00018350942314230195, "loss": 0.8244, "step": 18605 }, { "epoch": 0.27, "grad_norm": 0.5390625, "learning_rate": 0.00018349564793741533, "loss": 0.97, "step": 18610 }, { "epoch": 0.27, "grad_norm": 0.64453125, "learning_rate": 0.00018348186749892639, "loss": 1.1368, "step": 18615 }, { "epoch": 0.27, "grad_norm": 0.65234375, "learning_rate": 0.0001834680818276989, "loss": 1.0208, "step": 18620 }, { "epoch": 0.27, "grad_norm": 0.61328125, "learning_rate": 0.00018345429092459704, "loss": 0.8885, "step": 18625 }, { "epoch": 0.27, "grad_norm": 0.703125, "learning_rate": 0.00018344049479048513, "loss": 1.0854, "step": 18630 }, { "epoch": 0.27, "grad_norm": 0.62109375, "learning_rate": 0.000183426693426228, "loss": 1.1566, "step": 18635 }, { "epoch": 0.27, "grad_norm": 0.55859375, "learning_rate": 0.0001834128868326907, "loss": 0.9691, "step": 18640 }, { "epoch": 0.27, "grad_norm": 0.59375, "learning_rate": 0.00018339907501073867, "loss": 0.968, "step": 18645 }, { "epoch": 0.27, "grad_norm": 0.51953125, "learning_rate": 0.0001833852579612376, "loss": 0.9445, "step": 18650 }, { "epoch": 0.27, "grad_norm": 0.60546875, "learning_rate": 0.00018337143568505362, "loss": 1.0196, "step": 18655 }, { "epoch": 0.27, "grad_norm": 0.6171875, "learning_rate": 0.00018335760818305309, "loss": 0.914, "step": 18660 }, { "epoch": 0.27, "grad_norm": 0.56640625, "learning_rate": 0.00018334377545610274, "loss": 0.9988, "step": 18665 }, { "epoch": 0.27, "grad_norm": 0.55078125, "learning_rate": 0.00018332993750506962, "loss": 0.7872, "step": 18670 }, { "epoch": 0.27, "grad_norm": 0.5, "learning_rate": 0.00018331609433082114, "loss": 0.9388, "step": 18675 }, { "epoch": 0.27, "grad_norm": 0.51171875, "learning_rate": 0.00018330224593422496, "loss": 0.9146, "step": 18680 }, { "epoch": 0.27, "grad_norm": 0.54296875, "learning_rate": 0.00018328839231614911, "loss": 0.8824, "step": 18685 }, { "epoch": 0.27, "grad_norm": 0.62109375, "learning_rate": 0.00018327453347746203, "loss": 1.1295, "step": 18690 }, { "epoch": 0.27, "grad_norm": 0.53125, "learning_rate": 0.00018326066941903228, "loss": 1.0299, "step": 18695 }, { "epoch": 0.27, "grad_norm": 0.61328125, "learning_rate": 0.000183246800141729, "loss": 0.9898, "step": 18700 }, { "epoch": 0.27, "grad_norm": 0.57421875, "learning_rate": 0.00018323292564642146, "loss": 1.019, "step": 18705 }, { "epoch": 0.27, "grad_norm": 0.5234375, "learning_rate": 0.0001832190459339793, "loss": 0.89, "step": 18710 }, { "epoch": 0.27, "grad_norm": 0.61328125, "learning_rate": 0.0001832051610052726, "loss": 0.9524, "step": 18715 }, { "epoch": 0.27, "grad_norm": 0.59375, "learning_rate": 0.00018319127086117168, "loss": 0.9742, "step": 18720 }, { "epoch": 0.27, "grad_norm": 0.609375, "learning_rate": 0.00018317737550254713, "loss": 1.1546, "step": 18725 }, { "epoch": 0.27, "grad_norm": 0.5234375, "learning_rate": 0.00018316347493026994, "loss": 0.9396, "step": 18730 }, { "epoch": 0.27, "grad_norm": 0.470703125, "learning_rate": 0.00018314956914521142, "loss": 0.8559, "step": 18735 }, { "epoch": 0.27, "grad_norm": 0.62890625, "learning_rate": 0.0001831356581482432, "loss": 1.0151, "step": 18740 }, { "epoch": 0.27, "grad_norm": 0.458984375, "learning_rate": 0.0001831217419402373, "loss": 0.8159, "step": 18745 }, { "epoch": 0.27, "grad_norm": 0.609375, "learning_rate": 0.0001831078205220659, "loss": 1.0798, "step": 18750 }, { "epoch": 0.27, "grad_norm": 0.51171875, "learning_rate": 0.00018309389389460168, "loss": 0.9936, "step": 18755 }, { "epoch": 0.27, "grad_norm": 0.65234375, "learning_rate": 0.00018307996205871755, "loss": 0.984, "step": 18760 }, { "epoch": 0.27, "grad_norm": 0.5078125, "learning_rate": 0.00018306602501528673, "loss": 0.8814, "step": 18765 }, { "epoch": 0.27, "grad_norm": 0.49609375, "learning_rate": 0.00018305208276518293, "loss": 0.9234, "step": 18770 }, { "epoch": 0.27, "grad_norm": 0.515625, "learning_rate": 0.00018303813530927995, "loss": 0.908, "step": 18775 }, { "epoch": 0.27, "grad_norm": 0.51171875, "learning_rate": 0.00018302418264845208, "loss": 0.8875, "step": 18780 }, { "epoch": 0.27, "grad_norm": 0.61328125, "learning_rate": 0.00018301022478357391, "loss": 0.9008, "step": 18785 }, { "epoch": 0.27, "grad_norm": 0.6640625, "learning_rate": 0.0001829962617155203, "loss": 1.0306, "step": 18790 }, { "epoch": 0.27, "grad_norm": 0.546875, "learning_rate": 0.00018298229344516646, "loss": 0.9696, "step": 18795 }, { "epoch": 0.27, "grad_norm": 0.57421875, "learning_rate": 0.00018296831997338797, "loss": 0.9603, "step": 18800 }, { "epoch": 0.27, "grad_norm": 0.59765625, "learning_rate": 0.0001829543413010607, "loss": 0.8585, "step": 18805 }, { "epoch": 0.27, "grad_norm": 0.58984375, "learning_rate": 0.0001829403574290608, "loss": 0.8206, "step": 18810 }, { "epoch": 0.27, "grad_norm": 0.546875, "learning_rate": 0.0001829263683582649, "loss": 0.9683, "step": 18815 }, { "epoch": 0.27, "grad_norm": 0.51953125, "learning_rate": 0.00018291237408954976, "loss": 0.9284, "step": 18820 }, { "epoch": 0.27, "grad_norm": 0.6796875, "learning_rate": 0.00018289837462379257, "loss": 1.0969, "step": 18825 }, { "epoch": 0.27, "grad_norm": 0.64453125, "learning_rate": 0.00018288436996187084, "loss": 1.0336, "step": 18830 }, { "epoch": 0.27, "grad_norm": 0.546875, "learning_rate": 0.00018287036010466244, "loss": 0.9165, "step": 18835 }, { "epoch": 0.27, "grad_norm": 0.5546875, "learning_rate": 0.00018285634505304545, "loss": 0.9036, "step": 18840 }, { "epoch": 0.27, "grad_norm": 0.54296875, "learning_rate": 0.00018284232480789841, "loss": 0.8956, "step": 18845 }, { "epoch": 0.27, "grad_norm": 0.6484375, "learning_rate": 0.00018282829937010009, "loss": 0.9551, "step": 18850 }, { "epoch": 0.27, "grad_norm": 0.51171875, "learning_rate": 0.00018281426874052961, "loss": 0.9882, "step": 18855 }, { "epoch": 0.27, "grad_norm": 0.59765625, "learning_rate": 0.00018280023292006648, "loss": 1.0658, "step": 18860 }, { "epoch": 0.27, "grad_norm": 0.52734375, "learning_rate": 0.00018278619190959045, "loss": 0.8386, "step": 18865 }, { "epoch": 0.27, "grad_norm": 0.578125, "learning_rate": 0.00018277214570998161, "loss": 0.9481, "step": 18870 }, { "epoch": 0.27, "grad_norm": 0.51953125, "learning_rate": 0.00018275809432212041, "loss": 1.0821, "step": 18875 }, { "epoch": 0.27, "grad_norm": 0.6328125, "learning_rate": 0.0001827440377468876, "loss": 0.9736, "step": 18880 }, { "epoch": 0.27, "grad_norm": 0.578125, "learning_rate": 0.00018272997598516431, "loss": 0.9497, "step": 18885 }, { "epoch": 0.27, "grad_norm": 0.59375, "learning_rate": 0.00018271590903783184, "loss": 0.9594, "step": 18890 }, { "epoch": 0.27, "grad_norm": 0.64453125, "learning_rate": 0.00018270183690577202, "loss": 0.9803, "step": 18895 }, { "epoch": 0.27, "grad_norm": 0.62109375, "learning_rate": 0.00018268775958986687, "loss": 0.9781, "step": 18900 }, { "epoch": 0.27, "grad_norm": 0.640625, "learning_rate": 0.00018267367709099878, "loss": 1.1337, "step": 18905 }, { "epoch": 0.27, "grad_norm": 0.52734375, "learning_rate": 0.00018265958941005044, "loss": 0.8523, "step": 18910 }, { "epoch": 0.27, "grad_norm": 0.55859375, "learning_rate": 0.00018264549654790487, "loss": 0.8817, "step": 18915 }, { "epoch": 0.27, "grad_norm": 0.5546875, "learning_rate": 0.00018263139850544551, "loss": 1.0435, "step": 18920 }, { "epoch": 0.27, "grad_norm": 0.5859375, "learning_rate": 0.00018261729528355595, "loss": 1.0755, "step": 18925 }, { "epoch": 0.27, "grad_norm": 0.55078125, "learning_rate": 0.0001826031868831202, "loss": 1.0781, "step": 18930 }, { "epoch": 0.27, "grad_norm": 0.5859375, "learning_rate": 0.00018258907330502265, "loss": 1.0118, "step": 18935 }, { "epoch": 0.27, "grad_norm": 0.515625, "learning_rate": 0.00018257495455014798, "loss": 0.8649, "step": 18940 }, { "epoch": 0.27, "grad_norm": 0.64453125, "learning_rate": 0.00018256083061938104, "loss": 0.984, "step": 18945 }, { "epoch": 0.27, "grad_norm": 0.5390625, "learning_rate": 0.00018254670151360722, "loss": 1.0077, "step": 18950 }, { "epoch": 0.27, "grad_norm": 0.56640625, "learning_rate": 0.00018253256723371216, "loss": 1.1296, "step": 18955 }, { "epoch": 0.27, "grad_norm": 0.55859375, "learning_rate": 0.00018251842778058177, "loss": 0.965, "step": 18960 }, { "epoch": 0.27, "grad_norm": 0.58203125, "learning_rate": 0.00018250428315510234, "loss": 0.9761, "step": 18965 }, { "epoch": 0.27, "grad_norm": 0.58203125, "learning_rate": 0.00018249013335816048, "loss": 0.9319, "step": 18970 }, { "epoch": 0.27, "grad_norm": 0.54296875, "learning_rate": 0.0001824759783906431, "loss": 1.0028, "step": 18975 }, { "epoch": 0.27, "grad_norm": 0.6328125, "learning_rate": 0.0001824618182534375, "loss": 1.0927, "step": 18980 }, { "epoch": 0.27, "grad_norm": 0.59375, "learning_rate": 0.00018244765294743117, "loss": 0.8849, "step": 18985 }, { "epoch": 0.27, "grad_norm": 0.5859375, "learning_rate": 0.00018243348247351212, "loss": 0.924, "step": 18990 }, { "epoch": 0.27, "grad_norm": 0.53125, "learning_rate": 0.00018241930683256846, "loss": 0.892, "step": 18995 }, { "epoch": 0.27, "grad_norm": 0.62109375, "learning_rate": 0.00018240512602548875, "loss": 1.0804, "step": 19000 }, { "epoch": 0.27, "grad_norm": 0.515625, "learning_rate": 0.00018239094005316193, "loss": 0.8454, "step": 19005 }, { "epoch": 0.27, "grad_norm": 0.5625, "learning_rate": 0.00018237674891647716, "loss": 0.9701, "step": 19010 }, { "epoch": 0.27, "grad_norm": 0.59375, "learning_rate": 0.00018236255261632392, "loss": 0.9946, "step": 19015 }, { "epoch": 0.27, "grad_norm": 0.6484375, "learning_rate": 0.0001823483511535921, "loss": 1.0154, "step": 19020 }, { "epoch": 0.27, "grad_norm": 0.58203125, "learning_rate": 0.00018233414452917184, "loss": 0.9028, "step": 19025 }, { "epoch": 0.27, "grad_norm": 0.578125, "learning_rate": 0.00018231993274395362, "loss": 0.8365, "step": 19030 }, { "epoch": 0.27, "grad_norm": 0.5546875, "learning_rate": 0.00018230571579882826, "loss": 0.7945, "step": 19035 }, { "epoch": 0.27, "grad_norm": 0.58203125, "learning_rate": 0.0001822914936946869, "loss": 1.0, "step": 19040 }, { "epoch": 0.27, "grad_norm": 0.52734375, "learning_rate": 0.000182277266432421, "loss": 1.0195, "step": 19045 }, { "epoch": 0.27, "grad_norm": 0.56640625, "learning_rate": 0.00018226303401292233, "loss": 1.0113, "step": 19050 }, { "epoch": 0.27, "grad_norm": 0.56640625, "learning_rate": 0.00018224879643708299, "loss": 0.9697, "step": 19055 }, { "epoch": 0.27, "grad_norm": 0.6484375, "learning_rate": 0.00018223455370579544, "loss": 1.0527, "step": 19060 }, { "epoch": 0.27, "grad_norm": 0.5625, "learning_rate": 0.00018222030581995237, "loss": 1.1755, "step": 19065 }, { "epoch": 0.27, "grad_norm": 0.55078125, "learning_rate": 0.00018220605278044692, "loss": 0.8671, "step": 19070 }, { "epoch": 0.27, "grad_norm": 0.609375, "learning_rate": 0.00018219179458817247, "loss": 0.9569, "step": 19075 }, { "epoch": 0.27, "grad_norm": 0.5625, "learning_rate": 0.00018217753124402268, "loss": 0.9445, "step": 19080 }, { "epoch": 0.27, "grad_norm": 0.55859375, "learning_rate": 0.00018216326274889165, "loss": 1.0961, "step": 19085 }, { "epoch": 0.27, "grad_norm": 0.62109375, "learning_rate": 0.00018214898910367375, "loss": 0.9009, "step": 19090 }, { "epoch": 0.27, "grad_norm": 0.82421875, "learning_rate": 0.00018213471030926367, "loss": 1.0007, "step": 19095 }, { "epoch": 0.27, "grad_norm": 0.5625, "learning_rate": 0.00018212042636655637, "loss": 1.0147, "step": 19100 }, { "epoch": 0.27, "grad_norm": 0.765625, "learning_rate": 0.00018210613727644723, "loss": 0.8774, "step": 19105 }, { "epoch": 0.27, "grad_norm": 0.578125, "learning_rate": 0.0001820918430398319, "loss": 0.9651, "step": 19110 }, { "epoch": 0.27, "grad_norm": 0.5625, "learning_rate": 0.00018207754365760637, "loss": 1.0168, "step": 19115 }, { "epoch": 0.27, "grad_norm": 0.73046875, "learning_rate": 0.00018206323913066687, "loss": 1.0255, "step": 19120 }, { "epoch": 0.27, "grad_norm": 0.58984375, "learning_rate": 0.00018204892945991014, "loss": 1.0751, "step": 19125 }, { "epoch": 0.27, "grad_norm": 0.5625, "learning_rate": 0.00018203461464623302, "loss": 0.9231, "step": 19130 }, { "epoch": 0.27, "grad_norm": 0.6640625, "learning_rate": 0.00018202029469053285, "loss": 1.022, "step": 19135 }, { "epoch": 0.27, "grad_norm": 0.6875, "learning_rate": 0.00018200596959370722, "loss": 1.0119, "step": 19140 }, { "epoch": 0.27, "grad_norm": 0.6328125, "learning_rate": 0.00018199163935665396, "loss": 0.9225, "step": 19145 }, { "epoch": 0.27, "grad_norm": 0.6015625, "learning_rate": 0.00018197730398027142, "loss": 1.0322, "step": 19150 }, { "epoch": 0.27, "grad_norm": 0.625, "learning_rate": 0.00018196296346545805, "loss": 1.0106, "step": 19155 }, { "epoch": 0.27, "grad_norm": 0.62109375, "learning_rate": 0.00018194861781311282, "loss": 0.9608, "step": 19160 }, { "epoch": 0.27, "grad_norm": 0.546875, "learning_rate": 0.00018193426702413487, "loss": 0.9761, "step": 19165 }, { "epoch": 0.27, "grad_norm": 0.546875, "learning_rate": 0.00018191991109942377, "loss": 0.8546, "step": 19170 }, { "epoch": 0.28, "grad_norm": 0.546875, "learning_rate": 0.0001819055500398793, "loss": 0.9388, "step": 19175 }, { "epoch": 0.28, "grad_norm": 0.6015625, "learning_rate": 0.00018189118384640172, "loss": 0.909, "step": 19180 }, { "epoch": 0.28, "grad_norm": 0.51953125, "learning_rate": 0.0001818768125198915, "loss": 1.0421, "step": 19185 }, { "epoch": 0.28, "grad_norm": 0.5078125, "learning_rate": 0.00018186243606124934, "loss": 0.8726, "step": 19190 }, { "epoch": 0.28, "grad_norm": 0.6015625, "learning_rate": 0.0001818480544713765, "loss": 0.9523, "step": 19195 }, { "epoch": 0.28, "grad_norm": 0.56640625, "learning_rate": 0.00018183366775117437, "loss": 0.9786, "step": 19200 }, { "epoch": 0.28, "grad_norm": 0.53515625, "learning_rate": 0.00018181927590154475, "loss": 0.8752, "step": 19205 }, { "epoch": 0.28, "grad_norm": 0.46484375, "learning_rate": 0.00018180487892338973, "loss": 0.7925, "step": 19210 }, { "epoch": 0.28, "grad_norm": 0.59375, "learning_rate": 0.00018179047681761174, "loss": 0.957, "step": 19215 }, { "epoch": 0.28, "grad_norm": 0.6015625, "learning_rate": 0.0001817760695851135, "loss": 1.0573, "step": 19220 }, { "epoch": 0.28, "grad_norm": 0.48828125, "learning_rate": 0.0001817616572267981, "loss": 0.8925, "step": 19225 }, { "epoch": 0.28, "grad_norm": 0.56640625, "learning_rate": 0.0001817472397435689, "loss": 0.8296, "step": 19230 }, { "epoch": 0.28, "grad_norm": 0.515625, "learning_rate": 0.0001817328171363296, "loss": 0.9467, "step": 19235 }, { "epoch": 0.28, "grad_norm": 0.5703125, "learning_rate": 0.00018171838940598425, "loss": 0.8865, "step": 19240 }, { "epoch": 0.28, "grad_norm": 0.5859375, "learning_rate": 0.00018170395655343717, "loss": 0.9796, "step": 19245 }, { "epoch": 0.28, "grad_norm": 0.5078125, "learning_rate": 0.00018168951857959305, "loss": 0.8189, "step": 19250 }, { "epoch": 0.28, "grad_norm": 0.671875, "learning_rate": 0.00018167507548535685, "loss": 0.9377, "step": 19255 }, { "epoch": 0.28, "grad_norm": 0.53125, "learning_rate": 0.00018166062727163393, "loss": 1.1658, "step": 19260 }, { "epoch": 0.28, "grad_norm": 0.5703125, "learning_rate": 0.00018164617393932986, "loss": 0.9539, "step": 19265 }, { "epoch": 0.28, "grad_norm": 0.55078125, "learning_rate": 0.00018163171548935062, "loss": 1.0121, "step": 19270 }, { "epoch": 0.28, "grad_norm": 0.55078125, "learning_rate": 0.00018161725192260254, "loss": 0.9584, "step": 19275 }, { "epoch": 0.28, "grad_norm": 0.59375, "learning_rate": 0.0001816027832399921, "loss": 0.9901, "step": 19280 }, { "epoch": 0.28, "grad_norm": 0.5546875, "learning_rate": 0.00018158830944242627, "loss": 0.8591, "step": 19285 }, { "epoch": 0.28, "grad_norm": 0.51953125, "learning_rate": 0.0001815738305308123, "loss": 0.9036, "step": 19290 }, { "epoch": 0.28, "grad_norm": 0.63671875, "learning_rate": 0.0001815593465060577, "loss": 1.0187, "step": 19295 }, { "epoch": 0.28, "grad_norm": 0.6015625, "learning_rate": 0.0001815448573690704, "loss": 0.856, "step": 19300 }, { "epoch": 0.28, "grad_norm": 0.49609375, "learning_rate": 0.00018153036312075854, "loss": 0.9173, "step": 19305 }, { "epoch": 0.28, "grad_norm": 0.5390625, "learning_rate": 0.00018151586376203072, "loss": 0.9322, "step": 19310 }, { "epoch": 0.28, "grad_norm": 0.54296875, "learning_rate": 0.00018150135929379565, "loss": 0.9743, "step": 19315 }, { "epoch": 0.28, "grad_norm": 0.5, "learning_rate": 0.0001814868497169626, "loss": 0.9384, "step": 19320 }, { "epoch": 0.28, "grad_norm": 0.57421875, "learning_rate": 0.000181472335032441, "loss": 0.9545, "step": 19325 }, { "epoch": 0.28, "grad_norm": 0.58203125, "learning_rate": 0.00018145781524114068, "loss": 0.9326, "step": 19330 }, { "epoch": 0.28, "grad_norm": 0.55859375, "learning_rate": 0.00018144329034397167, "loss": 0.7758, "step": 19335 }, { "epoch": 0.28, "grad_norm": 0.640625, "learning_rate": 0.0001814287603418445, "loss": 0.9259, "step": 19340 }, { "epoch": 0.28, "grad_norm": 0.57421875, "learning_rate": 0.00018141422523566987, "loss": 0.7652, "step": 19345 }, { "epoch": 0.28, "grad_norm": 0.62109375, "learning_rate": 0.00018139968502635888, "loss": 1.0755, "step": 19350 }, { "epoch": 0.28, "grad_norm": 0.5625, "learning_rate": 0.00018138513971482296, "loss": 0.9437, "step": 19355 }, { "epoch": 0.28, "grad_norm": 0.59765625, "learning_rate": 0.00018137058930197376, "loss": 0.9806, "step": 19360 }, { "epoch": 0.28, "grad_norm": 0.53515625, "learning_rate": 0.00018135603378872337, "loss": 0.9889, "step": 19365 }, { "epoch": 0.28, "grad_norm": 0.53125, "learning_rate": 0.0001813414731759841, "loss": 0.8439, "step": 19370 }, { "epoch": 0.28, "grad_norm": 0.494140625, "learning_rate": 0.00018132690746466867, "loss": 1.0547, "step": 19375 }, { "epoch": 0.28, "grad_norm": 0.56640625, "learning_rate": 0.00018131233665569005, "loss": 0.7771, "step": 19380 }, { "epoch": 0.28, "grad_norm": 0.61328125, "learning_rate": 0.00018129776074996156, "loss": 1.1168, "step": 19385 }, { "epoch": 0.28, "grad_norm": 0.59765625, "learning_rate": 0.00018128317974839685, "loss": 0.9792, "step": 19390 }, { "epoch": 0.28, "grad_norm": 0.5859375, "learning_rate": 0.00018126859365190986, "loss": 0.9773, "step": 19395 }, { "epoch": 0.28, "grad_norm": 0.62109375, "learning_rate": 0.00018125400246141486, "loss": 1.0232, "step": 19400 }, { "epoch": 0.28, "grad_norm": 0.68359375, "learning_rate": 0.00018123940617782643, "loss": 1.1173, "step": 19405 }, { "epoch": 0.28, "grad_norm": 0.58203125, "learning_rate": 0.0001812248048020595, "loss": 1.0651, "step": 19410 }, { "epoch": 0.28, "grad_norm": 0.60546875, "learning_rate": 0.0001812101983350293, "loss": 1.0051, "step": 19415 }, { "epoch": 0.28, "grad_norm": 0.58984375, "learning_rate": 0.0001811955867776514, "loss": 0.995, "step": 19420 }, { "epoch": 0.28, "grad_norm": 0.6171875, "learning_rate": 0.0001811809701308416, "loss": 0.9651, "step": 19425 }, { "epoch": 0.28, "grad_norm": 0.6328125, "learning_rate": 0.00018116634839551618, "loss": 1.0279, "step": 19430 }, { "epoch": 0.28, "grad_norm": 0.640625, "learning_rate": 0.00018115172157259158, "loss": 1.0307, "step": 19435 }, { "epoch": 0.28, "grad_norm": 0.578125, "learning_rate": 0.00018113708966298466, "loss": 0.8745, "step": 19440 }, { "epoch": 0.28, "grad_norm": 0.61328125, "learning_rate": 0.00018112245266761255, "loss": 0.8352, "step": 19445 }, { "epoch": 0.28, "grad_norm": 0.62109375, "learning_rate": 0.0001811078105873927, "loss": 0.9852, "step": 19450 }, { "epoch": 0.28, "grad_norm": 0.546875, "learning_rate": 0.0001810931634232429, "loss": 0.9463, "step": 19455 }, { "epoch": 0.28, "grad_norm": 0.703125, "learning_rate": 0.00018107851117608127, "loss": 0.965, "step": 19460 }, { "epoch": 0.28, "grad_norm": 0.5625, "learning_rate": 0.00018106385384682625, "loss": 0.9346, "step": 19465 }, { "epoch": 0.28, "grad_norm": 0.61328125, "learning_rate": 0.00018104919143639654, "loss": 1.0714, "step": 19470 }, { "epoch": 0.28, "grad_norm": 0.52734375, "learning_rate": 0.00018103452394571117, "loss": 0.9186, "step": 19475 }, { "epoch": 0.28, "grad_norm": 0.54296875, "learning_rate": 0.00018101985137568955, "loss": 1.0589, "step": 19480 }, { "epoch": 0.28, "grad_norm": 0.546875, "learning_rate": 0.00018100517372725142, "loss": 0.9316, "step": 19485 }, { "epoch": 0.28, "grad_norm": 0.51953125, "learning_rate": 0.0001809904910013167, "loss": 0.9155, "step": 19490 }, { "epoch": 0.28, "grad_norm": 0.796875, "learning_rate": 0.00018097580319880577, "loss": 0.9344, "step": 19495 }, { "epoch": 0.28, "grad_norm": 0.546875, "learning_rate": 0.00018096111032063928, "loss": 1.1705, "step": 19500 }, { "epoch": 0.28, "grad_norm": 0.50390625, "learning_rate": 0.00018094641236773818, "loss": 0.9314, "step": 19505 }, { "epoch": 0.28, "grad_norm": 0.53125, "learning_rate": 0.00018093170934102378, "loss": 0.8923, "step": 19510 }, { "epoch": 0.28, "grad_norm": 0.6171875, "learning_rate": 0.00018091700124141764, "loss": 1.0893, "step": 19515 }, { "epoch": 0.28, "grad_norm": 0.68359375, "learning_rate": 0.0001809022880698417, "loss": 0.9808, "step": 19520 }, { "epoch": 0.28, "grad_norm": 0.53515625, "learning_rate": 0.00018088756982721825, "loss": 0.9501, "step": 19525 }, { "epoch": 0.28, "grad_norm": 0.58984375, "learning_rate": 0.00018087284651446977, "loss": 0.9246, "step": 19530 }, { "epoch": 0.28, "grad_norm": 0.55078125, "learning_rate": 0.00018085811813251917, "loss": 0.9049, "step": 19535 }, { "epoch": 0.28, "grad_norm": 0.470703125, "learning_rate": 0.0001808433846822896, "loss": 1.1336, "step": 19540 }, { "epoch": 0.28, "grad_norm": 0.69140625, "learning_rate": 0.00018082864616470468, "loss": 1.003, "step": 19545 }, { "epoch": 0.28, "grad_norm": 0.49609375, "learning_rate": 0.00018081390258068808, "loss": 1.0711, "step": 19550 }, { "epoch": 0.28, "grad_norm": 0.56640625, "learning_rate": 0.00018079915393116405, "loss": 1.0544, "step": 19555 }, { "epoch": 0.28, "grad_norm": 0.55859375, "learning_rate": 0.00018078440021705708, "loss": 1.1562, "step": 19560 }, { "epoch": 0.28, "grad_norm": 0.6953125, "learning_rate": 0.00018076964143929188, "loss": 1.1116, "step": 19565 }, { "epoch": 0.28, "grad_norm": 0.5546875, "learning_rate": 0.00018075487759879353, "loss": 0.9089, "step": 19570 }, { "epoch": 0.28, "grad_norm": 0.66015625, "learning_rate": 0.0001807401086964875, "loss": 0.9719, "step": 19575 }, { "epoch": 0.28, "grad_norm": 0.68359375, "learning_rate": 0.00018072533473329952, "loss": 1.1497, "step": 19580 }, { "epoch": 0.28, "grad_norm": 0.50390625, "learning_rate": 0.0001807105557101556, "loss": 0.9319, "step": 19585 }, { "epoch": 0.28, "grad_norm": 0.7109375, "learning_rate": 0.0001806957716279821, "loss": 0.8869, "step": 19590 }, { "epoch": 0.28, "grad_norm": 0.59375, "learning_rate": 0.00018068098248770576, "loss": 0.9751, "step": 19595 }, { "epoch": 0.28, "grad_norm": 0.56640625, "learning_rate": 0.00018066618829025354, "loss": 0.8765, "step": 19600 }, { "epoch": 0.28, "grad_norm": 0.64453125, "learning_rate": 0.0001806513890365528, "loss": 1.1602, "step": 19605 }, { "epoch": 0.28, "grad_norm": 0.578125, "learning_rate": 0.0001806365847275311, "loss": 0.9074, "step": 19610 }, { "epoch": 0.28, "grad_norm": 0.59765625, "learning_rate": 0.00018062177536411645, "loss": 0.909, "step": 19615 }, { "epoch": 0.28, "grad_norm": 0.5078125, "learning_rate": 0.00018060696094723708, "loss": 0.8727, "step": 19620 }, { "epoch": 0.28, "grad_norm": 0.55078125, "learning_rate": 0.00018059214147782163, "loss": 1.0834, "step": 19625 }, { "epoch": 0.28, "grad_norm": 0.609375, "learning_rate": 0.00018057731695679893, "loss": 0.9149, "step": 19630 }, { "epoch": 0.28, "grad_norm": 0.5546875, "learning_rate": 0.00018056248738509826, "loss": 1.0154, "step": 19635 }, { "epoch": 0.28, "grad_norm": 0.515625, "learning_rate": 0.0001805476527636491, "loss": 1.0771, "step": 19640 }, { "epoch": 0.28, "grad_norm": 0.57421875, "learning_rate": 0.00018053281309338135, "loss": 1.0866, "step": 19645 }, { "epoch": 0.28, "grad_norm": 0.6171875, "learning_rate": 0.00018051796837522516, "loss": 0.9696, "step": 19650 }, { "epoch": 0.28, "grad_norm": 0.515625, "learning_rate": 0.000180503118610111, "loss": 0.7969, "step": 19655 }, { "epoch": 0.28, "grad_norm": 0.578125, "learning_rate": 0.00018048826379896967, "loss": 0.9516, "step": 19660 }, { "epoch": 0.28, "grad_norm": 0.57421875, "learning_rate": 0.00018047340394273232, "loss": 0.9688, "step": 19665 }, { "epoch": 0.28, "grad_norm": 0.515625, "learning_rate": 0.00018045853904233034, "loss": 1.0596, "step": 19670 }, { "epoch": 0.28, "grad_norm": 0.53125, "learning_rate": 0.00018044366909869552, "loss": 0.9945, "step": 19675 }, { "epoch": 0.28, "grad_norm": 0.62890625, "learning_rate": 0.00018042879411275987, "loss": 1.033, "step": 19680 }, { "epoch": 0.28, "grad_norm": 0.640625, "learning_rate": 0.00018041391408545586, "loss": 0.9772, "step": 19685 }, { "epoch": 0.28, "grad_norm": 0.59765625, "learning_rate": 0.00018039902901771608, "loss": 1.0647, "step": 19690 }, { "epoch": 0.28, "grad_norm": 0.58203125, "learning_rate": 0.00018038413891047358, "loss": 0.9532, "step": 19695 }, { "epoch": 0.28, "grad_norm": 0.625, "learning_rate": 0.00018036924376466174, "loss": 0.801, "step": 19700 }, { "epoch": 0.28, "grad_norm": 0.5234375, "learning_rate": 0.00018035434358121418, "loss": 0.9809, "step": 19705 }, { "epoch": 0.28, "grad_norm": 0.59765625, "learning_rate": 0.00018033943836106482, "loss": 1.0384, "step": 19710 }, { "epoch": 0.28, "grad_norm": 0.6015625, "learning_rate": 0.00018032452810514798, "loss": 0.9302, "step": 19715 }, { "epoch": 0.28, "grad_norm": 0.54296875, "learning_rate": 0.0001803096128143982, "loss": 1.0039, "step": 19720 }, { "epoch": 0.28, "grad_norm": 0.6328125, "learning_rate": 0.00018029469248975047, "loss": 1.1232, "step": 19725 }, { "epoch": 0.28, "grad_norm": 0.5859375, "learning_rate": 0.00018027976713213994, "loss": 0.9102, "step": 19730 }, { "epoch": 0.28, "grad_norm": 0.5703125, "learning_rate": 0.0001802648367425022, "loss": 1.1901, "step": 19735 }, { "epoch": 0.28, "grad_norm": 0.6171875, "learning_rate": 0.00018024990132177305, "loss": 0.8535, "step": 19740 }, { "epoch": 0.28, "grad_norm": 0.5234375, "learning_rate": 0.00018023496087088872, "loss": 0.9464, "step": 19745 }, { "epoch": 0.28, "grad_norm": 0.67578125, "learning_rate": 0.00018022001539078563, "loss": 1.0605, "step": 19750 }, { "epoch": 0.28, "grad_norm": 0.494140625, "learning_rate": 0.00018020506488240065, "loss": 0.892, "step": 19755 }, { "epoch": 0.28, "grad_norm": 0.50390625, "learning_rate": 0.00018019010934667082, "loss": 0.8213, "step": 19760 }, { "epoch": 0.28, "grad_norm": 0.546875, "learning_rate": 0.00018017514878453363, "loss": 0.9154, "step": 19765 }, { "epoch": 0.28, "grad_norm": 0.71484375, "learning_rate": 0.0001801601831969268, "loss": 1.0438, "step": 19770 }, { "epoch": 0.28, "grad_norm": 0.67578125, "learning_rate": 0.00018014521258478839, "loss": 0.9831, "step": 19775 }, { "epoch": 0.28, "grad_norm": 0.63671875, "learning_rate": 0.00018013023694905678, "loss": 1.0661, "step": 19780 }, { "epoch": 0.28, "grad_norm": 0.609375, "learning_rate": 0.00018011525629067063, "loss": 0.8326, "step": 19785 }, { "epoch": 0.28, "grad_norm": 0.546875, "learning_rate": 0.000180100270610569, "loss": 1.0475, "step": 19790 }, { "epoch": 0.28, "grad_norm": 0.59375, "learning_rate": 0.00018008527990969118, "loss": 0.9246, "step": 19795 }, { "epoch": 0.28, "grad_norm": 0.56640625, "learning_rate": 0.0001800702841889768, "loss": 1.0556, "step": 19800 }, { "epoch": 0.28, "grad_norm": 0.578125, "learning_rate": 0.00018005528344936582, "loss": 1.0302, "step": 19805 }, { "epoch": 0.28, "grad_norm": 0.60546875, "learning_rate": 0.0001800402776917985, "loss": 1.0522, "step": 19810 }, { "epoch": 0.28, "grad_norm": 0.546875, "learning_rate": 0.0001800252669172154, "loss": 0.9603, "step": 19815 }, { "epoch": 0.28, "grad_norm": 0.59375, "learning_rate": 0.00018001025112655743, "loss": 1.0379, "step": 19820 }, { "epoch": 0.28, "grad_norm": 0.65234375, "learning_rate": 0.0001799952303207658, "loss": 0.9, "step": 19825 }, { "epoch": 0.28, "grad_norm": 0.66796875, "learning_rate": 0.00017998020450078203, "loss": 0.9419, "step": 19830 }, { "epoch": 0.28, "grad_norm": 0.6171875, "learning_rate": 0.00017996517366754798, "loss": 0.8759, "step": 19835 }, { "epoch": 0.28, "grad_norm": 0.69140625, "learning_rate": 0.00017995013782200574, "loss": 1.1232, "step": 19840 }, { "epoch": 0.28, "grad_norm": 0.5625, "learning_rate": 0.0001799350969650978, "loss": 0.8869, "step": 19845 }, { "epoch": 0.28, "grad_norm": 0.62890625, "learning_rate": 0.00017992005109776694, "loss": 0.9544, "step": 19850 }, { "epoch": 0.28, "grad_norm": 0.52734375, "learning_rate": 0.0001799050002209563, "loss": 0.8654, "step": 19855 }, { "epoch": 0.28, "grad_norm": 0.60546875, "learning_rate": 0.0001798899443356092, "loss": 0.879, "step": 19860 }, { "epoch": 0.28, "grad_norm": 0.51953125, "learning_rate": 0.00017987488344266942, "loss": 1.0349, "step": 19865 }, { "epoch": 0.29, "grad_norm": 0.54296875, "learning_rate": 0.00017985981754308096, "loss": 0.9102, "step": 19870 }, { "epoch": 0.29, "grad_norm": 0.5390625, "learning_rate": 0.0001798447466377882, "loss": 0.8759, "step": 19875 }, { "epoch": 0.29, "grad_norm": 0.58203125, "learning_rate": 0.00017982967072773578, "loss": 1.0562, "step": 19880 }, { "epoch": 0.29, "grad_norm": 0.5078125, "learning_rate": 0.00017981458981386868, "loss": 0.9362, "step": 19885 }, { "epoch": 0.29, "grad_norm": 0.5546875, "learning_rate": 0.00017979950389713218, "loss": 0.9396, "step": 19890 }, { "epoch": 0.29, "grad_norm": 0.58203125, "learning_rate": 0.0001797844129784719, "loss": 1.1549, "step": 19895 }, { "epoch": 0.29, "grad_norm": 0.5859375, "learning_rate": 0.00017976931705883376, "loss": 1.0361, "step": 19900 }, { "epoch": 0.29, "grad_norm": 0.55078125, "learning_rate": 0.00017975421613916395, "loss": 0.9686, "step": 19905 }, { "epoch": 0.29, "grad_norm": 0.53125, "learning_rate": 0.00017973911022040905, "loss": 0.9509, "step": 19910 }, { "epoch": 0.29, "grad_norm": 0.59765625, "learning_rate": 0.00017972399930351593, "loss": 1.1096, "step": 19915 }, { "epoch": 0.29, "grad_norm": 0.546875, "learning_rate": 0.00017970888338943172, "loss": 1.0179, "step": 19920 }, { "epoch": 0.29, "grad_norm": 0.79296875, "learning_rate": 0.00017969376247910392, "loss": 1.0277, "step": 19925 }, { "epoch": 0.29, "grad_norm": 0.546875, "learning_rate": 0.0001796786365734803, "loss": 0.891, "step": 19930 }, { "epoch": 0.29, "grad_norm": 0.66015625, "learning_rate": 0.00017966350567350902, "loss": 0.9891, "step": 19935 }, { "epoch": 0.29, "grad_norm": 0.6875, "learning_rate": 0.00017964836978013845, "loss": 1.0103, "step": 19940 }, { "epoch": 0.29, "grad_norm": 0.55859375, "learning_rate": 0.0001796332288943174, "loss": 1.0574, "step": 19945 }, { "epoch": 0.29, "grad_norm": 0.6015625, "learning_rate": 0.00017961808301699482, "loss": 0.9753, "step": 19950 }, { "epoch": 0.29, "grad_norm": 0.83984375, "learning_rate": 0.0001796029321491201, "loss": 1.0732, "step": 19955 }, { "epoch": 0.29, "grad_norm": 0.58203125, "learning_rate": 0.00017958777629164301, "loss": 0.888, "step": 19960 }, { "epoch": 0.29, "grad_norm": 0.56640625, "learning_rate": 0.00017957261544551342, "loss": 0.9886, "step": 19965 }, { "epoch": 0.29, "grad_norm": 0.578125, "learning_rate": 0.00017955744961168163, "loss": 0.9075, "step": 19970 }, { "epoch": 0.29, "grad_norm": 0.5390625, "learning_rate": 0.00017954227879109834, "loss": 0.9966, "step": 19975 }, { "epoch": 0.29, "grad_norm": 0.5859375, "learning_rate": 0.00017952710298471442, "loss": 1.0853, "step": 19980 }, { "epoch": 0.29, "grad_norm": 0.54296875, "learning_rate": 0.0001795119221934811, "loss": 0.9075, "step": 19985 }, { "epoch": 0.29, "grad_norm": 0.65234375, "learning_rate": 0.00017949673641834993, "loss": 0.956, "step": 19990 }, { "epoch": 0.29, "grad_norm": 0.54296875, "learning_rate": 0.0001794815456602728, "loss": 0.9857, "step": 19995 }, { "epoch": 0.29, "grad_norm": 0.52734375, "learning_rate": 0.00017946634992020187, "loss": 1.0116, "step": 20000 }, { "epoch": 0.29, "grad_norm": 0.5546875, "learning_rate": 0.00017945114919908962, "loss": 1.0116, "step": 20005 }, { "epoch": 0.29, "grad_norm": 0.6484375, "learning_rate": 0.00017943594349788882, "loss": 1.0758, "step": 20010 }, { "epoch": 0.29, "grad_norm": 0.64453125, "learning_rate": 0.00017942073281755264, "loss": 0.9998, "step": 20015 }, { "epoch": 0.29, "grad_norm": 0.58984375, "learning_rate": 0.00017940551715903448, "loss": 1.0343, "step": 20020 }, { "epoch": 0.29, "grad_norm": 0.58203125, "learning_rate": 0.00017939029652328805, "loss": 1.0741, "step": 20025 }, { "epoch": 0.29, "grad_norm": 0.6328125, "learning_rate": 0.00017937507091126743, "loss": 1.0459, "step": 20030 }, { "epoch": 0.29, "grad_norm": 0.59765625, "learning_rate": 0.00017935984032392695, "loss": 0.9559, "step": 20035 }, { "epoch": 0.29, "grad_norm": 0.58203125, "learning_rate": 0.0001793446047622213, "loss": 0.8793, "step": 20040 }, { "epoch": 0.29, "grad_norm": 0.53515625, "learning_rate": 0.00017932936422710547, "loss": 0.888, "step": 20045 }, { "epoch": 0.29, "grad_norm": 0.58203125, "learning_rate": 0.00017931411871953474, "loss": 0.8765, "step": 20050 }, { "epoch": 0.29, "grad_norm": 0.57421875, "learning_rate": 0.0001792988682404647, "loss": 1.0438, "step": 20055 }, { "epoch": 0.29, "grad_norm": 0.6171875, "learning_rate": 0.00017928361279085132, "loss": 1.1002, "step": 20060 }, { "epoch": 0.29, "grad_norm": 0.53125, "learning_rate": 0.00017926835237165074, "loss": 1.0579, "step": 20065 }, { "epoch": 0.29, "grad_norm": 0.59765625, "learning_rate": 0.0001792530869838196, "loss": 1.0192, "step": 20070 }, { "epoch": 0.29, "grad_norm": 0.64453125, "learning_rate": 0.00017923781662831467, "loss": 1.0652, "step": 20075 }, { "epoch": 0.29, "grad_norm": 0.5703125, "learning_rate": 0.00017922254130609317, "loss": 0.8573, "step": 20080 }, { "epoch": 0.29, "grad_norm": 0.51953125, "learning_rate": 0.00017920726101811255, "loss": 0.848, "step": 20085 }, { "epoch": 0.29, "grad_norm": 0.56640625, "learning_rate": 0.0001791919757653306, "loss": 0.8645, "step": 20090 }, { "epoch": 0.29, "grad_norm": 0.515625, "learning_rate": 0.00017917668554870544, "loss": 1.0636, "step": 20095 }, { "epoch": 0.29, "grad_norm": 0.5390625, "learning_rate": 0.00017916139036919544, "loss": 0.9071, "step": 20100 }, { "epoch": 0.29, "grad_norm": 0.56640625, "learning_rate": 0.0001791460902277593, "loss": 0.9749, "step": 20105 }, { "epoch": 0.29, "grad_norm": 0.5546875, "learning_rate": 0.00017913078512535611, "loss": 0.9368, "step": 20110 }, { "epoch": 0.29, "grad_norm": 0.55859375, "learning_rate": 0.0001791154750629452, "loss": 0.8993, "step": 20115 }, { "epoch": 0.29, "grad_norm": 0.59765625, "learning_rate": 0.0001791001600414862, "loss": 0.8609, "step": 20120 }, { "epoch": 0.29, "grad_norm": 0.48046875, "learning_rate": 0.0001790848400619391, "loss": 1.0746, "step": 20125 }, { "epoch": 0.29, "grad_norm": 0.54296875, "learning_rate": 0.00017906951512526413, "loss": 1.0527, "step": 20130 }, { "epoch": 0.29, "grad_norm": 0.62890625, "learning_rate": 0.0001790541852324219, "loss": 0.9294, "step": 20135 }, { "epoch": 0.29, "grad_norm": 0.51953125, "learning_rate": 0.00017903885038437331, "loss": 0.8542, "step": 20140 }, { "epoch": 0.29, "grad_norm": 0.53125, "learning_rate": 0.00017902351058207957, "loss": 0.9395, "step": 20145 }, { "epoch": 0.29, "grad_norm": 0.60546875, "learning_rate": 0.00017900816582650214, "loss": 1.0316, "step": 20150 }, { "epoch": 0.29, "grad_norm": 0.55859375, "learning_rate": 0.00017899281611860295, "loss": 0.8784, "step": 20155 }, { "epoch": 0.29, "grad_norm": 0.62890625, "learning_rate": 0.00017897746145934408, "loss": 0.9608, "step": 20160 }, { "epoch": 0.29, "grad_norm": 0.58203125, "learning_rate": 0.00017896210184968793, "loss": 1.044, "step": 20165 }, { "epoch": 0.29, "grad_norm": 0.57421875, "learning_rate": 0.00017894673729059734, "loss": 0.8963, "step": 20170 }, { "epoch": 0.29, "grad_norm": 0.515625, "learning_rate": 0.00017893136778303532, "loss": 1.0971, "step": 20175 }, { "epoch": 0.29, "grad_norm": 0.56640625, "learning_rate": 0.0001789159933279653, "loss": 0.9522, "step": 20180 }, { "epoch": 0.29, "grad_norm": 0.5390625, "learning_rate": 0.00017890061392635093, "loss": 0.8735, "step": 20185 }, { "epoch": 0.29, "grad_norm": 0.478515625, "learning_rate": 0.00017888522957915624, "loss": 0.9023, "step": 20190 }, { "epoch": 0.29, "grad_norm": 0.61328125, "learning_rate": 0.00017886984028734547, "loss": 0.8952, "step": 20195 }, { "epoch": 0.29, "grad_norm": 0.5390625, "learning_rate": 0.0001788544460518833, "loss": 0.8191, "step": 20200 }, { "epoch": 0.29, "grad_norm": 0.58984375, "learning_rate": 0.00017883904687373466, "loss": 0.9671, "step": 20205 }, { "epoch": 0.29, "grad_norm": 0.6171875, "learning_rate": 0.00017882364275386477, "loss": 1.1059, "step": 20210 }, { "epoch": 0.29, "grad_norm": 0.59375, "learning_rate": 0.00017880823369323918, "loss": 0.9204, "step": 20215 }, { "epoch": 0.29, "grad_norm": 0.63671875, "learning_rate": 0.00017879281969282373, "loss": 0.9978, "step": 20220 }, { "epoch": 0.29, "grad_norm": 0.53125, "learning_rate": 0.00017877740075358461, "loss": 1.0445, "step": 20225 }, { "epoch": 0.29, "grad_norm": 0.7578125, "learning_rate": 0.00017876197687648833, "loss": 1.1439, "step": 20230 }, { "epoch": 0.29, "grad_norm": 0.55078125, "learning_rate": 0.0001787465480625016, "loss": 0.9755, "step": 20235 }, { "epoch": 0.29, "grad_norm": 0.54296875, "learning_rate": 0.00017873111431259154, "loss": 0.947, "step": 20240 }, { "epoch": 0.29, "grad_norm": 0.546875, "learning_rate": 0.00017871567562772559, "loss": 0.9988, "step": 20245 }, { "epoch": 0.29, "grad_norm": 0.51953125, "learning_rate": 0.00017870023200887143, "loss": 1.0291, "step": 20250 }, { "epoch": 0.29, "grad_norm": 0.58203125, "learning_rate": 0.0001786847834569971, "loss": 0.8948, "step": 20255 }, { "epoch": 0.29, "grad_norm": 0.67578125, "learning_rate": 0.00017866932997307093, "loss": 0.9763, "step": 20260 }, { "epoch": 0.29, "grad_norm": 0.5703125, "learning_rate": 0.00017865387155806156, "loss": 0.9449, "step": 20265 }, { "epoch": 0.29, "grad_norm": 0.66796875, "learning_rate": 0.00017863840821293793, "loss": 0.9888, "step": 20270 }, { "epoch": 0.29, "grad_norm": 0.55078125, "learning_rate": 0.00017862293993866935, "loss": 0.9415, "step": 20275 }, { "epoch": 0.29, "grad_norm": 0.58984375, "learning_rate": 0.00017860746673622533, "loss": 1.0488, "step": 20280 }, { "epoch": 0.29, "grad_norm": 0.6875, "learning_rate": 0.00017859198860657575, "loss": 0.9914, "step": 20285 }, { "epoch": 0.29, "grad_norm": 0.51171875, "learning_rate": 0.0001785765055506908, "loss": 0.9855, "step": 20290 }, { "epoch": 0.29, "grad_norm": 0.68359375, "learning_rate": 0.00017856101756954105, "loss": 1.1754, "step": 20295 }, { "epoch": 0.29, "grad_norm": 0.54296875, "learning_rate": 0.00017854552466409722, "loss": 0.9697, "step": 20300 }, { "epoch": 0.29, "grad_norm": 0.60546875, "learning_rate": 0.00017853002683533044, "loss": 0.9202, "step": 20305 }, { "epoch": 0.29, "grad_norm": 0.55078125, "learning_rate": 0.00017851452408421215, "loss": 0.981, "step": 20310 }, { "epoch": 0.29, "grad_norm": 0.578125, "learning_rate": 0.00017849901641171404, "loss": 0.9659, "step": 20315 }, { "epoch": 0.29, "grad_norm": 0.5546875, "learning_rate": 0.00017848350381880824, "loss": 1.0127, "step": 20320 }, { "epoch": 0.29, "grad_norm": 0.51953125, "learning_rate": 0.00017846798630646697, "loss": 0.8861, "step": 20325 }, { "epoch": 0.29, "grad_norm": 0.84765625, "learning_rate": 0.00017845246387566296, "loss": 0.9737, "step": 20330 }, { "epoch": 0.29, "grad_norm": 0.5703125, "learning_rate": 0.00017843693652736922, "loss": 0.9233, "step": 20335 }, { "epoch": 0.29, "grad_norm": 0.55859375, "learning_rate": 0.0001784214042625589, "loss": 0.8343, "step": 20340 }, { "epoch": 0.29, "grad_norm": 0.5546875, "learning_rate": 0.00017840586708220567, "loss": 0.9862, "step": 20345 }, { "epoch": 0.29, "grad_norm": 0.56640625, "learning_rate": 0.0001783903249872834, "loss": 0.9632, "step": 20350 }, { "epoch": 0.29, "grad_norm": 0.55078125, "learning_rate": 0.00017837477797876629, "loss": 1.0639, "step": 20355 }, { "epoch": 0.29, "grad_norm": 0.53125, "learning_rate": 0.00017835922605762884, "loss": 0.9325, "step": 20360 }, { "epoch": 0.29, "grad_norm": 0.5546875, "learning_rate": 0.0001783436692248458, "loss": 0.8678, "step": 20365 }, { "epoch": 0.29, "grad_norm": 0.4921875, "learning_rate": 0.00017832810748139242, "loss": 1.0376, "step": 20370 }, { "epoch": 0.29, "grad_norm": 0.56640625, "learning_rate": 0.00017831254082824403, "loss": 0.9049, "step": 20375 }, { "epoch": 0.29, "grad_norm": 0.515625, "learning_rate": 0.00017829696926637638, "loss": 0.9619, "step": 20380 }, { "epoch": 0.29, "grad_norm": 0.56640625, "learning_rate": 0.00017828139279676551, "loss": 0.9297, "step": 20385 }, { "epoch": 0.29, "grad_norm": 0.57421875, "learning_rate": 0.00017826581142038782, "loss": 1.0391, "step": 20390 }, { "epoch": 0.29, "grad_norm": 0.59375, "learning_rate": 0.0001782502251382199, "loss": 1.0339, "step": 20395 }, { "epoch": 0.29, "grad_norm": 0.58203125, "learning_rate": 0.00017823463395123877, "loss": 1.0506, "step": 20400 }, { "epoch": 0.29, "grad_norm": 0.515625, "learning_rate": 0.00017821903786042166, "loss": 1.0093, "step": 20405 }, { "epoch": 0.29, "grad_norm": 0.578125, "learning_rate": 0.0001782034368667462, "loss": 0.8458, "step": 20410 }, { "epoch": 0.29, "grad_norm": 0.578125, "learning_rate": 0.00017818783097119022, "loss": 1.141, "step": 20415 }, { "epoch": 0.29, "grad_norm": 0.59375, "learning_rate": 0.00017817222017473198, "loss": 0.891, "step": 20420 }, { "epoch": 0.29, "grad_norm": 0.5390625, "learning_rate": 0.00017815660447834995, "loss": 0.838, "step": 20425 }, { "epoch": 0.29, "grad_norm": 0.59375, "learning_rate": 0.0001781409838830229, "loss": 0.9979, "step": 20430 }, { "epoch": 0.29, "grad_norm": 0.5625, "learning_rate": 0.00017812535838973004, "loss": 1.0084, "step": 20435 }, { "epoch": 0.29, "grad_norm": 0.55859375, "learning_rate": 0.0001781097279994507, "loss": 0.9987, "step": 20440 }, { "epoch": 0.29, "grad_norm": 0.5703125, "learning_rate": 0.00017809409271316465, "loss": 1.0072, "step": 20445 }, { "epoch": 0.29, "grad_norm": 0.578125, "learning_rate": 0.00017807845253185194, "loss": 0.9746, "step": 20450 }, { "epoch": 0.29, "grad_norm": 0.6015625, "learning_rate": 0.00017806280745649288, "loss": 1.0014, "step": 20455 }, { "epoch": 0.29, "grad_norm": 0.6171875, "learning_rate": 0.00017804715748806818, "loss": 1.0265, "step": 20460 }, { "epoch": 0.29, "grad_norm": 0.5390625, "learning_rate": 0.00017803150262755873, "loss": 0.874, "step": 20465 }, { "epoch": 0.29, "grad_norm": 0.5546875, "learning_rate": 0.00017801584287594583, "loss": 0.8944, "step": 20470 }, { "epoch": 0.29, "grad_norm": 0.458984375, "learning_rate": 0.00017800017823421102, "loss": 0.9781, "step": 20475 }, { "epoch": 0.29, "grad_norm": 0.65234375, "learning_rate": 0.00017798450870333625, "loss": 0.9153, "step": 20480 }, { "epoch": 0.29, "grad_norm": 0.58203125, "learning_rate": 0.00017796883428430364, "loss": 1.0358, "step": 20485 }, { "epoch": 0.29, "grad_norm": 0.5390625, "learning_rate": 0.00017795315497809571, "loss": 1.0432, "step": 20490 }, { "epoch": 0.29, "grad_norm": 0.5625, "learning_rate": 0.00017793747078569523, "loss": 0.943, "step": 20495 }, { "epoch": 0.29, "grad_norm": 0.51171875, "learning_rate": 0.0001779217817080853, "loss": 0.8886, "step": 20500 }, { "epoch": 0.29, "grad_norm": 0.54296875, "learning_rate": 0.00017790608774624937, "loss": 0.8394, "step": 20505 }, { "epoch": 0.29, "grad_norm": 0.53515625, "learning_rate": 0.00017789038890117113, "loss": 0.9902, "step": 20510 }, { "epoch": 0.29, "grad_norm": 0.58203125, "learning_rate": 0.00017787468517383462, "loss": 0.9307, "step": 20515 }, { "epoch": 0.29, "grad_norm": 0.494140625, "learning_rate": 0.00017785897656522412, "loss": 0.833, "step": 20520 }, { "epoch": 0.29, "grad_norm": 0.54296875, "learning_rate": 0.0001778432630763243, "loss": 0.8931, "step": 20525 }, { "epoch": 0.29, "grad_norm": 0.609375, "learning_rate": 0.00017782754470812014, "loss": 0.8259, "step": 20530 }, { "epoch": 0.29, "grad_norm": 0.58203125, "learning_rate": 0.0001778118214615968, "loss": 0.9197, "step": 20535 }, { "epoch": 0.29, "grad_norm": 0.5625, "learning_rate": 0.00017779609333773988, "loss": 0.7937, "step": 20540 }, { "epoch": 0.29, "grad_norm": 0.58203125, "learning_rate": 0.0001777803603375352, "loss": 0.959, "step": 20545 }, { "epoch": 0.29, "grad_norm": 0.5, "learning_rate": 0.000177764622461969, "loss": 0.7378, "step": 20550 }, { "epoch": 0.29, "grad_norm": 0.6328125, "learning_rate": 0.00017774887971202765, "loss": 0.9992, "step": 20555 }, { "epoch": 0.29, "grad_norm": 0.60546875, "learning_rate": 0.00017773313208869799, "loss": 1.0006, "step": 20560 }, { "epoch": 0.29, "grad_norm": 0.5625, "learning_rate": 0.00017771737959296706, "loss": 0.8735, "step": 20565 }, { "epoch": 0.3, "grad_norm": 0.5390625, "learning_rate": 0.0001777016222258223, "loss": 0.8964, "step": 20570 }, { "epoch": 0.3, "grad_norm": 0.56640625, "learning_rate": 0.00017768585998825134, "loss": 1.0407, "step": 20575 }, { "epoch": 0.3, "grad_norm": 0.53515625, "learning_rate": 0.00017767009288124222, "loss": 0.9184, "step": 20580 }, { "epoch": 0.3, "grad_norm": 0.52734375, "learning_rate": 0.0001776543209057832, "loss": 0.9547, "step": 20585 }, { "epoch": 0.3, "grad_norm": 0.53515625, "learning_rate": 0.0001776385440628629, "loss": 0.994, "step": 20590 }, { "epoch": 0.3, "grad_norm": 0.57421875, "learning_rate": 0.00017762276235347026, "loss": 0.8365, "step": 20595 }, { "epoch": 0.3, "grad_norm": 0.6328125, "learning_rate": 0.00017760697577859447, "loss": 0.9293, "step": 20600 }, { "epoch": 0.3, "grad_norm": 0.58203125, "learning_rate": 0.000177591184339225, "loss": 1.171, "step": 20605 }, { "epoch": 0.3, "grad_norm": 0.5859375, "learning_rate": 0.0001775753880363518, "loss": 1.011, "step": 20610 }, { "epoch": 0.3, "grad_norm": 0.55078125, "learning_rate": 0.00017755958687096487, "loss": 0.9621, "step": 20615 }, { "epoch": 0.3, "grad_norm": 0.68359375, "learning_rate": 0.00017754378084405473, "loss": 1.1206, "step": 20620 }, { "epoch": 0.3, "grad_norm": 0.59375, "learning_rate": 0.0001775279699566121, "loss": 0.9734, "step": 20625 }, { "epoch": 0.3, "grad_norm": 0.55078125, "learning_rate": 0.00017751215420962798, "loss": 0.9819, "step": 20630 }, { "epoch": 0.3, "grad_norm": 0.55078125, "learning_rate": 0.00017749633360409378, "loss": 1.086, "step": 20635 }, { "epoch": 0.3, "grad_norm": 0.5859375, "learning_rate": 0.00017748050814100113, "loss": 1.0491, "step": 20640 }, { "epoch": 0.3, "grad_norm": 0.53125, "learning_rate": 0.000177464677821342, "loss": 0.869, "step": 20645 }, { "epoch": 0.3, "grad_norm": 0.58203125, "learning_rate": 0.00017744884264610865, "loss": 1.0623, "step": 20650 }, { "epoch": 0.3, "grad_norm": 0.62890625, "learning_rate": 0.00017743300261629358, "loss": 0.8689, "step": 20655 }, { "epoch": 0.3, "grad_norm": 0.58984375, "learning_rate": 0.00017741715773288976, "loss": 1.1893, "step": 20660 }, { "epoch": 0.3, "grad_norm": 0.578125, "learning_rate": 0.0001774013079968903, "loss": 1.0839, "step": 20665 }, { "epoch": 0.3, "grad_norm": 0.61328125, "learning_rate": 0.0001773854534092887, "loss": 0.8849, "step": 20670 }, { "epoch": 0.3, "grad_norm": 0.5546875, "learning_rate": 0.00017736959397107879, "loss": 1.0321, "step": 20675 }, { "epoch": 0.3, "grad_norm": 0.5859375, "learning_rate": 0.00017735372968325454, "loss": 0.9963, "step": 20680 }, { "epoch": 0.3, "grad_norm": 0.5234375, "learning_rate": 0.00017733786054681047, "loss": 1.074, "step": 20685 }, { "epoch": 0.3, "grad_norm": 0.5546875, "learning_rate": 0.00017732198656274123, "loss": 1.1176, "step": 20690 }, { "epoch": 0.3, "grad_norm": 0.57421875, "learning_rate": 0.00017730610773204175, "loss": 0.9677, "step": 20695 }, { "epoch": 0.3, "grad_norm": 0.56640625, "learning_rate": 0.00017729022405570745, "loss": 1.0464, "step": 20700 }, { "epoch": 0.3, "grad_norm": 0.53125, "learning_rate": 0.00017727433553473384, "loss": 1.0147, "step": 20705 }, { "epoch": 0.3, "grad_norm": 0.5625, "learning_rate": 0.0001772584421701169, "loss": 0.9867, "step": 20710 }, { "epoch": 0.3, "grad_norm": 0.55078125, "learning_rate": 0.0001772425439628528, "loss": 0.8194, "step": 20715 }, { "epoch": 0.3, "grad_norm": 0.6640625, "learning_rate": 0.00017722664091393808, "loss": 0.9378, "step": 20720 }, { "epoch": 0.3, "grad_norm": 0.5546875, "learning_rate": 0.00017721073302436955, "loss": 0.9764, "step": 20725 }, { "epoch": 0.3, "grad_norm": 0.55859375, "learning_rate": 0.00017719482029514435, "loss": 0.9781, "step": 20730 }, { "epoch": 0.3, "grad_norm": 0.478515625, "learning_rate": 0.00017717890272725986, "loss": 0.9302, "step": 20735 }, { "epoch": 0.3, "grad_norm": 0.61328125, "learning_rate": 0.0001771629803217139, "loss": 0.9262, "step": 20740 }, { "epoch": 0.3, "grad_norm": 0.609375, "learning_rate": 0.0001771470530795044, "loss": 0.8817, "step": 20745 }, { "epoch": 0.3, "grad_norm": 0.57421875, "learning_rate": 0.00017713112100162977, "loss": 0.9105, "step": 20750 }, { "epoch": 0.3, "grad_norm": 0.6328125, "learning_rate": 0.00017711518408908866, "loss": 0.8789, "step": 20755 }, { "epoch": 0.3, "grad_norm": 0.63671875, "learning_rate": 0.00017709924234287993, "loss": 1.0325, "step": 20760 }, { "epoch": 0.3, "grad_norm": 0.5234375, "learning_rate": 0.00017708329576400294, "loss": 1.0813, "step": 20765 }, { "epoch": 0.3, "grad_norm": 0.57421875, "learning_rate": 0.00017706734435345714, "loss": 0.8091, "step": 20770 }, { "epoch": 0.3, "grad_norm": 0.61328125, "learning_rate": 0.00017705138811224241, "loss": 0.9214, "step": 20775 }, { "epoch": 0.3, "grad_norm": 0.56640625, "learning_rate": 0.00017703542704135895, "loss": 1.1351, "step": 20780 }, { "epoch": 0.3, "grad_norm": 0.60546875, "learning_rate": 0.0001770194611418072, "loss": 0.9709, "step": 20785 }, { "epoch": 0.3, "grad_norm": 0.578125, "learning_rate": 0.0001770034904145879, "loss": 1.0779, "step": 20790 }, { "epoch": 0.3, "grad_norm": 0.5390625, "learning_rate": 0.0001769875148607021, "loss": 1.0441, "step": 20795 }, { "epoch": 0.3, "grad_norm": 0.6015625, "learning_rate": 0.0001769715344811512, "loss": 1.1271, "step": 20800 }, { "epoch": 0.3, "grad_norm": 0.6640625, "learning_rate": 0.00017695554927693686, "loss": 0.7982, "step": 20805 }, { "epoch": 0.3, "grad_norm": 0.55859375, "learning_rate": 0.00017693955924906102, "loss": 0.9824, "step": 20810 }, { "epoch": 0.3, "grad_norm": 0.54296875, "learning_rate": 0.000176923564398526, "loss": 1.0395, "step": 20815 }, { "epoch": 0.3, "grad_norm": 0.59765625, "learning_rate": 0.00017690756472633437, "loss": 0.8858, "step": 20820 }, { "epoch": 0.3, "grad_norm": 0.61328125, "learning_rate": 0.00017689156023348898, "loss": 1.0001, "step": 20825 }, { "epoch": 0.3, "grad_norm": 0.58203125, "learning_rate": 0.000176875550920993, "loss": 0.9744, "step": 20830 }, { "epoch": 0.3, "grad_norm": 0.466796875, "learning_rate": 0.00017685953678984998, "loss": 0.8278, "step": 20835 }, { "epoch": 0.3, "grad_norm": 0.625, "learning_rate": 0.0001768435178410636, "loss": 0.8934, "step": 20840 }, { "epoch": 0.3, "grad_norm": 0.5703125, "learning_rate": 0.00017682749407563805, "loss": 0.958, "step": 20845 }, { "epoch": 0.3, "grad_norm": 0.7578125, "learning_rate": 0.00017681146549457764, "loss": 1.0769, "step": 20850 }, { "epoch": 0.3, "grad_norm": 0.59375, "learning_rate": 0.00017679543209888712, "loss": 1.1344, "step": 20855 }, { "epoch": 0.3, "grad_norm": 0.6015625, "learning_rate": 0.00017677939388957144, "loss": 0.9309, "step": 20860 }, { "epoch": 0.3, "grad_norm": 0.70703125, "learning_rate": 0.00017676335086763592, "loss": 0.9748, "step": 20865 }, { "epoch": 0.3, "grad_norm": 0.62890625, "learning_rate": 0.00017674730303408613, "loss": 1.0452, "step": 20870 }, { "epoch": 0.3, "grad_norm": 0.5625, "learning_rate": 0.00017673125038992796, "loss": 1.0058, "step": 20875 }, { "epoch": 0.3, "grad_norm": 0.55078125, "learning_rate": 0.00017671519293616767, "loss": 1.0156, "step": 20880 }, { "epoch": 0.3, "grad_norm": 0.62890625, "learning_rate": 0.00017669913067381167, "loss": 0.8449, "step": 20885 }, { "epoch": 0.3, "grad_norm": 0.498046875, "learning_rate": 0.00017668306360386685, "loss": 0.9541, "step": 20890 }, { "epoch": 0.3, "grad_norm": 0.53515625, "learning_rate": 0.00017666699172734022, "loss": 0.931, "step": 20895 }, { "epoch": 0.3, "grad_norm": 0.55859375, "learning_rate": 0.00017665091504523927, "loss": 0.9573, "step": 20900 }, { "epoch": 0.3, "grad_norm": 0.546875, "learning_rate": 0.00017663483355857164, "loss": 0.9587, "step": 20905 }, { "epoch": 0.3, "grad_norm": 0.53125, "learning_rate": 0.0001766187472683454, "loss": 1.0087, "step": 20910 }, { "epoch": 0.3, "grad_norm": 0.66015625, "learning_rate": 0.00017660265617556878, "loss": 1.1384, "step": 20915 }, { "epoch": 0.3, "grad_norm": 0.578125, "learning_rate": 0.00017658656028125046, "loss": 1.1095, "step": 20920 }, { "epoch": 0.3, "grad_norm": 0.5546875, "learning_rate": 0.00017657045958639932, "loss": 1.0633, "step": 20925 }, { "epoch": 0.3, "grad_norm": 0.5625, "learning_rate": 0.00017655435409202452, "loss": 1.0446, "step": 20930 }, { "epoch": 0.3, "grad_norm": 0.66015625, "learning_rate": 0.00017653824379913565, "loss": 1.067, "step": 20935 }, { "epoch": 0.3, "grad_norm": 0.546875, "learning_rate": 0.0001765221287087425, "loss": 1.037, "step": 20940 }, { "epoch": 0.3, "grad_norm": 0.62109375, "learning_rate": 0.00017650600882185513, "loss": 0.9658, "step": 20945 }, { "epoch": 0.3, "grad_norm": 0.59765625, "learning_rate": 0.000176489884139484, "loss": 0.9823, "step": 20950 }, { "epoch": 0.3, "grad_norm": 0.5703125, "learning_rate": 0.00017647375466263988, "loss": 0.9067, "step": 20955 }, { "epoch": 0.3, "grad_norm": 0.51953125, "learning_rate": 0.00017645762039233368, "loss": 0.9366, "step": 20960 }, { "epoch": 0.3, "grad_norm": 0.5546875, "learning_rate": 0.00017644148132957673, "loss": 0.9275, "step": 20965 }, { "epoch": 0.3, "grad_norm": 0.63671875, "learning_rate": 0.0001764253374753807, "loss": 0.8771, "step": 20970 }, { "epoch": 0.3, "grad_norm": 0.64453125, "learning_rate": 0.00017640918883075747, "loss": 1.0067, "step": 20975 }, { "epoch": 0.3, "grad_norm": 0.60546875, "learning_rate": 0.00017639303539671928, "loss": 1.0476, "step": 20980 }, { "epoch": 0.3, "grad_norm": 0.4609375, "learning_rate": 0.00017637687717427859, "loss": 1.1025, "step": 20985 }, { "epoch": 0.3, "grad_norm": 0.5859375, "learning_rate": 0.00017636071416444828, "loss": 1.0378, "step": 20990 }, { "epoch": 0.3, "grad_norm": 0.52734375, "learning_rate": 0.00017634454636824144, "loss": 0.9369, "step": 20995 }, { "epoch": 0.3, "grad_norm": 0.66015625, "learning_rate": 0.0001763283737866715, "loss": 1.0305, "step": 21000 }, { "epoch": 0.3, "grad_norm": 0.70703125, "learning_rate": 0.0001763121964207521, "loss": 1.0911, "step": 21005 }, { "epoch": 0.3, "grad_norm": 0.62109375, "learning_rate": 0.00017629601427149736, "loss": 0.963, "step": 21010 }, { "epoch": 0.3, "grad_norm": 0.5703125, "learning_rate": 0.00017627982733992153, "loss": 0.9377, "step": 21015 }, { "epoch": 0.3, "grad_norm": 0.55859375, "learning_rate": 0.0001762636356270393, "loss": 1.0163, "step": 21020 }, { "epoch": 0.3, "grad_norm": 0.5390625, "learning_rate": 0.00017624743913386546, "loss": 1.0069, "step": 21025 }, { "epoch": 0.3, "grad_norm": 0.56640625, "learning_rate": 0.00017623123786141532, "loss": 0.9348, "step": 21030 }, { "epoch": 0.3, "grad_norm": 0.58203125, "learning_rate": 0.0001762150318107044, "loss": 0.9641, "step": 21035 }, { "epoch": 0.3, "grad_norm": 0.57421875, "learning_rate": 0.00017619882098274844, "loss": 0.9824, "step": 21040 }, { "epoch": 0.3, "grad_norm": 0.59765625, "learning_rate": 0.00017618260537856365, "loss": 0.978, "step": 21045 }, { "epoch": 0.3, "grad_norm": 0.59375, "learning_rate": 0.00017616638499916634, "loss": 1.2459, "step": 21050 }, { "epoch": 0.3, "grad_norm": 0.5546875, "learning_rate": 0.0001761501598455733, "loss": 0.9898, "step": 21055 }, { "epoch": 0.3, "grad_norm": 0.56640625, "learning_rate": 0.0001761339299188015, "loss": 1.0247, "step": 21060 }, { "epoch": 0.3, "grad_norm": 0.6328125, "learning_rate": 0.0001761176952198683, "loss": 1.0085, "step": 21065 }, { "epoch": 0.3, "grad_norm": 0.5625, "learning_rate": 0.00017610145574979122, "loss": 1.0736, "step": 21070 }, { "epoch": 0.3, "grad_norm": 0.5390625, "learning_rate": 0.00017608521150958825, "loss": 1.0348, "step": 21075 }, { "epoch": 0.3, "grad_norm": 0.51171875, "learning_rate": 0.00017606896250027757, "loss": 0.8328, "step": 21080 }, { "epoch": 0.3, "grad_norm": 0.57421875, "learning_rate": 0.00017605270872287767, "loss": 0.8138, "step": 21085 }, { "epoch": 0.3, "grad_norm": 0.54296875, "learning_rate": 0.0001760364501784074, "loss": 1.0117, "step": 21090 }, { "epoch": 0.3, "grad_norm": 0.59375, "learning_rate": 0.0001760201868678858, "loss": 0.9574, "step": 21095 }, { "epoch": 0.3, "grad_norm": 0.61328125, "learning_rate": 0.00017600391879233233, "loss": 1.0016, "step": 21100 }, { "epoch": 0.3, "grad_norm": 0.5625, "learning_rate": 0.00017598764595276665, "loss": 1.1719, "step": 21105 }, { "epoch": 0.3, "grad_norm": 0.51953125, "learning_rate": 0.0001759713683502088, "loss": 0.8461, "step": 21110 }, { "epoch": 0.3, "grad_norm": 0.5, "learning_rate": 0.00017595508598567902, "loss": 0.9367, "step": 21115 }, { "epoch": 0.3, "grad_norm": 0.609375, "learning_rate": 0.00017593879886019799, "loss": 1.0118, "step": 21120 }, { "epoch": 0.3, "grad_norm": 0.5546875, "learning_rate": 0.00017592250697478652, "loss": 0.8151, "step": 21125 }, { "epoch": 0.3, "grad_norm": 0.54296875, "learning_rate": 0.0001759062103304659, "loss": 0.8744, "step": 21130 }, { "epoch": 0.3, "grad_norm": 0.55078125, "learning_rate": 0.0001758899089282575, "loss": 0.853, "step": 21135 }, { "epoch": 0.3, "grad_norm": 0.55859375, "learning_rate": 0.00017587360276918323, "loss": 0.9114, "step": 21140 }, { "epoch": 0.3, "grad_norm": 0.59765625, "learning_rate": 0.0001758572918542651, "loss": 1.012, "step": 21145 }, { "epoch": 0.3, "grad_norm": 0.5390625, "learning_rate": 0.00017584097618452552, "loss": 0.9139, "step": 21150 }, { "epoch": 0.3, "grad_norm": 0.53515625, "learning_rate": 0.0001758246557609872, "loss": 0.8946, "step": 21155 }, { "epoch": 0.3, "grad_norm": 0.54296875, "learning_rate": 0.0001758083305846731, "loss": 1.1681, "step": 21160 }, { "epoch": 0.3, "grad_norm": 0.55859375, "learning_rate": 0.00017579200065660647, "loss": 0.9387, "step": 21165 }, { "epoch": 0.3, "grad_norm": 0.61328125, "learning_rate": 0.00017577566597781093, "loss": 1.0792, "step": 21170 }, { "epoch": 0.3, "grad_norm": 0.75, "learning_rate": 0.00017575932654931035, "loss": 1.1199, "step": 21175 }, { "epoch": 0.3, "grad_norm": 0.546875, "learning_rate": 0.00017574298237212887, "loss": 0.913, "step": 21180 }, { "epoch": 0.3, "grad_norm": 0.51171875, "learning_rate": 0.00017572663344729103, "loss": 0.9306, "step": 21185 }, { "epoch": 0.3, "grad_norm": 0.5078125, "learning_rate": 0.00017571027977582158, "loss": 0.9791, "step": 21190 }, { "epoch": 0.3, "grad_norm": 0.51171875, "learning_rate": 0.00017569392135874552, "loss": 0.9209, "step": 21195 }, { "epoch": 0.3, "grad_norm": 0.62890625, "learning_rate": 0.00017567755819708828, "loss": 1.0335, "step": 21200 }, { "epoch": 0.3, "grad_norm": 0.58203125, "learning_rate": 0.0001756611902918755, "loss": 0.9791, "step": 21205 }, { "epoch": 0.3, "grad_norm": 0.69140625, "learning_rate": 0.00017564481764413312, "loss": 0.9551, "step": 21210 }, { "epoch": 0.3, "grad_norm": 0.55078125, "learning_rate": 0.00017562844025488747, "loss": 0.9948, "step": 21215 }, { "epoch": 0.3, "grad_norm": 0.55078125, "learning_rate": 0.000175612058125165, "loss": 0.9004, "step": 21220 }, { "epoch": 0.3, "grad_norm": 0.55859375, "learning_rate": 0.00017559567125599265, "loss": 0.9636, "step": 21225 }, { "epoch": 0.3, "grad_norm": 0.59375, "learning_rate": 0.0001755792796483975, "loss": 0.9681, "step": 21230 }, { "epoch": 0.3, "grad_norm": 0.6015625, "learning_rate": 0.00017556288330340705, "loss": 0.9337, "step": 21235 }, { "epoch": 0.3, "grad_norm": 0.546875, "learning_rate": 0.000175546482222049, "loss": 0.8969, "step": 21240 }, { "epoch": 0.3, "grad_norm": 0.5703125, "learning_rate": 0.0001755300764053514, "loss": 0.8336, "step": 21245 }, { "epoch": 0.3, "grad_norm": 0.68359375, "learning_rate": 0.00017551366585434258, "loss": 0.9793, "step": 21250 }, { "epoch": 0.3, "grad_norm": 0.578125, "learning_rate": 0.00017549725057005122, "loss": 0.9903, "step": 21255 }, { "epoch": 0.3, "grad_norm": 0.53515625, "learning_rate": 0.0001754808305535062, "loss": 0.8948, "step": 21260 }, { "epoch": 0.31, "grad_norm": 0.61328125, "learning_rate": 0.00017546440580573674, "loss": 1.1077, "step": 21265 }, { "epoch": 0.31, "grad_norm": 0.5390625, "learning_rate": 0.00017544797632777236, "loss": 0.9889, "step": 21270 }, { "epoch": 0.31, "grad_norm": 0.51953125, "learning_rate": 0.0001754315421206429, "loss": 0.999, "step": 21275 }, { "epoch": 0.31, "grad_norm": 0.6328125, "learning_rate": 0.0001754151031853785, "loss": 1.0356, "step": 21280 }, { "epoch": 0.31, "grad_norm": 0.54296875, "learning_rate": 0.00017539865952300953, "loss": 0.9596, "step": 21285 }, { "epoch": 0.31, "grad_norm": 0.5390625, "learning_rate": 0.0001753822111345667, "loss": 1.0203, "step": 21290 }, { "epoch": 0.31, "grad_norm": 0.58203125, "learning_rate": 0.00017536575802108102, "loss": 1.1007, "step": 21295 }, { "epoch": 0.31, "grad_norm": 0.5390625, "learning_rate": 0.00017534930018358382, "loss": 0.9677, "step": 21300 }, { "epoch": 0.31, "grad_norm": 0.58984375, "learning_rate": 0.00017533283762310667, "loss": 0.8472, "step": 21305 }, { "epoch": 0.31, "grad_norm": 1.0, "learning_rate": 0.00017531637034068142, "loss": 1.0809, "step": 21310 }, { "epoch": 0.31, "grad_norm": 0.625, "learning_rate": 0.0001752998983373403, "loss": 1.0434, "step": 21315 }, { "epoch": 0.31, "grad_norm": 0.5546875, "learning_rate": 0.00017528342161411583, "loss": 1.0721, "step": 21320 }, { "epoch": 0.31, "grad_norm": 0.52734375, "learning_rate": 0.00017526694017204074, "loss": 0.9382, "step": 21325 }, { "epoch": 0.31, "grad_norm": 0.482421875, "learning_rate": 0.0001752504540121481, "loss": 0.9678, "step": 21330 }, { "epoch": 0.31, "grad_norm": 0.58984375, "learning_rate": 0.00017523396313547134, "loss": 0.8703, "step": 21335 }, { "epoch": 0.31, "grad_norm": 0.66796875, "learning_rate": 0.00017521746754304407, "loss": 0.9302, "step": 21340 }, { "epoch": 0.31, "grad_norm": 0.59765625, "learning_rate": 0.00017520096723590024, "loss": 1.0109, "step": 21345 }, { "epoch": 0.31, "grad_norm": 0.62890625, "learning_rate": 0.00017518446221507415, "loss": 0.9523, "step": 21350 }, { "epoch": 0.31, "grad_norm": 0.6796875, "learning_rate": 0.00017516795248160037, "loss": 0.9953, "step": 21355 }, { "epoch": 0.31, "grad_norm": 0.640625, "learning_rate": 0.00017515143803651368, "loss": 0.9026, "step": 21360 }, { "epoch": 0.31, "grad_norm": 0.515625, "learning_rate": 0.00017513491888084928, "loss": 0.8433, "step": 21365 }, { "epoch": 0.31, "grad_norm": 0.6015625, "learning_rate": 0.0001751183950156426, "loss": 1.0761, "step": 21370 }, { "epoch": 0.31, "grad_norm": 0.62890625, "learning_rate": 0.00017510186644192937, "loss": 0.9921, "step": 21375 }, { "epoch": 0.31, "grad_norm": 0.490234375, "learning_rate": 0.0001750853331607456, "loss": 0.9785, "step": 21380 }, { "epoch": 0.31, "grad_norm": 0.5078125, "learning_rate": 0.00017506879517312763, "loss": 0.8458, "step": 21385 }, { "epoch": 0.31, "grad_norm": 0.52734375, "learning_rate": 0.0001750522524801121, "loss": 1.0463, "step": 21390 }, { "epoch": 0.31, "grad_norm": 0.56640625, "learning_rate": 0.0001750357050827359, "loss": 0.8554, "step": 21395 }, { "epoch": 0.31, "grad_norm": 0.6171875, "learning_rate": 0.00017501915298203624, "loss": 0.9932, "step": 21400 }, { "epoch": 0.31, "grad_norm": 0.451171875, "learning_rate": 0.00017500259617905063, "loss": 0.8362, "step": 21405 }, { "epoch": 0.31, "grad_norm": 0.5546875, "learning_rate": 0.00017498603467481689, "loss": 0.9355, "step": 21410 }, { "epoch": 0.31, "grad_norm": 0.54296875, "learning_rate": 0.0001749694684703731, "loss": 0.9487, "step": 21415 }, { "epoch": 0.31, "grad_norm": 0.578125, "learning_rate": 0.0001749528975667576, "loss": 0.7915, "step": 21420 }, { "epoch": 0.31, "grad_norm": 0.6640625, "learning_rate": 0.00017493632196500914, "loss": 1.0153, "step": 21425 }, { "epoch": 0.31, "grad_norm": 0.59375, "learning_rate": 0.0001749197416661667, "loss": 1.1029, "step": 21430 }, { "epoch": 0.31, "grad_norm": 0.5078125, "learning_rate": 0.0001749031566712695, "loss": 0.8185, "step": 21435 }, { "epoch": 0.31, "grad_norm": 0.55078125, "learning_rate": 0.00017488656698135712, "loss": 0.8799, "step": 21440 }, { "epoch": 0.31, "grad_norm": 0.55078125, "learning_rate": 0.00017486997259746946, "loss": 0.9096, "step": 21445 }, { "epoch": 0.31, "grad_norm": 0.62109375, "learning_rate": 0.00017485337352064666, "loss": 1.0953, "step": 21450 }, { "epoch": 0.31, "grad_norm": 0.5859375, "learning_rate": 0.00017483676975192913, "loss": 0.8612, "step": 21455 }, { "epoch": 0.31, "grad_norm": 0.66796875, "learning_rate": 0.00017482016129235763, "loss": 1.1556, "step": 21460 }, { "epoch": 0.31, "grad_norm": 0.60546875, "learning_rate": 0.00017480354814297322, "loss": 0.9108, "step": 21465 }, { "epoch": 0.31, "grad_norm": 0.50390625, "learning_rate": 0.00017478693030481722, "loss": 0.9021, "step": 21470 }, { "epoch": 0.31, "grad_norm": 0.494140625, "learning_rate": 0.00017477030777893123, "loss": 0.9815, "step": 21475 }, { "epoch": 0.31, "grad_norm": 0.55078125, "learning_rate": 0.00017475368056635724, "loss": 1.0603, "step": 21480 }, { "epoch": 0.31, "grad_norm": 0.5078125, "learning_rate": 0.00017473704866813737, "loss": 0.9242, "step": 21485 }, { "epoch": 0.31, "grad_norm": 0.57421875, "learning_rate": 0.00017472041208531423, "loss": 0.8487, "step": 21490 }, { "epoch": 0.31, "grad_norm": 0.56640625, "learning_rate": 0.00017470377081893048, "loss": 1.0269, "step": 21495 }, { "epoch": 0.31, "grad_norm": 0.61328125, "learning_rate": 0.00017468712487002933, "loss": 0.9441, "step": 21500 }, { "epoch": 0.31, "grad_norm": 0.5859375, "learning_rate": 0.00017467047423965415, "loss": 1.1466, "step": 21505 }, { "epoch": 0.31, "grad_norm": 0.53515625, "learning_rate": 0.0001746538189288486, "loss": 1.0247, "step": 21510 }, { "epoch": 0.31, "grad_norm": 0.53125, "learning_rate": 0.00017463715893865664, "loss": 1.0308, "step": 21515 }, { "epoch": 0.31, "grad_norm": 0.55078125, "learning_rate": 0.00017462049427012256, "loss": 0.9886, "step": 21520 }, { "epoch": 0.31, "grad_norm": 0.52734375, "learning_rate": 0.0001746038249242909, "loss": 0.914, "step": 21525 }, { "epoch": 0.31, "grad_norm": 0.6328125, "learning_rate": 0.00017458715090220656, "loss": 0.9864, "step": 21530 }, { "epoch": 0.31, "grad_norm": 0.57421875, "learning_rate": 0.00017457047220491463, "loss": 1.0514, "step": 21535 }, { "epoch": 0.31, "grad_norm": 0.578125, "learning_rate": 0.00017455378883346056, "loss": 1.0187, "step": 21540 }, { "epoch": 0.31, "grad_norm": 0.75, "learning_rate": 0.00017453710078889012, "loss": 0.9507, "step": 21545 }, { "epoch": 0.31, "grad_norm": 0.486328125, "learning_rate": 0.0001745204080722493, "loss": 1.0244, "step": 21550 }, { "epoch": 0.31, "grad_norm": 0.51171875, "learning_rate": 0.00017450371068458446, "loss": 0.9657, "step": 21555 }, { "epoch": 0.31, "grad_norm": 0.76171875, "learning_rate": 0.00017448700862694215, "loss": 1.0195, "step": 21560 }, { "epoch": 0.31, "grad_norm": 0.55078125, "learning_rate": 0.00017447030190036932, "loss": 0.9239, "step": 21565 }, { "epoch": 0.31, "grad_norm": 0.49609375, "learning_rate": 0.00017445359050591313, "loss": 0.9354, "step": 21570 }, { "epoch": 0.31, "grad_norm": 0.7109375, "learning_rate": 0.0001744368744446211, "loss": 1.0583, "step": 21575 }, { "epoch": 0.31, "grad_norm": 0.58203125, "learning_rate": 0.00017442015371754103, "loss": 0.9223, "step": 21580 }, { "epoch": 0.31, "grad_norm": 0.55859375, "learning_rate": 0.00017440342832572095, "loss": 1.046, "step": 21585 }, { "epoch": 0.31, "grad_norm": 0.58203125, "learning_rate": 0.00017438669827020924, "loss": 1.1004, "step": 21590 }, { "epoch": 0.31, "grad_norm": 0.55078125, "learning_rate": 0.00017436996355205456, "loss": 0.8468, "step": 21595 }, { "epoch": 0.31, "grad_norm": 0.51171875, "learning_rate": 0.00017435322417230586, "loss": 1.0398, "step": 21600 }, { "epoch": 0.31, "grad_norm": 0.578125, "learning_rate": 0.0001743364801320124, "loss": 1.0182, "step": 21605 }, { "epoch": 0.31, "grad_norm": 0.63671875, "learning_rate": 0.00017431973143222372, "loss": 1.0255, "step": 21610 }, { "epoch": 0.31, "grad_norm": 0.5546875, "learning_rate": 0.0001743029780739896, "loss": 0.8777, "step": 21615 }, { "epoch": 0.31, "grad_norm": 0.6015625, "learning_rate": 0.00017428622005836018, "loss": 0.9486, "step": 21620 }, { "epoch": 0.31, "grad_norm": 0.51953125, "learning_rate": 0.0001742694573863859, "loss": 0.9823, "step": 21625 }, { "epoch": 0.31, "grad_norm": 0.66796875, "learning_rate": 0.00017425269005911744, "loss": 0.9608, "step": 21630 }, { "epoch": 0.31, "grad_norm": 0.609375, "learning_rate": 0.00017423591807760582, "loss": 0.9378, "step": 21635 }, { "epoch": 0.31, "grad_norm": 0.578125, "learning_rate": 0.0001742191414429023, "loss": 0.8654, "step": 21640 }, { "epoch": 0.31, "grad_norm": 0.6171875, "learning_rate": 0.00017420236015605847, "loss": 0.9431, "step": 21645 }, { "epoch": 0.31, "grad_norm": 0.470703125, "learning_rate": 0.0001741855742181262, "loss": 0.9499, "step": 21650 }, { "epoch": 0.31, "grad_norm": 0.57421875, "learning_rate": 0.00017416878363015763, "loss": 1.0357, "step": 21655 }, { "epoch": 0.31, "grad_norm": 0.59765625, "learning_rate": 0.00017415198839320525, "loss": 0.9856, "step": 21660 }, { "epoch": 0.31, "grad_norm": 0.55859375, "learning_rate": 0.00017413518850832173, "loss": 0.992, "step": 21665 }, { "epoch": 0.31, "grad_norm": 0.57421875, "learning_rate": 0.00017411838397656023, "loss": 0.9234, "step": 21670 }, { "epoch": 0.31, "grad_norm": 0.625, "learning_rate": 0.00017410157479897398, "loss": 0.9881, "step": 21675 }, { "epoch": 0.31, "grad_norm": 0.5390625, "learning_rate": 0.00017408476097661662, "loss": 1.0233, "step": 21680 }, { "epoch": 0.31, "grad_norm": 0.56640625, "learning_rate": 0.00017406794251054208, "loss": 1.0662, "step": 21685 }, { "epoch": 0.31, "grad_norm": 0.56640625, "learning_rate": 0.00017405111940180454, "loss": 0.9822, "step": 21690 }, { "epoch": 0.31, "grad_norm": 0.6171875, "learning_rate": 0.0001740342916514585, "loss": 0.9748, "step": 21695 }, { "epoch": 0.31, "grad_norm": 0.466796875, "learning_rate": 0.00017401745926055875, "loss": 0.7273, "step": 21700 }, { "epoch": 0.31, "grad_norm": 0.61328125, "learning_rate": 0.00017400062223016033, "loss": 0.8134, "step": 21705 }, { "epoch": 0.31, "grad_norm": 0.64453125, "learning_rate": 0.00017398378056131866, "loss": 1.0092, "step": 21710 }, { "epoch": 0.31, "grad_norm": 0.61328125, "learning_rate": 0.00017396693425508934, "loss": 1.0338, "step": 21715 }, { "epoch": 0.31, "grad_norm": 0.5546875, "learning_rate": 0.00017395008331252834, "loss": 0.9591, "step": 21720 }, { "epoch": 0.31, "grad_norm": 0.52734375, "learning_rate": 0.00017393322773469192, "loss": 0.9183, "step": 21725 }, { "epoch": 0.31, "grad_norm": 0.5390625, "learning_rate": 0.00017391636752263657, "loss": 1.0216, "step": 21730 }, { "epoch": 0.31, "grad_norm": 0.703125, "learning_rate": 0.0001738995026774191, "loss": 0.9468, "step": 21735 }, { "epoch": 0.31, "grad_norm": 0.65625, "learning_rate": 0.00017388263320009667, "loss": 0.9883, "step": 21740 }, { "epoch": 0.31, "grad_norm": 0.62109375, "learning_rate": 0.0001738657590917266, "loss": 0.9116, "step": 21745 }, { "epoch": 0.31, "grad_norm": 0.64453125, "learning_rate": 0.0001738488803533667, "loss": 1.0142, "step": 21750 }, { "epoch": 0.31, "grad_norm": 0.62890625, "learning_rate": 0.00017383199698607483, "loss": 1.0342, "step": 21755 }, { "epoch": 0.31, "grad_norm": 0.56640625, "learning_rate": 0.0001738151089909093, "loss": 0.9699, "step": 21760 }, { "epoch": 0.31, "grad_norm": 0.61328125, "learning_rate": 0.00017379821636892865, "loss": 1.0844, "step": 21765 }, { "epoch": 0.31, "grad_norm": 0.60546875, "learning_rate": 0.0001737813191211918, "loss": 1.0149, "step": 21770 }, { "epoch": 0.31, "grad_norm": 0.546875, "learning_rate": 0.0001737644172487578, "loss": 0.9203, "step": 21775 }, { "epoch": 0.31, "grad_norm": 0.578125, "learning_rate": 0.0001737475107526861, "loss": 0.928, "step": 21780 }, { "epoch": 0.31, "grad_norm": 0.61328125, "learning_rate": 0.00017373059963403647, "loss": 1.048, "step": 21785 }, { "epoch": 0.31, "grad_norm": 0.65625, "learning_rate": 0.00017371368389386887, "loss": 0.955, "step": 21790 }, { "epoch": 0.31, "grad_norm": 0.56640625, "learning_rate": 0.0001736967635332436, "loss": 0.9854, "step": 21795 }, { "epoch": 0.31, "grad_norm": 0.5859375, "learning_rate": 0.0001736798385532213, "loss": 0.8858, "step": 21800 }, { "epoch": 0.31, "grad_norm": 0.6171875, "learning_rate": 0.00017366290895486276, "loss": 0.9952, "step": 21805 }, { "epoch": 0.31, "grad_norm": 0.56640625, "learning_rate": 0.0001736459747392292, "loss": 0.9809, "step": 21810 }, { "epoch": 0.31, "grad_norm": 0.51953125, "learning_rate": 0.00017362903590738204, "loss": 0.9138, "step": 21815 }, { "epoch": 0.31, "grad_norm": 0.66796875, "learning_rate": 0.0001736120924603831, "loss": 1.1694, "step": 21820 }, { "epoch": 0.31, "grad_norm": 0.578125, "learning_rate": 0.00017359514439929435, "loss": 0.9206, "step": 21825 }, { "epoch": 0.31, "grad_norm": 0.59765625, "learning_rate": 0.00017357819172517816, "loss": 0.7781, "step": 21830 }, { "epoch": 0.31, "grad_norm": 0.5546875, "learning_rate": 0.00017356123443909707, "loss": 0.9021, "step": 21835 }, { "epoch": 0.31, "grad_norm": 0.65625, "learning_rate": 0.00017354427254211404, "loss": 1.0017, "step": 21840 }, { "epoch": 0.31, "grad_norm": 0.5625, "learning_rate": 0.00017352730603529225, "loss": 0.8842, "step": 21845 }, { "epoch": 0.31, "grad_norm": 0.546875, "learning_rate": 0.0001735103349196952, "loss": 0.9533, "step": 21850 }, { "epoch": 0.31, "grad_norm": 0.578125, "learning_rate": 0.0001734933591963866, "loss": 1.0208, "step": 21855 }, { "epoch": 0.31, "grad_norm": 0.5859375, "learning_rate": 0.00017347637886643057, "loss": 0.8829, "step": 21860 }, { "epoch": 0.31, "grad_norm": 0.53125, "learning_rate": 0.0001734593939308914, "loss": 1.0942, "step": 21865 }, { "epoch": 0.31, "grad_norm": 0.60546875, "learning_rate": 0.00017344240439083378, "loss": 0.9608, "step": 21870 }, { "epoch": 0.31, "grad_norm": 0.625, "learning_rate": 0.0001734254102473226, "loss": 0.916, "step": 21875 }, { "epoch": 0.31, "grad_norm": 0.6171875, "learning_rate": 0.00017340841150142308, "loss": 1.1468, "step": 21880 }, { "epoch": 0.31, "grad_norm": 0.5390625, "learning_rate": 0.00017339140815420074, "loss": 0.8373, "step": 21885 }, { "epoch": 0.31, "grad_norm": 0.578125, "learning_rate": 0.00017337440020672134, "loss": 1.135, "step": 21890 }, { "epoch": 0.31, "grad_norm": 0.53515625, "learning_rate": 0.00017335738766005097, "loss": 1.0092, "step": 21895 }, { "epoch": 0.31, "grad_norm": 0.6796875, "learning_rate": 0.000173340370515256, "loss": 0.9375, "step": 21900 }, { "epoch": 0.31, "grad_norm": 0.625, "learning_rate": 0.00017332334877340303, "loss": 0.9806, "step": 21905 }, { "epoch": 0.31, "grad_norm": 0.56640625, "learning_rate": 0.0001733063224355591, "loss": 1.0827, "step": 21910 }, { "epoch": 0.31, "grad_norm": 0.5234375, "learning_rate": 0.00017328929150279142, "loss": 1.0186, "step": 21915 }, { "epoch": 0.31, "grad_norm": 0.58984375, "learning_rate": 0.0001732722559761674, "loss": 1.0553, "step": 21920 }, { "epoch": 0.31, "grad_norm": 0.5390625, "learning_rate": 0.000173255215856755, "loss": 0.921, "step": 21925 }, { "epoch": 0.31, "grad_norm": 0.48828125, "learning_rate": 0.00017323817114562218, "loss": 0.9589, "step": 21930 }, { "epoch": 0.31, "grad_norm": 0.53515625, "learning_rate": 0.00017322112184383742, "loss": 0.9213, "step": 21935 }, { "epoch": 0.31, "grad_norm": 0.5546875, "learning_rate": 0.00017320406795246933, "loss": 0.9863, "step": 21940 }, { "epoch": 0.31, "grad_norm": 0.55078125, "learning_rate": 0.00017318700947258688, "loss": 0.8876, "step": 21945 }, { "epoch": 0.31, "grad_norm": 0.62890625, "learning_rate": 0.00017316994640525935, "loss": 0.7832, "step": 21950 }, { "epoch": 0.31, "grad_norm": 0.625, "learning_rate": 0.00017315287875155623, "loss": 0.9013, "step": 21955 }, { "epoch": 0.32, "grad_norm": 0.68359375, "learning_rate": 0.00017313580651254738, "loss": 1.0958, "step": 21960 }, { "epoch": 0.32, "grad_norm": 0.51953125, "learning_rate": 0.00017311872968930281, "loss": 1.1191, "step": 21965 }, { "epoch": 0.32, "grad_norm": 0.65625, "learning_rate": 0.00017310164828289305, "loss": 1.0535, "step": 21970 }, { "epoch": 0.32, "grad_norm": 0.5234375, "learning_rate": 0.00017308456229438873, "loss": 1.0017, "step": 21975 }, { "epoch": 0.32, "grad_norm": 0.5859375, "learning_rate": 0.00017306747172486078, "loss": 1.079, "step": 21980 }, { "epoch": 0.32, "grad_norm": 0.609375, "learning_rate": 0.0001730503765753805, "loss": 0.9727, "step": 21985 }, { "epoch": 0.32, "grad_norm": 0.64453125, "learning_rate": 0.0001730332768470194, "loss": 1.0199, "step": 21990 }, { "epoch": 0.32, "grad_norm": 0.578125, "learning_rate": 0.00017301617254084938, "loss": 0.936, "step": 21995 }, { "epoch": 0.32, "grad_norm": 0.52734375, "learning_rate": 0.00017299906365794246, "loss": 1.0371, "step": 22000 }, { "epoch": 0.32, "grad_norm": 0.640625, "learning_rate": 0.0001729819501993711, "loss": 0.9254, "step": 22005 }, { "epoch": 0.32, "grad_norm": 0.5625, "learning_rate": 0.000172964832166208, "loss": 1.041, "step": 22010 }, { "epoch": 0.32, "grad_norm": 0.55859375, "learning_rate": 0.00017294770955952608, "loss": 0.9152, "step": 22015 }, { "epoch": 0.32, "grad_norm": 0.53515625, "learning_rate": 0.00017293058238039867, "loss": 0.9246, "step": 22020 }, { "epoch": 0.32, "grad_norm": 0.53125, "learning_rate": 0.00017291345062989927, "loss": 0.7779, "step": 22025 }, { "epoch": 0.32, "grad_norm": 0.5546875, "learning_rate": 0.00017289631430910177, "loss": 1.0301, "step": 22030 }, { "epoch": 0.32, "grad_norm": 0.609375, "learning_rate": 0.00017287917341908025, "loss": 1.1244, "step": 22035 }, { "epoch": 0.32, "grad_norm": 0.59375, "learning_rate": 0.00017286202796090917, "loss": 0.7826, "step": 22040 }, { "epoch": 0.32, "grad_norm": 0.6171875, "learning_rate": 0.00017284487793566317, "loss": 0.8691, "step": 22045 }, { "epoch": 0.32, "grad_norm": 0.66796875, "learning_rate": 0.00017282772334441729, "loss": 1.0271, "step": 22050 }, { "epoch": 0.32, "grad_norm": 0.412109375, "learning_rate": 0.0001728105641882467, "loss": 0.7768, "step": 22055 }, { "epoch": 0.32, "grad_norm": 0.55859375, "learning_rate": 0.00017279340046822707, "loss": 0.9809, "step": 22060 }, { "epoch": 0.32, "grad_norm": 0.5546875, "learning_rate": 0.00017277623218543418, "loss": 0.8828, "step": 22065 }, { "epoch": 0.32, "grad_norm": 0.59375, "learning_rate": 0.0001727590593409442, "loss": 1.0735, "step": 22070 }, { "epoch": 0.32, "grad_norm": 0.56640625, "learning_rate": 0.00017274188193583346, "loss": 0.7952, "step": 22075 }, { "epoch": 0.32, "grad_norm": 0.62890625, "learning_rate": 0.00017272469997117878, "loss": 1.0377, "step": 22080 }, { "epoch": 0.32, "grad_norm": 0.73828125, "learning_rate": 0.00017270751344805702, "loss": 0.9094, "step": 22085 }, { "epoch": 0.32, "grad_norm": 0.56640625, "learning_rate": 0.00017269032236754556, "loss": 0.9699, "step": 22090 }, { "epoch": 0.32, "grad_norm": 0.640625, "learning_rate": 0.00017267312673072187, "loss": 1.0409, "step": 22095 }, { "epoch": 0.32, "grad_norm": 0.54296875, "learning_rate": 0.00017265592653866385, "loss": 0.8788, "step": 22100 }, { "epoch": 0.32, "grad_norm": 0.50390625, "learning_rate": 0.00017263872179244956, "loss": 0.947, "step": 22105 }, { "epoch": 0.32, "grad_norm": 0.58984375, "learning_rate": 0.00017262151249315753, "loss": 0.9475, "step": 22110 }, { "epoch": 0.32, "grad_norm": 0.5703125, "learning_rate": 0.00017260429864186634, "loss": 0.9822, "step": 22115 }, { "epoch": 0.32, "grad_norm": 0.59765625, "learning_rate": 0.00017258708023965504, "loss": 0.9842, "step": 22120 }, { "epoch": 0.32, "grad_norm": 0.54296875, "learning_rate": 0.00017256985728760288, "loss": 0.8861, "step": 22125 }, { "epoch": 0.32, "grad_norm": 0.53515625, "learning_rate": 0.00017255262978678942, "loss": 0.9062, "step": 22130 }, { "epoch": 0.32, "grad_norm": 0.5625, "learning_rate": 0.00017253539773829448, "loss": 0.9959, "step": 22135 }, { "epoch": 0.32, "grad_norm": 0.58203125, "learning_rate": 0.0001725181611431982, "loss": 1.0269, "step": 22140 }, { "epoch": 0.32, "grad_norm": 0.58984375, "learning_rate": 0.000172500920002581, "loss": 1.0099, "step": 22145 }, { "epoch": 0.32, "grad_norm": 0.57421875, "learning_rate": 0.00017248367431752355, "loss": 1.0521, "step": 22150 }, { "epoch": 0.32, "grad_norm": 0.6171875, "learning_rate": 0.00017246642408910685, "loss": 0.9477, "step": 22155 }, { "epoch": 0.32, "grad_norm": 0.65625, "learning_rate": 0.00017244916931841216, "loss": 1.0187, "step": 22160 }, { "epoch": 0.32, "grad_norm": 0.62890625, "learning_rate": 0.000172431910006521, "loss": 0.9319, "step": 22165 }, { "epoch": 0.32, "grad_norm": 0.5703125, "learning_rate": 0.00017241464615451525, "loss": 0.9638, "step": 22170 }, { "epoch": 0.32, "grad_norm": 0.72265625, "learning_rate": 0.000172397377763477, "loss": 1.0252, "step": 22175 }, { "epoch": 0.32, "grad_norm": 0.5, "learning_rate": 0.00017238010483448866, "loss": 1.0172, "step": 22180 }, { "epoch": 0.32, "grad_norm": 0.6796875, "learning_rate": 0.00017236282736863293, "loss": 0.9769, "step": 22185 }, { "epoch": 0.32, "grad_norm": 0.59765625, "learning_rate": 0.00017234554536699274, "loss": 1.0083, "step": 22190 }, { "epoch": 0.32, "grad_norm": 0.55859375, "learning_rate": 0.0001723282588306514, "loss": 1.0638, "step": 22195 }, { "epoch": 0.32, "grad_norm": 0.494140625, "learning_rate": 0.0001723109677606924, "loss": 0.9845, "step": 22200 }, { "epoch": 0.32, "grad_norm": 0.55078125, "learning_rate": 0.00017229367215819958, "loss": 0.9822, "step": 22205 }, { "epoch": 0.32, "grad_norm": 0.5859375, "learning_rate": 0.00017227637202425706, "loss": 1.1154, "step": 22210 }, { "epoch": 0.32, "grad_norm": 0.53515625, "learning_rate": 0.00017225906735994923, "loss": 0.9841, "step": 22215 }, { "epoch": 0.32, "grad_norm": 0.60546875, "learning_rate": 0.00017224175816636075, "loss": 0.9897, "step": 22220 }, { "epoch": 0.32, "grad_norm": 0.5234375, "learning_rate": 0.00017222444444457664, "loss": 0.9637, "step": 22225 }, { "epoch": 0.32, "grad_norm": 0.59375, "learning_rate": 0.00017220712619568204, "loss": 1.0782, "step": 22230 }, { "epoch": 0.32, "grad_norm": 0.67578125, "learning_rate": 0.00017218980342076254, "loss": 1.1999, "step": 22235 }, { "epoch": 0.32, "grad_norm": 0.54296875, "learning_rate": 0.00017217247612090396, "loss": 0.9888, "step": 22240 }, { "epoch": 0.32, "grad_norm": 0.57421875, "learning_rate": 0.00017215514429719237, "loss": 0.8246, "step": 22245 }, { "epoch": 0.32, "grad_norm": 0.52734375, "learning_rate": 0.00017213780795071417, "loss": 0.8804, "step": 22250 }, { "epoch": 0.32, "grad_norm": 0.61328125, "learning_rate": 0.000172120467082556, "loss": 1.2292, "step": 22255 }, { "epoch": 0.32, "grad_norm": 0.53125, "learning_rate": 0.0001721031216938048, "loss": 0.9579, "step": 22260 }, { "epoch": 0.32, "grad_norm": 0.5234375, "learning_rate": 0.00017208577178554787, "loss": 0.9219, "step": 22265 }, { "epoch": 0.32, "grad_norm": 0.578125, "learning_rate": 0.0001720684173588726, "loss": 0.9643, "step": 22270 }, { "epoch": 0.32, "grad_norm": 0.609375, "learning_rate": 0.00017205105841486688, "loss": 0.8721, "step": 22275 }, { "epoch": 0.32, "grad_norm": 0.51171875, "learning_rate": 0.00017203369495461877, "loss": 0.9526, "step": 22280 }, { "epoch": 0.32, "grad_norm": 0.515625, "learning_rate": 0.00017201632697921659, "loss": 0.9051, "step": 22285 }, { "epoch": 0.32, "grad_norm": 0.5703125, "learning_rate": 0.00017199895448974904, "loss": 1.0717, "step": 22290 }, { "epoch": 0.32, "grad_norm": 0.63671875, "learning_rate": 0.00017198157748730502, "loss": 0.9993, "step": 22295 }, { "epoch": 0.32, "grad_norm": 0.48828125, "learning_rate": 0.00017196419597297372, "loss": 1.0139, "step": 22300 }, { "epoch": 0.32, "grad_norm": 0.6015625, "learning_rate": 0.00017194680994784468, "loss": 0.8819, "step": 22305 }, { "epoch": 0.32, "grad_norm": 0.58203125, "learning_rate": 0.00017192941941300763, "loss": 1.0517, "step": 22310 }, { "epoch": 0.32, "grad_norm": 0.515625, "learning_rate": 0.00017191202436955268, "loss": 0.9249, "step": 22315 }, { "epoch": 0.32, "grad_norm": 0.62109375, "learning_rate": 0.00017189462481857014, "loss": 1.0895, "step": 22320 }, { "epoch": 0.32, "grad_norm": 0.5703125, "learning_rate": 0.00017187722076115062, "loss": 0.9648, "step": 22325 }, { "epoch": 0.32, "grad_norm": 0.625, "learning_rate": 0.00017185981219838503, "loss": 0.8843, "step": 22330 }, { "epoch": 0.32, "grad_norm": 0.671875, "learning_rate": 0.00017184239913136458, "loss": 1.0308, "step": 22335 }, { "epoch": 0.32, "grad_norm": 0.52734375, "learning_rate": 0.00017182498156118075, "loss": 0.969, "step": 22340 }, { "epoch": 0.32, "grad_norm": 0.515625, "learning_rate": 0.00017180755948892524, "loss": 0.8646, "step": 22345 }, { "epoch": 0.32, "grad_norm": 0.58203125, "learning_rate": 0.00017179013291569018, "loss": 0.9084, "step": 22350 }, { "epoch": 0.32, "grad_norm": 0.65625, "learning_rate": 0.00017177270184256775, "loss": 1.0191, "step": 22355 }, { "epoch": 0.32, "grad_norm": 0.578125, "learning_rate": 0.00017175526627065065, "loss": 1.1, "step": 22360 }, { "epoch": 0.32, "grad_norm": 0.6171875, "learning_rate": 0.00017173782620103176, "loss": 1.0031, "step": 22365 }, { "epoch": 0.32, "grad_norm": 0.54296875, "learning_rate": 0.0001717203816348042, "loss": 1.0448, "step": 22370 }, { "epoch": 0.32, "grad_norm": 0.49609375, "learning_rate": 0.00017170293257306148, "loss": 0.8559, "step": 22375 }, { "epoch": 0.32, "grad_norm": 0.5703125, "learning_rate": 0.00017168547901689723, "loss": 0.9636, "step": 22380 }, { "epoch": 0.32, "grad_norm": 0.54296875, "learning_rate": 0.00017166802096740553, "loss": 1.0384, "step": 22385 }, { "epoch": 0.32, "grad_norm": 0.53515625, "learning_rate": 0.00017165055842568067, "loss": 0.9458, "step": 22390 }, { "epoch": 0.32, "grad_norm": 0.58984375, "learning_rate": 0.0001716330913928172, "loss": 1.0262, "step": 22395 }, { "epoch": 0.32, "grad_norm": 0.53125, "learning_rate": 0.00017161561986990995, "loss": 1.0135, "step": 22400 }, { "epoch": 0.32, "grad_norm": 0.61328125, "learning_rate": 0.0001715981438580541, "loss": 1.0603, "step": 22405 }, { "epoch": 0.32, "grad_norm": 0.59375, "learning_rate": 0.00017158066335834507, "loss": 1.0851, "step": 22410 }, { "epoch": 0.32, "grad_norm": 0.59375, "learning_rate": 0.0001715631783718785, "loss": 0.8572, "step": 22415 }, { "epoch": 0.32, "grad_norm": 0.58984375, "learning_rate": 0.00017154568889975042, "loss": 0.9603, "step": 22420 }, { "epoch": 0.32, "grad_norm": 0.6328125, "learning_rate": 0.0001715281949430571, "loss": 1.0527, "step": 22425 }, { "epoch": 0.32, "grad_norm": 0.5859375, "learning_rate": 0.000171510696502895, "loss": 0.8258, "step": 22430 }, { "epoch": 0.32, "grad_norm": 0.5703125, "learning_rate": 0.00017149319358036107, "loss": 1.0781, "step": 22435 }, { "epoch": 0.32, "grad_norm": 0.5546875, "learning_rate": 0.0001714756861765523, "loss": 1.0145, "step": 22440 }, { "epoch": 0.32, "grad_norm": 0.55078125, "learning_rate": 0.00017145817429256612, "loss": 0.8742, "step": 22445 }, { "epoch": 0.32, "grad_norm": 0.6953125, "learning_rate": 0.0001714406579295002, "loss": 0.9866, "step": 22450 }, { "epoch": 0.32, "grad_norm": 0.5234375, "learning_rate": 0.0001714231370884525, "loss": 0.8931, "step": 22455 }, { "epoch": 0.32, "grad_norm": 0.60546875, "learning_rate": 0.00017140561177052117, "loss": 0.9762, "step": 22460 }, { "epoch": 0.32, "grad_norm": 0.58203125, "learning_rate": 0.0001713880819768048, "loss": 1.0102, "step": 22465 }, { "epoch": 0.32, "grad_norm": 0.515625, "learning_rate": 0.00017137054770840213, "loss": 1.1021, "step": 22470 }, { "epoch": 0.32, "grad_norm": 0.625, "learning_rate": 0.00017135300896641229, "loss": 1.1629, "step": 22475 }, { "epoch": 0.32, "grad_norm": 0.5546875, "learning_rate": 0.00017133546575193452, "loss": 1.0134, "step": 22480 }, { "epoch": 0.32, "grad_norm": 0.57421875, "learning_rate": 0.00017131791806606857, "loss": 1.0137, "step": 22485 }, { "epoch": 0.32, "grad_norm": 0.61328125, "learning_rate": 0.00017130036590991426, "loss": 0.9579, "step": 22490 }, { "epoch": 0.32, "grad_norm": 0.55859375, "learning_rate": 0.00017128280928457182, "loss": 0.8749, "step": 22495 }, { "epoch": 0.32, "grad_norm": 0.63671875, "learning_rate": 0.0001712652481911417, "loss": 1.0704, "step": 22500 }, { "epoch": 0.32, "grad_norm": 0.72265625, "learning_rate": 0.00017124768263072467, "loss": 1.0315, "step": 22505 }, { "epoch": 0.32, "grad_norm": 0.6640625, "learning_rate": 0.00017123011260442174, "loss": 1.0279, "step": 22510 }, { "epoch": 0.32, "grad_norm": 0.57421875, "learning_rate": 0.00017121253811333423, "loss": 0.9216, "step": 22515 }, { "epoch": 0.32, "grad_norm": 0.6171875, "learning_rate": 0.00017119495915856375, "loss": 1.0082, "step": 22520 }, { "epoch": 0.32, "grad_norm": 0.5078125, "learning_rate": 0.0001711773757412121, "loss": 0.8658, "step": 22525 }, { "epoch": 0.32, "grad_norm": 0.5234375, "learning_rate": 0.00017115978786238153, "loss": 1.0071, "step": 22530 }, { "epoch": 0.32, "grad_norm": 0.609375, "learning_rate": 0.00017114219552317436, "loss": 0.9824, "step": 22535 }, { "epoch": 0.32, "grad_norm": 0.49609375, "learning_rate": 0.00017112459872469337, "loss": 1.0176, "step": 22540 }, { "epoch": 0.32, "grad_norm": 0.63671875, "learning_rate": 0.00017110699746804154, "loss": 0.9751, "step": 22545 }, { "epoch": 0.32, "grad_norm": 0.5703125, "learning_rate": 0.0001710893917543221, "loss": 0.8675, "step": 22550 }, { "epoch": 0.32, "grad_norm": 0.53515625, "learning_rate": 0.00017107178158463863, "loss": 0.9408, "step": 22555 }, { "epoch": 0.32, "grad_norm": 0.58203125, "learning_rate": 0.00017105416696009497, "loss": 0.9809, "step": 22560 }, { "epoch": 0.32, "grad_norm": 0.578125, "learning_rate": 0.0001710365478817952, "loss": 0.9696, "step": 22565 }, { "epoch": 0.32, "grad_norm": 0.6015625, "learning_rate": 0.0001710189243508437, "loss": 0.9519, "step": 22570 }, { "epoch": 0.32, "grad_norm": 0.490234375, "learning_rate": 0.0001710012963683451, "loss": 1.0546, "step": 22575 }, { "epoch": 0.32, "grad_norm": 0.5703125, "learning_rate": 0.00017098366393540442, "loss": 0.9163, "step": 22580 }, { "epoch": 0.32, "grad_norm": 0.54296875, "learning_rate": 0.00017096602705312682, "loss": 1.0133, "step": 22585 }, { "epoch": 0.32, "grad_norm": 0.53125, "learning_rate": 0.00017094838572261783, "loss": 0.9241, "step": 22590 }, { "epoch": 0.32, "grad_norm": 0.55078125, "learning_rate": 0.00017093073994498318, "loss": 0.9855, "step": 22595 }, { "epoch": 0.32, "grad_norm": 0.546875, "learning_rate": 0.00017091308972132905, "loss": 0.9835, "step": 22600 }, { "epoch": 0.32, "grad_norm": 0.58203125, "learning_rate": 0.00017089543505276162, "loss": 1.0074, "step": 22605 }, { "epoch": 0.32, "grad_norm": 0.55859375, "learning_rate": 0.0001708777759403876, "loss": 0.8852, "step": 22610 }, { "epoch": 0.32, "grad_norm": 0.55859375, "learning_rate": 0.00017086011238531386, "loss": 0.9527, "step": 22615 }, { "epoch": 0.32, "grad_norm": 0.54296875, "learning_rate": 0.0001708424443886476, "loss": 0.9278, "step": 22620 }, { "epoch": 0.32, "grad_norm": 0.6171875, "learning_rate": 0.00017082477195149622, "loss": 0.9386, "step": 22625 }, { "epoch": 0.32, "grad_norm": 0.52734375, "learning_rate": 0.0001708070950749675, "loss": 0.8705, "step": 22630 }, { "epoch": 0.32, "grad_norm": 0.59765625, "learning_rate": 0.00017078941376016938, "loss": 1.0218, "step": 22635 }, { "epoch": 0.32, "grad_norm": 0.6328125, "learning_rate": 0.00017077172800821018, "loss": 1.1595, "step": 22640 }, { "epoch": 0.32, "grad_norm": 0.7890625, "learning_rate": 0.00017075403782019848, "loss": 1.0909, "step": 22645 }, { "epoch": 0.32, "grad_norm": 0.53125, "learning_rate": 0.00017073634319724309, "loss": 0.8685, "step": 22650 }, { "epoch": 0.32, "grad_norm": 0.60546875, "learning_rate": 0.00017071864414045318, "loss": 0.9616, "step": 22655 }, { "epoch": 0.33, "grad_norm": 0.6484375, "learning_rate": 0.0001707009406509381, "loss": 0.9912, "step": 22660 }, { "epoch": 0.33, "grad_norm": 0.5703125, "learning_rate": 0.00017068323272980752, "loss": 0.9322, "step": 22665 }, { "epoch": 0.33, "grad_norm": 0.58984375, "learning_rate": 0.00017066552037817143, "loss": 1.0414, "step": 22670 }, { "epoch": 0.33, "grad_norm": 0.51953125, "learning_rate": 0.00017064780359714004, "loss": 0.9212, "step": 22675 }, { "epoch": 0.33, "grad_norm": 0.671875, "learning_rate": 0.00017063008238782387, "loss": 1.0547, "step": 22680 }, { "epoch": 0.33, "grad_norm": 0.54296875, "learning_rate": 0.00017061235675133366, "loss": 0.8523, "step": 22685 }, { "epoch": 0.33, "grad_norm": 0.5625, "learning_rate": 0.00017059462668878055, "loss": 0.9595, "step": 22690 }, { "epoch": 0.33, "grad_norm": 0.515625, "learning_rate": 0.0001705768922012758, "loss": 0.9903, "step": 22695 }, { "epoch": 0.33, "grad_norm": 0.62109375, "learning_rate": 0.0001705591532899311, "loss": 1.0138, "step": 22700 }, { "epoch": 0.33, "grad_norm": 0.640625, "learning_rate": 0.0001705414099558583, "loss": 0.9734, "step": 22705 }, { "epoch": 0.33, "grad_norm": 0.68359375, "learning_rate": 0.00017052366220016957, "loss": 1.0777, "step": 22710 }, { "epoch": 0.33, "grad_norm": 0.69921875, "learning_rate": 0.0001705059100239774, "loss": 1.0454, "step": 22715 }, { "epoch": 0.33, "grad_norm": 0.59375, "learning_rate": 0.00017048815342839447, "loss": 1.1218, "step": 22720 }, { "epoch": 0.33, "grad_norm": 0.6328125, "learning_rate": 0.00017047039241453382, "loss": 0.9762, "step": 22725 }, { "epoch": 0.33, "grad_norm": 0.61328125, "learning_rate": 0.0001704526269835087, "loss": 0.9931, "step": 22730 }, { "epoch": 0.33, "grad_norm": 0.50390625, "learning_rate": 0.0001704348571364327, "loss": 0.919, "step": 22735 }, { "epoch": 0.33, "grad_norm": 0.5234375, "learning_rate": 0.00017041708287441959, "loss": 0.9993, "step": 22740 }, { "epoch": 0.33, "grad_norm": 0.53515625, "learning_rate": 0.0001703993041985836, "loss": 1.067, "step": 22745 }, { "epoch": 0.33, "grad_norm": 0.515625, "learning_rate": 0.00017038152111003898, "loss": 0.9398, "step": 22750 }, { "epoch": 0.33, "grad_norm": 0.54296875, "learning_rate": 0.00017036373360990048, "loss": 1.1059, "step": 22755 }, { "epoch": 0.33, "grad_norm": 0.6328125, "learning_rate": 0.00017034594169928303, "loss": 1.0214, "step": 22760 }, { "epoch": 0.33, "grad_norm": 0.625, "learning_rate": 0.00017032814537930183, "loss": 0.9617, "step": 22765 }, { "epoch": 0.33, "grad_norm": 0.46484375, "learning_rate": 0.00017031034465107237, "loss": 0.8269, "step": 22770 }, { "epoch": 0.33, "grad_norm": 0.58203125, "learning_rate": 0.00017029253951571046, "loss": 0.8859, "step": 22775 }, { "epoch": 0.33, "grad_norm": 0.5234375, "learning_rate": 0.00017027472997433208, "loss": 0.9395, "step": 22780 }, { "epoch": 0.33, "grad_norm": 0.5546875, "learning_rate": 0.0001702569160280536, "loss": 0.9338, "step": 22785 }, { "epoch": 0.33, "grad_norm": 0.515625, "learning_rate": 0.00017023909767799163, "loss": 0.9962, "step": 22790 }, { "epoch": 0.33, "grad_norm": 0.5234375, "learning_rate": 0.000170221274925263, "loss": 0.8451, "step": 22795 }, { "epoch": 0.33, "grad_norm": 0.6328125, "learning_rate": 0.00017020344777098488, "loss": 1.0406, "step": 22800 }, { "epoch": 0.33, "grad_norm": 0.65625, "learning_rate": 0.0001701856162162747, "loss": 1.0097, "step": 22805 }, { "epoch": 0.33, "grad_norm": 0.69140625, "learning_rate": 0.0001701677802622502, "loss": 0.9807, "step": 22810 }, { "epoch": 0.33, "grad_norm": 0.55078125, "learning_rate": 0.00017014993991002926, "loss": 0.886, "step": 22815 }, { "epoch": 0.33, "grad_norm": 0.546875, "learning_rate": 0.00017013209516073024, "loss": 1.0094, "step": 22820 }, { "epoch": 0.33, "grad_norm": 0.51953125, "learning_rate": 0.00017011424601547158, "loss": 1.0373, "step": 22825 }, { "epoch": 0.33, "grad_norm": 0.462890625, "learning_rate": 0.00017009639247537214, "loss": 0.9939, "step": 22830 }, { "epoch": 0.33, "grad_norm": 0.546875, "learning_rate": 0.00017007853454155102, "loss": 1.0919, "step": 22835 }, { "epoch": 0.33, "grad_norm": 0.51953125, "learning_rate": 0.00017006067221512748, "loss": 0.9612, "step": 22840 }, { "epoch": 0.33, "grad_norm": 0.625, "learning_rate": 0.00017004280549722127, "loss": 0.9532, "step": 22845 }, { "epoch": 0.33, "grad_norm": 0.546875, "learning_rate": 0.0001700249343889522, "loss": 0.8965, "step": 22850 }, { "epoch": 0.33, "grad_norm": 0.60546875, "learning_rate": 0.0001700070588914405, "loss": 1.0316, "step": 22855 }, { "epoch": 0.33, "grad_norm": 0.578125, "learning_rate": 0.00016998917900580665, "loss": 1.1005, "step": 22860 }, { "epoch": 0.33, "grad_norm": 0.515625, "learning_rate": 0.00016997129473317132, "loss": 0.9847, "step": 22865 }, { "epoch": 0.33, "grad_norm": 0.7421875, "learning_rate": 0.00016995340607465554, "loss": 0.8212, "step": 22870 }, { "epoch": 0.33, "grad_norm": 0.5390625, "learning_rate": 0.00016993551303138063, "loss": 0.9523, "step": 22875 }, { "epoch": 0.33, "grad_norm": 0.56640625, "learning_rate": 0.00016991761560446807, "loss": 0.942, "step": 22880 }, { "epoch": 0.33, "grad_norm": 0.5625, "learning_rate": 0.00016989971379503978, "loss": 0.9276, "step": 22885 }, { "epoch": 0.33, "grad_norm": 0.56640625, "learning_rate": 0.0001698818076042178, "loss": 0.9119, "step": 22890 }, { "epoch": 0.33, "grad_norm": 0.55078125, "learning_rate": 0.0001698638970331245, "loss": 0.938, "step": 22895 }, { "epoch": 0.33, "grad_norm": 0.53125, "learning_rate": 0.0001698459820828826, "loss": 0.9448, "step": 22900 }, { "epoch": 0.33, "grad_norm": 0.6953125, "learning_rate": 0.00016982806275461497, "loss": 1.0021, "step": 22905 }, { "epoch": 0.33, "grad_norm": 0.5859375, "learning_rate": 0.00016981013904944487, "loss": 1.0421, "step": 22910 }, { "epoch": 0.33, "grad_norm": 0.63671875, "learning_rate": 0.00016979221096849573, "loss": 1.02, "step": 22915 }, { "epoch": 0.33, "grad_norm": 0.59765625, "learning_rate": 0.00016977427851289133, "loss": 1.1506, "step": 22920 }, { "epoch": 0.33, "grad_norm": 0.66796875, "learning_rate": 0.00016975634168375566, "loss": 0.867, "step": 22925 }, { "epoch": 0.33, "grad_norm": 0.56640625, "learning_rate": 0.00016973840048221307, "loss": 0.999, "step": 22930 }, { "epoch": 0.33, "grad_norm": 0.58203125, "learning_rate": 0.00016972045490938812, "loss": 0.9237, "step": 22935 }, { "epoch": 0.33, "grad_norm": 0.76953125, "learning_rate": 0.00016970250496640564, "loss": 1.0466, "step": 22940 }, { "epoch": 0.33, "grad_norm": 0.49609375, "learning_rate": 0.00016968455065439076, "loss": 0.832, "step": 22945 }, { "epoch": 0.33, "grad_norm": 0.7578125, "learning_rate": 0.00016966659197446889, "loss": 0.9661, "step": 22950 }, { "epoch": 0.33, "grad_norm": 0.51953125, "learning_rate": 0.0001696486289277657, "loss": 0.8908, "step": 22955 }, { "epoch": 0.33, "grad_norm": 0.54296875, "learning_rate": 0.0001696306615154071, "loss": 0.9853, "step": 22960 }, { "epoch": 0.33, "grad_norm": 0.58984375, "learning_rate": 0.00016961268973851937, "loss": 1.0202, "step": 22965 }, { "epoch": 0.33, "grad_norm": 0.640625, "learning_rate": 0.00016959471359822895, "loss": 0.8762, "step": 22970 }, { "epoch": 0.33, "grad_norm": 0.57421875, "learning_rate": 0.00016957673309566258, "loss": 1.0241, "step": 22975 }, { "epoch": 0.33, "grad_norm": 0.578125, "learning_rate": 0.00016955874823194737, "loss": 1.0761, "step": 22980 }, { "epoch": 0.33, "grad_norm": 0.53125, "learning_rate": 0.0001695407590082106, "loss": 1.0919, "step": 22985 }, { "epoch": 0.33, "grad_norm": 0.61328125, "learning_rate": 0.00016952276542557985, "loss": 1.0389, "step": 22990 }, { "epoch": 0.33, "grad_norm": 0.59375, "learning_rate": 0.000169504767485183, "loss": 1.0269, "step": 22995 }, { "epoch": 0.33, "grad_norm": 0.55078125, "learning_rate": 0.00016948676518814816, "loss": 0.9425, "step": 23000 }, { "epoch": 0.33, "grad_norm": 0.7578125, "learning_rate": 0.0001694687585356037, "loss": 1.1517, "step": 23005 }, { "epoch": 0.33, "grad_norm": 0.6171875, "learning_rate": 0.0001694507475286784, "loss": 0.9304, "step": 23010 }, { "epoch": 0.33, "grad_norm": 0.5546875, "learning_rate": 0.0001694327321685011, "loss": 0.8311, "step": 23015 }, { "epoch": 0.33, "grad_norm": 0.56640625, "learning_rate": 0.0001694147124562011, "loss": 0.9417, "step": 23020 }, { "epoch": 0.33, "grad_norm": 0.5703125, "learning_rate": 0.00016939668839290785, "loss": 0.871, "step": 23025 }, { "epoch": 0.33, "grad_norm": 0.671875, "learning_rate": 0.00016937865997975116, "loss": 0.9416, "step": 23030 }, { "epoch": 0.33, "grad_norm": 0.68359375, "learning_rate": 0.000169360627217861, "loss": 0.9172, "step": 23035 }, { "epoch": 0.33, "grad_norm": 0.515625, "learning_rate": 0.00016934259010836775, "loss": 1.0004, "step": 23040 }, { "epoch": 0.33, "grad_norm": 0.5703125, "learning_rate": 0.000169324548652402, "loss": 1.0068, "step": 23045 }, { "epoch": 0.33, "grad_norm": 0.62890625, "learning_rate": 0.00016930650285109454, "loss": 0.9082, "step": 23050 }, { "epoch": 0.33, "grad_norm": 0.640625, "learning_rate": 0.0001692884527055766, "loss": 0.9053, "step": 23055 }, { "epoch": 0.33, "grad_norm": 0.5625, "learning_rate": 0.0001692703982169795, "loss": 1.0665, "step": 23060 }, { "epoch": 0.33, "grad_norm": 0.54296875, "learning_rate": 0.00016925233938643497, "loss": 1.0995, "step": 23065 }, { "epoch": 0.33, "grad_norm": 0.625, "learning_rate": 0.00016923427621507491, "loss": 0.954, "step": 23070 }, { "epoch": 0.33, "grad_norm": 0.59765625, "learning_rate": 0.0001692162087040316, "loss": 1.0213, "step": 23075 }, { "epoch": 0.33, "grad_norm": 0.56640625, "learning_rate": 0.00016919813685443744, "loss": 0.9033, "step": 23080 }, { "epoch": 0.33, "grad_norm": 0.56640625, "learning_rate": 0.0001691800606674253, "loss": 1.0971, "step": 23085 }, { "epoch": 0.33, "grad_norm": 0.53125, "learning_rate": 0.00016916198014412816, "loss": 1.0208, "step": 23090 }, { "epoch": 0.33, "grad_norm": 0.52734375, "learning_rate": 0.00016914389528567932, "loss": 0.8997, "step": 23095 }, { "epoch": 0.33, "grad_norm": 0.578125, "learning_rate": 0.0001691258060932124, "loss": 0.8185, "step": 23100 }, { "epoch": 0.33, "grad_norm": 0.54296875, "learning_rate": 0.0001691077125678612, "loss": 0.9328, "step": 23105 }, { "epoch": 0.33, "grad_norm": 0.55078125, "learning_rate": 0.0001690896147107599, "loss": 0.8601, "step": 23110 }, { "epoch": 0.33, "grad_norm": 0.5859375, "learning_rate": 0.00016907151252304283, "loss": 0.9554, "step": 23115 }, { "epoch": 0.33, "grad_norm": 0.65234375, "learning_rate": 0.0001690534060058447, "loss": 0.931, "step": 23120 }, { "epoch": 0.33, "grad_norm": 0.5546875, "learning_rate": 0.00016903529516030044, "loss": 0.8989, "step": 23125 }, { "epoch": 0.33, "grad_norm": 0.6015625, "learning_rate": 0.00016901717998754528, "loss": 1.0203, "step": 23130 }, { "epoch": 0.33, "grad_norm": 0.6171875, "learning_rate": 0.00016899906048871462, "loss": 1.0554, "step": 23135 }, { "epoch": 0.33, "grad_norm": 0.53515625, "learning_rate": 0.0001689809366649443, "loss": 1.0396, "step": 23140 }, { "epoch": 0.33, "grad_norm": 0.6015625, "learning_rate": 0.0001689628085173703, "loss": 0.9693, "step": 23145 }, { "epoch": 0.33, "grad_norm": 0.546875, "learning_rate": 0.00016894467604712892, "loss": 0.8956, "step": 23150 }, { "epoch": 0.33, "grad_norm": 0.578125, "learning_rate": 0.00016892653925535672, "loss": 0.9386, "step": 23155 }, { "epoch": 0.33, "grad_norm": 0.60546875, "learning_rate": 0.00016890839814319057, "loss": 1.0394, "step": 23160 }, { "epoch": 0.33, "grad_norm": 0.59375, "learning_rate": 0.0001688902527117675, "loss": 0.9978, "step": 23165 }, { "epoch": 0.33, "grad_norm": 0.484375, "learning_rate": 0.00016887210296222496, "loss": 0.8688, "step": 23170 }, { "epoch": 0.33, "grad_norm": 0.56640625, "learning_rate": 0.00016885394889570057, "loss": 1.0429, "step": 23175 }, { "epoch": 0.33, "grad_norm": 0.66015625, "learning_rate": 0.0001688357905133322, "loss": 1.021, "step": 23180 }, { "epoch": 0.33, "grad_norm": 0.73828125, "learning_rate": 0.00016881762781625813, "loss": 0.8929, "step": 23185 }, { "epoch": 0.33, "grad_norm": 0.66015625, "learning_rate": 0.00016879946080561675, "loss": 1.0132, "step": 23190 }, { "epoch": 0.33, "grad_norm": 0.5625, "learning_rate": 0.0001687812894825468, "loss": 1.0581, "step": 23195 }, { "epoch": 0.33, "grad_norm": 0.54296875, "learning_rate": 0.00016876311384818733, "loss": 0.9398, "step": 23200 }, { "epoch": 0.33, "grad_norm": 0.5390625, "learning_rate": 0.00016874493390367756, "loss": 1.0428, "step": 23205 }, { "epoch": 0.33, "grad_norm": 0.56640625, "learning_rate": 0.000168726749650157, "loss": 0.972, "step": 23210 }, { "epoch": 0.33, "grad_norm": 0.67578125, "learning_rate": 0.00016870856108876554, "loss": 0.9241, "step": 23215 }, { "epoch": 0.33, "grad_norm": 0.61328125, "learning_rate": 0.00016869036822064323, "loss": 0.9303, "step": 23220 }, { "epoch": 0.33, "grad_norm": 0.6328125, "learning_rate": 0.00016867217104693036, "loss": 0.9909, "step": 23225 }, { "epoch": 0.33, "grad_norm": 0.62890625, "learning_rate": 0.0001686539695687676, "loss": 0.9815, "step": 23230 }, { "epoch": 0.33, "grad_norm": 0.578125, "learning_rate": 0.00016863576378729588, "loss": 1.0365, "step": 23235 }, { "epoch": 0.33, "grad_norm": 0.546875, "learning_rate": 0.00016861755370365628, "loss": 0.8689, "step": 23240 }, { "epoch": 0.33, "grad_norm": 0.51171875, "learning_rate": 0.00016859933931899027, "loss": 0.9151, "step": 23245 }, { "epoch": 0.33, "grad_norm": 0.58984375, "learning_rate": 0.00016858112063443956, "loss": 0.9882, "step": 23250 }, { "epoch": 0.33, "grad_norm": 0.59765625, "learning_rate": 0.0001685628976511461, "loss": 1.182, "step": 23255 }, { "epoch": 0.33, "grad_norm": 0.546875, "learning_rate": 0.00016854467037025214, "loss": 1.0126, "step": 23260 }, { "epoch": 0.33, "grad_norm": 0.51953125, "learning_rate": 0.00016852643879290016, "loss": 0.9376, "step": 23265 }, { "epoch": 0.33, "grad_norm": 0.609375, "learning_rate": 0.00016850820292023294, "loss": 0.9557, "step": 23270 }, { "epoch": 0.33, "grad_norm": 0.6015625, "learning_rate": 0.00016848996275339352, "loss": 0.9623, "step": 23275 }, { "epoch": 0.33, "grad_norm": 0.60546875, "learning_rate": 0.00016847171829352528, "loss": 1.0208, "step": 23280 }, { "epoch": 0.33, "grad_norm": 0.5859375, "learning_rate": 0.00016845346954177172, "loss": 0.9661, "step": 23285 }, { "epoch": 0.33, "grad_norm": 0.53515625, "learning_rate": 0.00016843521649927674, "loss": 1.0168, "step": 23290 }, { "epoch": 0.33, "grad_norm": 0.5859375, "learning_rate": 0.00016841695916718443, "loss": 0.9827, "step": 23295 }, { "epoch": 0.33, "grad_norm": 0.61328125, "learning_rate": 0.0001683986975466392, "loss": 1.0155, "step": 23300 }, { "epoch": 0.33, "grad_norm": 0.55078125, "learning_rate": 0.00016838043163878573, "loss": 0.9813, "step": 23305 }, { "epoch": 0.33, "grad_norm": 0.67578125, "learning_rate": 0.00016836216144476893, "loss": 0.8198, "step": 23310 }, { "epoch": 0.33, "grad_norm": 0.5546875, "learning_rate": 0.000168343886965734, "loss": 1.0811, "step": 23315 }, { "epoch": 0.33, "grad_norm": 0.5703125, "learning_rate": 0.00016832560820282636, "loss": 0.9135, "step": 23320 }, { "epoch": 0.33, "grad_norm": 0.5390625, "learning_rate": 0.0001683073251571918, "loss": 0.9304, "step": 23325 }, { "epoch": 0.33, "grad_norm": 0.69140625, "learning_rate": 0.0001682890378299763, "loss": 1.0073, "step": 23330 }, { "epoch": 0.33, "grad_norm": 0.58984375, "learning_rate": 0.00016827074622232616, "loss": 0.99, "step": 23335 }, { "epoch": 0.33, "grad_norm": 0.55859375, "learning_rate": 0.00016825245033538785, "loss": 0.8615, "step": 23340 }, { "epoch": 0.33, "grad_norm": 0.51171875, "learning_rate": 0.00016823415017030825, "loss": 0.8889, "step": 23345 }, { "epoch": 0.33, "grad_norm": 0.6015625, "learning_rate": 0.00016821584572823442, "loss": 0.982, "step": 23350 }, { "epoch": 0.34, "grad_norm": 0.6015625, "learning_rate": 0.00016819753701031363, "loss": 1.0136, "step": 23355 }, { "epoch": 0.34, "grad_norm": 0.5546875, "learning_rate": 0.00016817922401769363, "loss": 1.0189, "step": 23360 }, { "epoch": 0.34, "grad_norm": 0.54296875, "learning_rate": 0.00016816090675152214, "loss": 1.0293, "step": 23365 }, { "epoch": 0.34, "grad_norm": 0.62109375, "learning_rate": 0.00016814258521294744, "loss": 0.9796, "step": 23370 }, { "epoch": 0.34, "grad_norm": 0.640625, "learning_rate": 0.00016812425940311787, "loss": 0.9483, "step": 23375 }, { "epoch": 0.34, "grad_norm": 0.52734375, "learning_rate": 0.00016810592932318212, "loss": 0.926, "step": 23380 }, { "epoch": 0.34, "grad_norm": 0.5234375, "learning_rate": 0.0001680875949742892, "loss": 0.907, "step": 23385 }, { "epoch": 0.34, "grad_norm": 0.474609375, "learning_rate": 0.0001680692563575882, "loss": 0.9811, "step": 23390 }, { "epoch": 0.34, "grad_norm": 0.5546875, "learning_rate": 0.00016805091347422876, "loss": 1.0047, "step": 23395 }, { "epoch": 0.34, "grad_norm": 0.59375, "learning_rate": 0.00016803256632536053, "loss": 0.8818, "step": 23400 }, { "epoch": 0.34, "grad_norm": 0.609375, "learning_rate": 0.00016801421491213358, "loss": 1.011, "step": 23405 }, { "epoch": 0.34, "grad_norm": 0.5390625, "learning_rate": 0.00016799585923569816, "loss": 0.9497, "step": 23410 }, { "epoch": 0.34, "grad_norm": 0.5625, "learning_rate": 0.00016797749929720485, "loss": 1.0023, "step": 23415 }, { "epoch": 0.34, "grad_norm": 0.5703125, "learning_rate": 0.00016795913509780447, "loss": 0.8086, "step": 23420 }, { "epoch": 0.34, "grad_norm": 0.6015625, "learning_rate": 0.0001679407666386481, "loss": 0.9474, "step": 23425 }, { "epoch": 0.34, "grad_norm": 0.5, "learning_rate": 0.00016792239392088708, "loss": 1.0872, "step": 23430 }, { "epoch": 0.34, "grad_norm": 0.56640625, "learning_rate": 0.00016790401694567305, "loss": 0.9645, "step": 23435 }, { "epoch": 0.34, "grad_norm": 0.6171875, "learning_rate": 0.00016788563571415793, "loss": 0.9173, "step": 23440 }, { "epoch": 0.34, "grad_norm": 0.61328125, "learning_rate": 0.00016786725022749382, "loss": 0.8202, "step": 23445 }, { "epoch": 0.34, "grad_norm": 0.56640625, "learning_rate": 0.00016784886048683322, "loss": 0.9017, "step": 23450 }, { "epoch": 0.34, "grad_norm": 0.6484375, "learning_rate": 0.00016783046649332872, "loss": 1.0489, "step": 23455 }, { "epoch": 0.34, "grad_norm": 0.5859375, "learning_rate": 0.00016781206824813337, "loss": 0.923, "step": 23460 }, { "epoch": 0.34, "grad_norm": 0.7265625, "learning_rate": 0.00016779366575240032, "loss": 0.9346, "step": 23465 }, { "epoch": 0.34, "grad_norm": 0.59765625, "learning_rate": 0.0001677752590072831, "loss": 0.9595, "step": 23470 }, { "epoch": 0.34, "grad_norm": 0.60546875, "learning_rate": 0.00016775684801393546, "loss": 0.8984, "step": 23475 }, { "epoch": 0.34, "grad_norm": 0.56640625, "learning_rate": 0.00016773843277351138, "loss": 1.0412, "step": 23480 }, { "epoch": 0.34, "grad_norm": 0.59765625, "learning_rate": 0.00016772001328716523, "loss": 0.9927, "step": 23485 }, { "epoch": 0.34, "grad_norm": 0.53515625, "learning_rate": 0.00016770158955605152, "loss": 0.8891, "step": 23490 }, { "epoch": 0.34, "grad_norm": 0.70703125, "learning_rate": 0.00016768316158132505, "loss": 1.0668, "step": 23495 }, { "epoch": 0.34, "grad_norm": 0.54296875, "learning_rate": 0.00016766472936414093, "loss": 0.9172, "step": 23500 }, { "epoch": 0.34, "grad_norm": 0.57421875, "learning_rate": 0.0001676462929056545, "loss": 0.9436, "step": 23505 }, { "epoch": 0.34, "grad_norm": 0.640625, "learning_rate": 0.00016762785220702142, "loss": 1.0329, "step": 23510 }, { "epoch": 0.34, "grad_norm": 0.55078125, "learning_rate": 0.0001676094072693975, "loss": 0.862, "step": 23515 }, { "epoch": 0.34, "grad_norm": 0.6015625, "learning_rate": 0.000167590958093939, "loss": 1.1708, "step": 23520 }, { "epoch": 0.34, "grad_norm": 0.578125, "learning_rate": 0.0001675725046818022, "loss": 1.0274, "step": 23525 }, { "epoch": 0.34, "grad_norm": 0.578125, "learning_rate": 0.00016755404703414388, "loss": 0.9976, "step": 23530 }, { "epoch": 0.34, "grad_norm": 0.6328125, "learning_rate": 0.00016753558515212095, "loss": 1.0193, "step": 23535 }, { "epoch": 0.34, "grad_norm": 0.5390625, "learning_rate": 0.00016751711903689062, "loss": 1.018, "step": 23540 }, { "epoch": 0.34, "grad_norm": 0.54296875, "learning_rate": 0.00016749864868961038, "loss": 1.0502, "step": 23545 }, { "epoch": 0.34, "grad_norm": 0.625, "learning_rate": 0.00016748017411143798, "loss": 1.0112, "step": 23550 }, { "epoch": 0.34, "grad_norm": 0.56640625, "learning_rate": 0.00016746169530353137, "loss": 0.9149, "step": 23555 }, { "epoch": 0.34, "grad_norm": 0.578125, "learning_rate": 0.00016744321226704888, "loss": 1.0269, "step": 23560 }, { "epoch": 0.34, "grad_norm": 0.5234375, "learning_rate": 0.00016742472500314904, "loss": 0.8589, "step": 23565 }, { "epoch": 0.34, "grad_norm": 0.6171875, "learning_rate": 0.00016740623351299067, "loss": 0.9598, "step": 23570 }, { "epoch": 0.34, "grad_norm": 0.65625, "learning_rate": 0.00016738773779773278, "loss": 1.0552, "step": 23575 }, { "epoch": 0.34, "grad_norm": 0.5546875, "learning_rate": 0.00016736923785853476, "loss": 1.0401, "step": 23580 }, { "epoch": 0.34, "grad_norm": 0.6640625, "learning_rate": 0.00016735073369655615, "loss": 1.0078, "step": 23585 }, { "epoch": 0.34, "grad_norm": 0.61328125, "learning_rate": 0.0001673322253129569, "loss": 0.9583, "step": 23590 }, { "epoch": 0.34, "grad_norm": 0.5390625, "learning_rate": 0.00016731371270889707, "loss": 0.9447, "step": 23595 }, { "epoch": 0.34, "grad_norm": 0.53125, "learning_rate": 0.00016729519588553704, "loss": 0.8466, "step": 23600 }, { "epoch": 0.34, "grad_norm": 0.53515625, "learning_rate": 0.00016727667484403748, "loss": 0.9608, "step": 23605 }, { "epoch": 0.34, "grad_norm": 0.6015625, "learning_rate": 0.00016725814958555932, "loss": 0.9509, "step": 23610 }, { "epoch": 0.34, "grad_norm": 0.6015625, "learning_rate": 0.00016723962011126376, "loss": 0.9129, "step": 23615 }, { "epoch": 0.34, "grad_norm": 0.51171875, "learning_rate": 0.00016722108642231224, "loss": 0.9439, "step": 23620 }, { "epoch": 0.34, "grad_norm": 0.73046875, "learning_rate": 0.00016720254851986647, "loss": 1.2049, "step": 23625 }, { "epoch": 0.34, "grad_norm": 0.578125, "learning_rate": 0.0001671840064050884, "loss": 1.0429, "step": 23630 }, { "epoch": 0.34, "grad_norm": 0.6328125, "learning_rate": 0.0001671654600791403, "loss": 0.9667, "step": 23635 }, { "epoch": 0.34, "grad_norm": 0.5546875, "learning_rate": 0.00016714690954318465, "loss": 0.9415, "step": 23640 }, { "epoch": 0.34, "grad_norm": 0.5625, "learning_rate": 0.00016712835479838428, "loss": 0.9917, "step": 23645 }, { "epoch": 0.34, "grad_norm": 0.50390625, "learning_rate": 0.00016710979584590215, "loss": 0.9524, "step": 23650 }, { "epoch": 0.34, "grad_norm": 0.54296875, "learning_rate": 0.00016709123268690158, "loss": 1.0102, "step": 23655 }, { "epoch": 0.34, "grad_norm": 0.54296875, "learning_rate": 0.00016707266532254615, "loss": 1.0184, "step": 23660 }, { "epoch": 0.34, "grad_norm": 0.5546875, "learning_rate": 0.00016705409375399963, "loss": 0.9577, "step": 23665 }, { "epoch": 0.34, "grad_norm": 0.55859375, "learning_rate": 0.00016703551798242621, "loss": 0.9503, "step": 23670 }, { "epoch": 0.34, "grad_norm": 0.5078125, "learning_rate": 0.00016701693800899014, "loss": 1.2045, "step": 23675 }, { "epoch": 0.34, "grad_norm": 0.515625, "learning_rate": 0.00016699835383485604, "loss": 1.0171, "step": 23680 }, { "epoch": 0.34, "grad_norm": 0.52734375, "learning_rate": 0.00016697976546118886, "loss": 0.859, "step": 23685 }, { "epoch": 0.34, "grad_norm": 0.55859375, "learning_rate": 0.00016696117288915368, "loss": 0.9017, "step": 23690 }, { "epoch": 0.34, "grad_norm": 0.53125, "learning_rate": 0.00016694257611991594, "loss": 1.0393, "step": 23695 }, { "epoch": 0.34, "grad_norm": 0.546875, "learning_rate": 0.00016692397515464125, "loss": 0.9315, "step": 23700 }, { "epoch": 0.34, "grad_norm": 0.703125, "learning_rate": 0.00016690536999449561, "loss": 0.933, "step": 23705 }, { "epoch": 0.34, "grad_norm": 0.5078125, "learning_rate": 0.00016688676064064516, "loss": 0.8206, "step": 23710 }, { "epoch": 0.34, "grad_norm": 0.54296875, "learning_rate": 0.0001668681470942564, "loss": 0.9649, "step": 23715 }, { "epoch": 0.34, "grad_norm": 0.53515625, "learning_rate": 0.000166849529356496, "loss": 0.939, "step": 23720 }, { "epoch": 0.34, "grad_norm": 0.51953125, "learning_rate": 0.00016683090742853097, "loss": 0.9569, "step": 23725 }, { "epoch": 0.34, "grad_norm": 0.55078125, "learning_rate": 0.00016681228131152856, "loss": 0.9348, "step": 23730 }, { "epoch": 0.34, "grad_norm": 0.6015625, "learning_rate": 0.00016679365100665626, "loss": 1.0092, "step": 23735 }, { "epoch": 0.34, "grad_norm": 0.5390625, "learning_rate": 0.00016677501651508184, "loss": 0.9932, "step": 23740 }, { "epoch": 0.34, "grad_norm": 0.62890625, "learning_rate": 0.0001667563778379733, "loss": 0.9631, "step": 23745 }, { "epoch": 0.34, "grad_norm": 0.5234375, "learning_rate": 0.000166737734976499, "loss": 1.0865, "step": 23750 }, { "epoch": 0.34, "grad_norm": 0.55859375, "learning_rate": 0.0001667190879318275, "loss": 1.0609, "step": 23755 }, { "epoch": 0.34, "grad_norm": 0.6015625, "learning_rate": 0.00016670043670512753, "loss": 1.0621, "step": 23760 }, { "epoch": 0.34, "grad_norm": 0.66015625, "learning_rate": 0.00016668178129756824, "loss": 1.0944, "step": 23765 }, { "epoch": 0.34, "grad_norm": 0.5703125, "learning_rate": 0.00016666312171031896, "loss": 0.8663, "step": 23770 }, { "epoch": 0.34, "grad_norm": 0.54296875, "learning_rate": 0.00016664445794454928, "loss": 1.04, "step": 23775 }, { "epoch": 0.34, "grad_norm": 0.5703125, "learning_rate": 0.00016662579000142907, "loss": 0.93, "step": 23780 }, { "epoch": 0.34, "grad_norm": 0.53515625, "learning_rate": 0.00016660711788212847, "loss": 0.9787, "step": 23785 }, { "epoch": 0.34, "grad_norm": 0.640625, "learning_rate": 0.0001665884415878179, "loss": 0.9356, "step": 23790 }, { "epoch": 0.34, "grad_norm": 0.546875, "learning_rate": 0.0001665697611196679, "loss": 0.9909, "step": 23795 }, { "epoch": 0.34, "grad_norm": 0.58203125, "learning_rate": 0.00016655107647884946, "loss": 0.9665, "step": 23800 }, { "epoch": 0.34, "grad_norm": 0.5625, "learning_rate": 0.0001665323876665338, "loss": 0.774, "step": 23805 }, { "epoch": 0.34, "grad_norm": 0.55859375, "learning_rate": 0.00016651369468389228, "loss": 1.0398, "step": 23810 }, { "epoch": 0.34, "grad_norm": 0.68359375, "learning_rate": 0.00016649499753209666, "loss": 1.0225, "step": 23815 }, { "epoch": 0.34, "grad_norm": 0.56640625, "learning_rate": 0.00016647629621231882, "loss": 0.8501, "step": 23820 }, { "epoch": 0.34, "grad_norm": 0.53125, "learning_rate": 0.00016645759072573104, "loss": 1.0198, "step": 23825 }, { "epoch": 0.34, "grad_norm": 0.55859375, "learning_rate": 0.00016643888107350577, "loss": 0.9066, "step": 23830 }, { "epoch": 0.34, "grad_norm": 0.52734375, "learning_rate": 0.0001664201672568158, "loss": 0.9364, "step": 23835 }, { "epoch": 0.34, "grad_norm": 0.5390625, "learning_rate": 0.00016640144927683407, "loss": 0.8964, "step": 23840 }, { "epoch": 0.34, "grad_norm": 0.515625, "learning_rate": 0.00016638272713473387, "loss": 0.9231, "step": 23845 }, { "epoch": 0.34, "grad_norm": 0.59375, "learning_rate": 0.00016636400083168878, "loss": 1.0691, "step": 23850 }, { "epoch": 0.34, "grad_norm": 0.5234375, "learning_rate": 0.00016634527036887245, "loss": 0.8508, "step": 23855 }, { "epoch": 0.34, "grad_norm": 0.55078125, "learning_rate": 0.0001663265357474591, "loss": 0.9175, "step": 23860 }, { "epoch": 0.34, "grad_norm": 0.54296875, "learning_rate": 0.0001663077969686229, "loss": 0.927, "step": 23865 }, { "epoch": 0.34, "grad_norm": 0.498046875, "learning_rate": 0.0001662890540335385, "loss": 0.9039, "step": 23870 }, { "epoch": 0.34, "grad_norm": 0.515625, "learning_rate": 0.00016627030694338067, "loss": 1.0943, "step": 23875 }, { "epoch": 0.34, "grad_norm": 0.609375, "learning_rate": 0.00016625155569932455, "loss": 0.9051, "step": 23880 }, { "epoch": 0.34, "grad_norm": 0.5234375, "learning_rate": 0.00016623280030254542, "loss": 0.9562, "step": 23885 }, { "epoch": 0.34, "grad_norm": 0.55078125, "learning_rate": 0.000166214040754219, "loss": 0.9317, "step": 23890 }, { "epoch": 0.34, "grad_norm": 0.53125, "learning_rate": 0.00016619527705552103, "loss": 0.8951, "step": 23895 }, { "epoch": 0.34, "grad_norm": 0.58984375, "learning_rate": 0.00016617650920762773, "loss": 0.8851, "step": 23900 }, { "epoch": 0.34, "grad_norm": 0.5703125, "learning_rate": 0.00016615773721171545, "loss": 1.0109, "step": 23905 }, { "epoch": 0.34, "grad_norm": 0.58984375, "learning_rate": 0.00016613896106896085, "loss": 1.1413, "step": 23910 }, { "epoch": 0.34, "grad_norm": 0.6796875, "learning_rate": 0.0001661201807805409, "loss": 1.0307, "step": 23915 }, { "epoch": 0.34, "grad_norm": 0.66796875, "learning_rate": 0.00016610139634763265, "loss": 0.9929, "step": 23920 }, { "epoch": 0.34, "grad_norm": 0.51953125, "learning_rate": 0.00016608260777141361, "loss": 0.8565, "step": 23925 }, { "epoch": 0.34, "grad_norm": 0.58203125, "learning_rate": 0.00016606381505306149, "loss": 1.0399, "step": 23930 }, { "epoch": 0.34, "grad_norm": 0.609375, "learning_rate": 0.00016604501819375415, "loss": 1.004, "step": 23935 }, { "epoch": 0.34, "grad_norm": 0.546875, "learning_rate": 0.00016602621719466988, "loss": 0.9961, "step": 23940 }, { "epoch": 0.34, "grad_norm": 0.59765625, "learning_rate": 0.00016600741205698714, "loss": 0.9562, "step": 23945 }, { "epoch": 0.34, "grad_norm": 0.515625, "learning_rate": 0.00016598860278188457, "loss": 0.9537, "step": 23950 }, { "epoch": 0.34, "grad_norm": 0.58203125, "learning_rate": 0.00016596978937054129, "loss": 1.1004, "step": 23955 }, { "epoch": 0.34, "grad_norm": 0.5234375, "learning_rate": 0.00016595097182413643, "loss": 0.9118, "step": 23960 }, { "epoch": 0.34, "grad_norm": 0.546875, "learning_rate": 0.00016593215014384957, "loss": 0.9165, "step": 23965 }, { "epoch": 0.34, "grad_norm": 0.54296875, "learning_rate": 0.00016591332433086044, "loss": 1.1373, "step": 23970 }, { "epoch": 0.34, "grad_norm": 0.494140625, "learning_rate": 0.0001658944943863491, "loss": 0.9159, "step": 23975 }, { "epoch": 0.34, "grad_norm": 0.59375, "learning_rate": 0.00016587566031149576, "loss": 0.8789, "step": 23980 }, { "epoch": 0.34, "grad_norm": 0.5859375, "learning_rate": 0.00016585682210748103, "loss": 1.0135, "step": 23985 }, { "epoch": 0.34, "grad_norm": 0.59375, "learning_rate": 0.0001658379797754857, "loss": 0.864, "step": 23990 }, { "epoch": 0.34, "grad_norm": 0.6015625, "learning_rate": 0.0001658191333166908, "loss": 1.0405, "step": 23995 }, { "epoch": 0.34, "grad_norm": 0.5625, "learning_rate": 0.00016580028273227763, "loss": 0.909, "step": 24000 }, { "epoch": 0.34, "grad_norm": 0.59375, "learning_rate": 0.0001657814280234278, "loss": 0.8624, "step": 24005 }, { "epoch": 0.34, "grad_norm": 0.5859375, "learning_rate": 0.00016576256919132321, "loss": 0.9113, "step": 24010 }, { "epoch": 0.34, "grad_norm": 0.59375, "learning_rate": 0.00016574370623714582, "loss": 0.9907, "step": 24015 }, { "epoch": 0.34, "grad_norm": 0.5546875, "learning_rate": 0.00016572483916207808, "loss": 0.9848, "step": 24020 }, { "epoch": 0.34, "grad_norm": 0.69921875, "learning_rate": 0.00016570596796730257, "loss": 0.9872, "step": 24025 }, { "epoch": 0.34, "grad_norm": 0.5625, "learning_rate": 0.00016568709265400212, "loss": 0.9887, "step": 24030 }, { "epoch": 0.34, "grad_norm": 0.6171875, "learning_rate": 0.00016566821322335992, "loss": 0.9673, "step": 24035 }, { "epoch": 0.34, "grad_norm": 0.609375, "learning_rate": 0.00016564932967655933, "loss": 0.9441, "step": 24040 }, { "epoch": 0.34, "grad_norm": 0.65234375, "learning_rate": 0.00016563044201478396, "loss": 1.0899, "step": 24045 }, { "epoch": 0.34, "grad_norm": 0.50390625, "learning_rate": 0.0001656115502392178, "loss": 0.8546, "step": 24050 }, { "epoch": 0.35, "grad_norm": 0.58203125, "learning_rate": 0.00016559265435104486, "loss": 0.8485, "step": 24055 }, { "epoch": 0.35, "grad_norm": 0.5859375, "learning_rate": 0.00016557375435144968, "loss": 0.9084, "step": 24060 }, { "epoch": 0.35, "grad_norm": 0.5703125, "learning_rate": 0.00016555485024161693, "loss": 0.8803, "step": 24065 }, { "epoch": 0.35, "grad_norm": 0.6328125, "learning_rate": 0.00016553594202273146, "loss": 0.949, "step": 24070 }, { "epoch": 0.35, "grad_norm": 0.59765625, "learning_rate": 0.00016551702969597854, "loss": 1.1374, "step": 24075 }, { "epoch": 0.35, "grad_norm": 0.51171875, "learning_rate": 0.00016549811326254353, "loss": 1.0132, "step": 24080 }, { "epoch": 0.35, "grad_norm": 0.546875, "learning_rate": 0.00016547919272361222, "loss": 0.9281, "step": 24085 }, { "epoch": 0.35, "grad_norm": 0.5234375, "learning_rate": 0.00016546026808037054, "loss": 0.8351, "step": 24090 }, { "epoch": 0.35, "grad_norm": 0.58203125, "learning_rate": 0.0001654413393340047, "loss": 0.9989, "step": 24095 }, { "epoch": 0.35, "grad_norm": 0.53515625, "learning_rate": 0.00016542240648570114, "loss": 0.9545, "step": 24100 }, { "epoch": 0.35, "grad_norm": 0.51171875, "learning_rate": 0.00016540346953664668, "loss": 0.9323, "step": 24105 }, { "epoch": 0.35, "grad_norm": 0.5859375, "learning_rate": 0.00016538452848802824, "loss": 1.0501, "step": 24110 }, { "epoch": 0.35, "grad_norm": 0.5625, "learning_rate": 0.0001653655833410331, "loss": 0.9696, "step": 24115 }, { "epoch": 0.35, "grad_norm": 0.609375, "learning_rate": 0.00016534663409684876, "loss": 0.9939, "step": 24120 }, { "epoch": 0.35, "grad_norm": 0.5625, "learning_rate": 0.00016532768075666295, "loss": 1.0044, "step": 24125 }, { "epoch": 0.35, "grad_norm": 0.5390625, "learning_rate": 0.0001653087233216637, "loss": 0.9425, "step": 24130 }, { "epoch": 0.35, "grad_norm": 0.60546875, "learning_rate": 0.00016528976179303932, "loss": 0.9097, "step": 24135 }, { "epoch": 0.35, "grad_norm": 0.66015625, "learning_rate": 0.0001652707961719783, "loss": 0.927, "step": 24140 }, { "epoch": 0.35, "grad_norm": 0.63671875, "learning_rate": 0.0001652518264596694, "loss": 1.0928, "step": 24145 }, { "epoch": 0.35, "grad_norm": 0.58203125, "learning_rate": 0.00016523285265730177, "loss": 1.136, "step": 24150 }, { "epoch": 0.35, "grad_norm": 0.640625, "learning_rate": 0.00016521387476606462, "loss": 1.1036, "step": 24155 }, { "epoch": 0.35, "grad_norm": 0.515625, "learning_rate": 0.0001651948927871475, "loss": 0.9845, "step": 24160 }, { "epoch": 0.35, "grad_norm": 0.51171875, "learning_rate": 0.00016517590672174027, "loss": 0.8934, "step": 24165 }, { "epoch": 0.35, "grad_norm": 0.51171875, "learning_rate": 0.00016515691657103298, "loss": 0.9262, "step": 24170 }, { "epoch": 0.35, "grad_norm": 0.56640625, "learning_rate": 0.00016513792233621594, "loss": 0.9136, "step": 24175 }, { "epoch": 0.35, "grad_norm": 0.53125, "learning_rate": 0.00016511892401847975, "loss": 0.9593, "step": 24180 }, { "epoch": 0.35, "grad_norm": 0.5546875, "learning_rate": 0.00016509992161901525, "loss": 0.9288, "step": 24185 }, { "epoch": 0.35, "grad_norm": 0.83203125, "learning_rate": 0.0001650809151390135, "loss": 0.9901, "step": 24190 }, { "epoch": 0.35, "grad_norm": 0.5546875, "learning_rate": 0.00016506190457966585, "loss": 0.9776, "step": 24195 }, { "epoch": 0.35, "grad_norm": 0.66796875, "learning_rate": 0.00016504288994216397, "loss": 0.9804, "step": 24200 }, { "epoch": 0.35, "grad_norm": 0.5390625, "learning_rate": 0.00016502387122769964, "loss": 0.8687, "step": 24205 }, { "epoch": 0.35, "grad_norm": 0.58203125, "learning_rate": 0.000165004848437465, "loss": 0.9509, "step": 24210 }, { "epoch": 0.35, "grad_norm": 0.51171875, "learning_rate": 0.00016498582157265245, "loss": 0.9531, "step": 24215 }, { "epoch": 0.35, "grad_norm": 0.5390625, "learning_rate": 0.00016496679063445456, "loss": 0.9361, "step": 24220 }, { "epoch": 0.35, "grad_norm": 0.51953125, "learning_rate": 0.00016494775562406425, "loss": 0.841, "step": 24225 }, { "epoch": 0.35, "grad_norm": 0.69140625, "learning_rate": 0.0001649287165426747, "loss": 1.1791, "step": 24230 }, { "epoch": 0.35, "grad_norm": 0.578125, "learning_rate": 0.0001649096733914792, "loss": 0.9415, "step": 24235 }, { "epoch": 0.35, "grad_norm": 0.5234375, "learning_rate": 0.00016489062617167145, "loss": 0.9468, "step": 24240 }, { "epoch": 0.35, "grad_norm": 0.51171875, "learning_rate": 0.00016487157488444536, "loss": 0.9885, "step": 24245 }, { "epoch": 0.35, "grad_norm": 0.68359375, "learning_rate": 0.00016485251953099505, "loss": 0.95, "step": 24250 }, { "epoch": 0.35, "grad_norm": 0.69140625, "learning_rate": 0.00016483346011251498, "loss": 1.0973, "step": 24255 }, { "epoch": 0.35, "grad_norm": 0.61328125, "learning_rate": 0.0001648143966301998, "loss": 0.9447, "step": 24260 }, { "epoch": 0.35, "grad_norm": 0.5703125, "learning_rate": 0.00016479532908524438, "loss": 0.9326, "step": 24265 }, { "epoch": 0.35, "grad_norm": 0.6171875, "learning_rate": 0.000164776257478844, "loss": 0.909, "step": 24270 }, { "epoch": 0.35, "grad_norm": 0.625, "learning_rate": 0.00016475718181219398, "loss": 0.985, "step": 24275 }, { "epoch": 0.35, "grad_norm": 0.546875, "learning_rate": 0.00016473810208649003, "loss": 0.8886, "step": 24280 }, { "epoch": 0.35, "grad_norm": 0.578125, "learning_rate": 0.00016471901830292815, "loss": 1.0953, "step": 24285 }, { "epoch": 0.35, "grad_norm": 0.61328125, "learning_rate": 0.0001646999304627045, "loss": 0.9343, "step": 24290 }, { "epoch": 0.35, "grad_norm": 0.57421875, "learning_rate": 0.0001646808385670155, "loss": 0.9034, "step": 24295 }, { "epoch": 0.35, "grad_norm": 0.625, "learning_rate": 0.00016466174261705785, "loss": 0.9041, "step": 24300 }, { "epoch": 0.35, "grad_norm": 0.609375, "learning_rate": 0.00016464264261402859, "loss": 0.8977, "step": 24305 }, { "epoch": 0.35, "grad_norm": 0.55078125, "learning_rate": 0.0001646235385591248, "loss": 1.0345, "step": 24310 }, { "epoch": 0.35, "grad_norm": 0.494140625, "learning_rate": 0.0001646044304535441, "loss": 1.0311, "step": 24315 }, { "epoch": 0.35, "grad_norm": 0.52734375, "learning_rate": 0.00016458531829848406, "loss": 1.018, "step": 24320 }, { "epoch": 0.35, "grad_norm": 0.69921875, "learning_rate": 0.0001645662020951427, "loss": 0.8999, "step": 24325 }, { "epoch": 0.35, "grad_norm": 0.59765625, "learning_rate": 0.0001645470818447183, "loss": 1.0338, "step": 24330 }, { "epoch": 0.35, "grad_norm": 0.69921875, "learning_rate": 0.00016452795754840928, "loss": 0.9788, "step": 24335 }, { "epoch": 0.35, "grad_norm": 0.56640625, "learning_rate": 0.0001645088292074144, "loss": 0.9003, "step": 24340 }, { "epoch": 0.35, "grad_norm": 0.59765625, "learning_rate": 0.0001644896968229326, "loss": 0.9124, "step": 24345 }, { "epoch": 0.35, "grad_norm": 0.5390625, "learning_rate": 0.0001644705603961632, "loss": 0.9349, "step": 24350 }, { "epoch": 0.35, "grad_norm": 0.56640625, "learning_rate": 0.00016445141992830562, "loss": 0.9792, "step": 24355 }, { "epoch": 0.35, "grad_norm": 0.58203125, "learning_rate": 0.00016443227542055965, "loss": 1.0906, "step": 24360 }, { "epoch": 0.35, "grad_norm": 0.59375, "learning_rate": 0.00016441312687412527, "loss": 0.8854, "step": 24365 }, { "epoch": 0.35, "grad_norm": 0.63671875, "learning_rate": 0.00016439397429020272, "loss": 1.1025, "step": 24370 }, { "epoch": 0.35, "grad_norm": 0.52734375, "learning_rate": 0.00016437481766999254, "loss": 0.9103, "step": 24375 }, { "epoch": 0.35, "grad_norm": 0.59765625, "learning_rate": 0.00016435565701469548, "loss": 1.1122, "step": 24380 }, { "epoch": 0.35, "grad_norm": 0.5703125, "learning_rate": 0.00016433649232551253, "loss": 0.9879, "step": 24385 }, { "epoch": 0.35, "grad_norm": 0.52734375, "learning_rate": 0.00016431732360364497, "loss": 0.9769, "step": 24390 }, { "epoch": 0.35, "grad_norm": 0.52734375, "learning_rate": 0.0001642981508502943, "loss": 0.8533, "step": 24395 }, { "epoch": 0.35, "grad_norm": 0.55859375, "learning_rate": 0.00016427897406666233, "loss": 1.0708, "step": 24400 }, { "epoch": 0.35, "grad_norm": 0.5078125, "learning_rate": 0.00016425979325395104, "loss": 0.8953, "step": 24405 }, { "epoch": 0.35, "grad_norm": 0.55859375, "learning_rate": 0.00016424060841336275, "loss": 0.9551, "step": 24410 }, { "epoch": 0.35, "grad_norm": 0.56640625, "learning_rate": 0.00016422141954609994, "loss": 1.033, "step": 24415 }, { "epoch": 0.35, "grad_norm": 0.58203125, "learning_rate": 0.0001642022266533654, "loss": 1.1218, "step": 24420 }, { "epoch": 0.35, "grad_norm": 0.6015625, "learning_rate": 0.00016418302973636223, "loss": 0.8115, "step": 24425 }, { "epoch": 0.35, "grad_norm": 0.65625, "learning_rate": 0.0001641638287962936, "loss": 1.0355, "step": 24430 }, { "epoch": 0.35, "grad_norm": 0.5234375, "learning_rate": 0.00016414462383436312, "loss": 0.9497, "step": 24435 }, { "epoch": 0.35, "grad_norm": 0.59765625, "learning_rate": 0.00016412541485177456, "loss": 0.9697, "step": 24440 }, { "epoch": 0.35, "grad_norm": 0.703125, "learning_rate": 0.00016410620184973196, "loss": 1.0461, "step": 24445 }, { "epoch": 0.35, "grad_norm": 0.6015625, "learning_rate": 0.00016408698482943962, "loss": 0.9266, "step": 24450 }, { "epoch": 0.35, "grad_norm": 0.62890625, "learning_rate": 0.0001640677637921021, "loss": 0.9798, "step": 24455 }, { "epoch": 0.35, "grad_norm": 0.56640625, "learning_rate": 0.00016404853873892416, "loss": 0.9772, "step": 24460 }, { "epoch": 0.35, "grad_norm": 0.57421875, "learning_rate": 0.00016402930967111088, "loss": 0.8672, "step": 24465 }, { "epoch": 0.35, "grad_norm": 0.62890625, "learning_rate": 0.00016401007658986753, "loss": 1.0654, "step": 24470 }, { "epoch": 0.35, "grad_norm": 0.58203125, "learning_rate": 0.00016399083949639968, "loss": 0.9599, "step": 24475 }, { "epoch": 0.35, "grad_norm": 0.58203125, "learning_rate": 0.00016397159839191315, "loss": 1.083, "step": 24480 }, { "epoch": 0.35, "grad_norm": 0.5546875, "learning_rate": 0.00016395235327761395, "loss": 0.9926, "step": 24485 }, { "epoch": 0.35, "grad_norm": 0.59765625, "learning_rate": 0.00016393310415470844, "loss": 1.0446, "step": 24490 }, { "epoch": 0.35, "grad_norm": 0.58203125, "learning_rate": 0.00016391385102440314, "loss": 1.0361, "step": 24495 }, { "epoch": 0.35, "grad_norm": 0.578125, "learning_rate": 0.00016389459388790485, "loss": 1.0226, "step": 24500 }, { "epoch": 0.35, "grad_norm": 0.62109375, "learning_rate": 0.00016387533274642065, "loss": 0.9835, "step": 24505 }, { "epoch": 0.35, "grad_norm": 0.60546875, "learning_rate": 0.00016385606760115784, "loss": 1.0376, "step": 24510 }, { "epoch": 0.35, "grad_norm": 0.56640625, "learning_rate": 0.00016383679845332401, "loss": 0.9766, "step": 24515 }, { "epoch": 0.35, "grad_norm": 0.546875, "learning_rate": 0.00016381752530412693, "loss": 0.9625, "step": 24520 }, { "epoch": 0.35, "grad_norm": 0.5859375, "learning_rate": 0.00016379824815477466, "loss": 1.0135, "step": 24525 }, { "epoch": 0.35, "grad_norm": 0.671875, "learning_rate": 0.0001637789670064756, "loss": 0.905, "step": 24530 }, { "epoch": 0.35, "grad_norm": 0.53125, "learning_rate": 0.0001637596818604382, "loss": 0.963, "step": 24535 }, { "epoch": 0.35, "grad_norm": 0.56640625, "learning_rate": 0.00016374039271787133, "loss": 0.9131, "step": 24540 }, { "epoch": 0.35, "grad_norm": 0.75, "learning_rate": 0.00016372109957998404, "loss": 0.8397, "step": 24545 }, { "epoch": 0.35, "grad_norm": 0.52734375, "learning_rate": 0.00016370180244798567, "loss": 0.7781, "step": 24550 }, { "epoch": 0.35, "grad_norm": 0.5546875, "learning_rate": 0.00016368250132308578, "loss": 0.9317, "step": 24555 }, { "epoch": 0.35, "grad_norm": 0.5234375, "learning_rate": 0.0001636631962064942, "loss": 0.9688, "step": 24560 }, { "epoch": 0.35, "grad_norm": 0.91015625, "learning_rate": 0.00016364388709942093, "loss": 1.1305, "step": 24565 }, { "epoch": 0.35, "grad_norm": 0.52734375, "learning_rate": 0.00016362457400307637, "loss": 0.9365, "step": 24570 }, { "epoch": 0.35, "grad_norm": 0.58984375, "learning_rate": 0.000163605256918671, "loss": 1.0317, "step": 24575 }, { "epoch": 0.35, "grad_norm": 1.0703125, "learning_rate": 0.00016358593584741576, "loss": 1.0129, "step": 24580 }, { "epoch": 0.35, "grad_norm": 0.52734375, "learning_rate": 0.00016356661079052157, "loss": 0.882, "step": 24585 }, { "epoch": 0.35, "grad_norm": 0.5625, "learning_rate": 0.00016354728174919984, "loss": 0.9529, "step": 24590 }, { "epoch": 0.35, "grad_norm": 0.53515625, "learning_rate": 0.00016352794872466215, "loss": 0.9644, "step": 24595 }, { "epoch": 0.35, "grad_norm": 0.6015625, "learning_rate": 0.00016350861171812023, "loss": 0.8416, "step": 24600 }, { "epoch": 0.35, "grad_norm": 0.58203125, "learning_rate": 0.00016348927073078624, "loss": 0.9917, "step": 24605 }, { "epoch": 0.35, "grad_norm": 0.57421875, "learning_rate": 0.00016346992576387242, "loss": 1.1237, "step": 24610 }, { "epoch": 0.35, "grad_norm": 0.578125, "learning_rate": 0.0001634505768185914, "loss": 0.8601, "step": 24615 }, { "epoch": 0.35, "grad_norm": 0.6328125, "learning_rate": 0.00016343122389615594, "loss": 1.0272, "step": 24620 }, { "epoch": 0.35, "grad_norm": 0.5859375, "learning_rate": 0.00016341186699777912, "loss": 0.9921, "step": 24625 }, { "epoch": 0.35, "grad_norm": 0.578125, "learning_rate": 0.00016339250612467426, "loss": 0.7782, "step": 24630 }, { "epoch": 0.35, "grad_norm": 0.462890625, "learning_rate": 0.00016337314127805495, "loss": 0.9831, "step": 24635 }, { "epoch": 0.35, "grad_norm": 0.53125, "learning_rate": 0.0001633537724591349, "loss": 0.8844, "step": 24640 }, { "epoch": 0.35, "grad_norm": 0.6875, "learning_rate": 0.00016333439966912828, "loss": 0.9384, "step": 24645 }, { "epoch": 0.35, "grad_norm": 0.55859375, "learning_rate": 0.00016331502290924937, "loss": 0.8616, "step": 24650 }, { "epoch": 0.35, "grad_norm": 0.51953125, "learning_rate": 0.00016329564218071273, "loss": 0.7969, "step": 24655 }, { "epoch": 0.35, "grad_norm": 0.56640625, "learning_rate": 0.0001632762574847331, "loss": 1.045, "step": 24660 }, { "epoch": 0.35, "grad_norm": 0.53515625, "learning_rate": 0.0001632568688225256, "loss": 1.0361, "step": 24665 }, { "epoch": 0.35, "grad_norm": 0.69921875, "learning_rate": 0.00016323747619530554, "loss": 0.9363, "step": 24670 }, { "epoch": 0.35, "grad_norm": 0.609375, "learning_rate": 0.00016321807960428843, "loss": 0.9388, "step": 24675 }, { "epoch": 0.35, "grad_norm": 0.5859375, "learning_rate": 0.00016319867905069009, "loss": 1.0529, "step": 24680 }, { "epoch": 0.35, "grad_norm": 0.578125, "learning_rate": 0.00016317927453572657, "loss": 0.8853, "step": 24685 }, { "epoch": 0.35, "grad_norm": 0.57421875, "learning_rate": 0.00016315986606061416, "loss": 0.9914, "step": 24690 }, { "epoch": 0.35, "grad_norm": 0.53515625, "learning_rate": 0.00016314045362656945, "loss": 1.0792, "step": 24695 }, { "epoch": 0.35, "grad_norm": 0.62890625, "learning_rate": 0.0001631210372348092, "loss": 0.9683, "step": 24700 }, { "epoch": 0.35, "grad_norm": 0.56640625, "learning_rate": 0.00016310161688655036, "loss": 1.0693, "step": 24705 }, { "epoch": 0.35, "grad_norm": 0.66796875, "learning_rate": 0.00016308219258301038, "loss": 1.0178, "step": 24710 }, { "epoch": 0.35, "grad_norm": 0.5234375, "learning_rate": 0.0001630627643254067, "loss": 1.0458, "step": 24715 }, { "epoch": 0.35, "grad_norm": 0.59765625, "learning_rate": 0.00016304333211495715, "loss": 0.9108, "step": 24720 }, { "epoch": 0.35, "grad_norm": 0.54296875, "learning_rate": 0.00016302389595287975, "loss": 0.8966, "step": 24725 }, { "epoch": 0.35, "grad_norm": 0.498046875, "learning_rate": 0.00016300445584039274, "loss": 0.8942, "step": 24730 }, { "epoch": 0.35, "grad_norm": 0.58203125, "learning_rate": 0.0001629850117787147, "loss": 0.8664, "step": 24735 }, { "epoch": 0.35, "grad_norm": 0.67578125, "learning_rate": 0.0001629655637690644, "loss": 0.9213, "step": 24740 }, { "epoch": 0.35, "grad_norm": 0.515625, "learning_rate": 0.00016294611181266082, "loss": 0.9186, "step": 24745 }, { "epoch": 0.36, "grad_norm": 0.61328125, "learning_rate": 0.00016292665591072328, "loss": 0.8974, "step": 24750 }, { "epoch": 0.36, "grad_norm": 0.67578125, "learning_rate": 0.0001629071960644713, "loss": 0.9719, "step": 24755 }, { "epoch": 0.36, "grad_norm": 0.5, "learning_rate": 0.00016288773227512459, "loss": 0.9197, "step": 24760 }, { "epoch": 0.36, "grad_norm": 0.59765625, "learning_rate": 0.0001628682645439032, "loss": 0.9864, "step": 24765 }, { "epoch": 0.36, "grad_norm": 0.5546875, "learning_rate": 0.0001628487928720274, "loss": 0.9193, "step": 24770 }, { "epoch": 0.36, "grad_norm": 0.5703125, "learning_rate": 0.0001628293172607177, "loss": 0.8703, "step": 24775 }, { "epoch": 0.36, "grad_norm": 0.474609375, "learning_rate": 0.0001628098377111948, "loss": 1.0337, "step": 24780 }, { "epoch": 0.36, "grad_norm": 0.65234375, "learning_rate": 0.00016279035422467976, "loss": 0.8703, "step": 24785 }, { "epoch": 0.36, "grad_norm": 0.61328125, "learning_rate": 0.00016277086680239382, "loss": 0.8611, "step": 24790 }, { "epoch": 0.36, "grad_norm": 0.51171875, "learning_rate": 0.00016275137544555842, "loss": 1.1237, "step": 24795 }, { "epoch": 0.36, "grad_norm": 0.63671875, "learning_rate": 0.00016273188015539537, "loss": 1.1163, "step": 24800 }, { "epoch": 0.36, "grad_norm": 0.53515625, "learning_rate": 0.00016271238093312662, "loss": 0.9018, "step": 24805 }, { "epoch": 0.36, "grad_norm": 0.55078125, "learning_rate": 0.00016269287777997442, "loss": 0.9558, "step": 24810 }, { "epoch": 0.36, "grad_norm": 0.640625, "learning_rate": 0.00016267337069716123, "loss": 0.926, "step": 24815 }, { "epoch": 0.36, "grad_norm": 0.5703125, "learning_rate": 0.00016265385968590977, "loss": 1.006, "step": 24820 }, { "epoch": 0.36, "grad_norm": 0.57421875, "learning_rate": 0.00016263434474744304, "loss": 0.9435, "step": 24825 }, { "epoch": 0.36, "grad_norm": 0.58984375, "learning_rate": 0.00016261482588298426, "loss": 0.9433, "step": 24830 }, { "epoch": 0.36, "grad_norm": 0.57421875, "learning_rate": 0.00016259530309375685, "loss": 0.952, "step": 24835 }, { "epoch": 0.36, "grad_norm": 0.5078125, "learning_rate": 0.00016257577638098457, "loss": 0.9448, "step": 24840 }, { "epoch": 0.36, "grad_norm": 0.53125, "learning_rate": 0.00016255624574589136, "loss": 0.9726, "step": 24845 }, { "epoch": 0.36, "grad_norm": 0.546875, "learning_rate": 0.0001625367111897014, "loss": 0.9088, "step": 24850 }, { "epoch": 0.36, "grad_norm": 0.65625, "learning_rate": 0.0001625171727136392, "loss": 0.8722, "step": 24855 }, { "epoch": 0.36, "grad_norm": 0.69921875, "learning_rate": 0.0001624976303189294, "loss": 1.1047, "step": 24860 }, { "epoch": 0.36, "grad_norm": 0.61328125, "learning_rate": 0.00016247808400679693, "loss": 1.0619, "step": 24865 }, { "epoch": 0.36, "grad_norm": 0.52734375, "learning_rate": 0.00016245853377846702, "loss": 1.0366, "step": 24870 }, { "epoch": 0.36, "grad_norm": 0.59375, "learning_rate": 0.00016243897963516508, "loss": 1.0427, "step": 24875 }, { "epoch": 0.36, "grad_norm": 0.5, "learning_rate": 0.00016241942157811678, "loss": 1.0127, "step": 24880 }, { "epoch": 0.36, "grad_norm": 0.578125, "learning_rate": 0.00016239985960854805, "loss": 0.9216, "step": 24885 }, { "epoch": 0.36, "grad_norm": 0.55859375, "learning_rate": 0.00016238029372768505, "loss": 0.8924, "step": 24890 }, { "epoch": 0.36, "grad_norm": 0.5625, "learning_rate": 0.00016236072393675417, "loss": 0.9695, "step": 24895 }, { "epoch": 0.36, "grad_norm": 0.5703125, "learning_rate": 0.0001623411502369821, "loss": 0.9501, "step": 24900 }, { "epoch": 0.36, "grad_norm": 0.6015625, "learning_rate": 0.00016232157262959573, "loss": 1.0881, "step": 24905 }, { "epoch": 0.36, "grad_norm": 0.7265625, "learning_rate": 0.00016230199111582226, "loss": 0.9827, "step": 24910 }, { "epoch": 0.36, "grad_norm": 0.55859375, "learning_rate": 0.00016228240569688898, "loss": 0.9261, "step": 24915 }, { "epoch": 0.36, "grad_norm": 0.55078125, "learning_rate": 0.0001622628163740236, "loss": 0.9561, "step": 24920 }, { "epoch": 0.36, "grad_norm": 0.578125, "learning_rate": 0.00016224322314845394, "loss": 0.979, "step": 24925 }, { "epoch": 0.36, "grad_norm": 0.59765625, "learning_rate": 0.00016222362602140818, "loss": 1.0244, "step": 24930 }, { "epoch": 0.36, "grad_norm": 0.55078125, "learning_rate": 0.0001622040249941147, "loss": 0.9016, "step": 24935 }, { "epoch": 0.36, "grad_norm": 0.578125, "learning_rate": 0.00016218442006780208, "loss": 0.928, "step": 24940 }, { "epoch": 0.36, "grad_norm": 0.5859375, "learning_rate": 0.00016216481124369918, "loss": 1.0769, "step": 24945 }, { "epoch": 0.36, "grad_norm": 0.5859375, "learning_rate": 0.0001621451985230351, "loss": 1.0409, "step": 24950 }, { "epoch": 0.36, "grad_norm": 0.65234375, "learning_rate": 0.00016212558190703923, "loss": 0.987, "step": 24955 }, { "epoch": 0.36, "grad_norm": 0.5546875, "learning_rate": 0.00016210596139694112, "loss": 1.0778, "step": 24960 }, { "epoch": 0.36, "grad_norm": 0.54296875, "learning_rate": 0.0001620863369939706, "loss": 0.9534, "step": 24965 }, { "epoch": 0.36, "grad_norm": 0.7421875, "learning_rate": 0.0001620667086993578, "loss": 1.0017, "step": 24970 }, { "epoch": 0.36, "grad_norm": 0.52734375, "learning_rate": 0.00016204707651433297, "loss": 0.8229, "step": 24975 }, { "epoch": 0.36, "grad_norm": 0.59375, "learning_rate": 0.00016202744044012675, "loss": 0.8823, "step": 24980 }, { "epoch": 0.36, "grad_norm": 0.640625, "learning_rate": 0.0001620078004779699, "loss": 0.9721, "step": 24985 }, { "epoch": 0.36, "grad_norm": 0.65234375, "learning_rate": 0.0001619881566290935, "loss": 1.0124, "step": 24990 }, { "epoch": 0.36, "grad_norm": 0.5234375, "learning_rate": 0.00016196850889472887, "loss": 0.9635, "step": 24995 }, { "epoch": 0.36, "grad_norm": 0.703125, "learning_rate": 0.00016194885727610747, "loss": 1.0222, "step": 25000 }, { "epoch": 0.36, "grad_norm": 0.55078125, "learning_rate": 0.00016192920177446118, "loss": 0.9272, "step": 25005 }, { "epoch": 0.36, "grad_norm": 0.57421875, "learning_rate": 0.00016190954239102197, "loss": 0.8514, "step": 25010 }, { "epoch": 0.36, "grad_norm": 0.5546875, "learning_rate": 0.00016188987912702215, "loss": 0.9072, "step": 25015 }, { "epoch": 0.36, "grad_norm": 0.578125, "learning_rate": 0.00016187021198369426, "loss": 1.005, "step": 25020 }, { "epoch": 0.36, "grad_norm": 0.546875, "learning_rate": 0.00016185054096227094, "loss": 0.9324, "step": 25025 }, { "epoch": 0.36, "grad_norm": 0.5, "learning_rate": 0.00016183086606398533, "loss": 0.8911, "step": 25030 }, { "epoch": 0.36, "grad_norm": 0.61328125, "learning_rate": 0.0001618111872900706, "loss": 1.1515, "step": 25035 }, { "epoch": 0.36, "grad_norm": 0.59765625, "learning_rate": 0.00016179150464176023, "loss": 1.0191, "step": 25040 }, { "epoch": 0.36, "grad_norm": 0.58984375, "learning_rate": 0.000161771818120288, "loss": 0.8963, "step": 25045 }, { "epoch": 0.36, "grad_norm": 0.53515625, "learning_rate": 0.00016175212772688786, "loss": 0.9288, "step": 25050 }, { "epoch": 0.36, "grad_norm": 0.52734375, "learning_rate": 0.00016173243346279402, "loss": 0.9832, "step": 25055 }, { "epoch": 0.36, "grad_norm": 0.5703125, "learning_rate": 0.0001617127353292409, "loss": 0.8714, "step": 25060 }, { "epoch": 0.36, "grad_norm": 0.478515625, "learning_rate": 0.0001616930333274633, "loss": 1.0185, "step": 25065 }, { "epoch": 0.36, "grad_norm": 0.5234375, "learning_rate": 0.0001616733274586961, "loss": 0.9872, "step": 25070 }, { "epoch": 0.36, "grad_norm": 0.55078125, "learning_rate": 0.00016165361772417448, "loss": 0.9913, "step": 25075 }, { "epoch": 0.36, "grad_norm": 0.5546875, "learning_rate": 0.0001616339041251339, "loss": 0.9422, "step": 25080 }, { "epoch": 0.36, "grad_norm": 0.58203125, "learning_rate": 0.00016161418666281, "loss": 0.9239, "step": 25085 }, { "epoch": 0.36, "grad_norm": 0.56640625, "learning_rate": 0.0001615944653384387, "loss": 0.8856, "step": 25090 }, { "epoch": 0.36, "grad_norm": 0.54296875, "learning_rate": 0.00016157474015325617, "loss": 0.9683, "step": 25095 }, { "epoch": 0.36, "grad_norm": 0.55078125, "learning_rate": 0.0001615550111084988, "loss": 0.9395, "step": 25100 }, { "epoch": 0.36, "grad_norm": 0.60546875, "learning_rate": 0.00016153527820540324, "loss": 0.8642, "step": 25105 }, { "epoch": 0.36, "grad_norm": 0.58203125, "learning_rate": 0.00016151554144520637, "loss": 1.0026, "step": 25110 }, { "epoch": 0.36, "grad_norm": 0.5390625, "learning_rate": 0.00016149580082914526, "loss": 0.9313, "step": 25115 }, { "epoch": 0.36, "grad_norm": 0.58203125, "learning_rate": 0.00016147605635845734, "loss": 1.0754, "step": 25120 }, { "epoch": 0.36, "grad_norm": 0.6171875, "learning_rate": 0.00016145630803438018, "loss": 0.9872, "step": 25125 }, { "epoch": 0.36, "grad_norm": 0.51953125, "learning_rate": 0.00016143655585815165, "loss": 0.9619, "step": 25130 }, { "epoch": 0.36, "grad_norm": 0.5625, "learning_rate": 0.00016141679983100983, "loss": 0.965, "step": 25135 }, { "epoch": 0.36, "grad_norm": 0.62890625, "learning_rate": 0.00016139703995419303, "loss": 0.9198, "step": 25140 }, { "epoch": 0.36, "grad_norm": 0.55078125, "learning_rate": 0.00016137727622893988, "loss": 1.0207, "step": 25145 }, { "epoch": 0.36, "grad_norm": 0.52734375, "learning_rate": 0.00016135750865648913, "loss": 0.8741, "step": 25150 }, { "epoch": 0.36, "grad_norm": 0.69140625, "learning_rate": 0.00016133773723807986, "loss": 0.9791, "step": 25155 }, { "epoch": 0.36, "grad_norm": 0.609375, "learning_rate": 0.00016131796197495134, "loss": 0.8629, "step": 25160 }, { "epoch": 0.36, "grad_norm": 0.58203125, "learning_rate": 0.00016129818286834315, "loss": 0.875, "step": 25165 }, { "epoch": 0.36, "grad_norm": 0.546875, "learning_rate": 0.00016127839991949503, "loss": 0.9423, "step": 25170 }, { "epoch": 0.36, "grad_norm": 0.5546875, "learning_rate": 0.00016125861312964705, "loss": 0.9443, "step": 25175 }, { "epoch": 0.36, "grad_norm": 0.58203125, "learning_rate": 0.00016123882250003942, "loss": 0.8957, "step": 25180 }, { "epoch": 0.36, "grad_norm": 0.5234375, "learning_rate": 0.0001612190280319126, "loss": 1.0252, "step": 25185 }, { "epoch": 0.36, "grad_norm": 0.54296875, "learning_rate": 0.00016119922972650743, "loss": 0.8536, "step": 25190 }, { "epoch": 0.36, "grad_norm": 0.6015625, "learning_rate": 0.00016117942758506483, "loss": 0.9057, "step": 25195 }, { "epoch": 0.36, "grad_norm": 0.5625, "learning_rate": 0.00016115962160882604, "loss": 0.917, "step": 25200 }, { "epoch": 0.36, "grad_norm": 0.546875, "learning_rate": 0.0001611398117990325, "loss": 0.9083, "step": 25205 }, { "epoch": 0.36, "grad_norm": 0.58984375, "learning_rate": 0.00016111999815692594, "loss": 1.0793, "step": 25210 }, { "epoch": 0.36, "grad_norm": 0.58984375, "learning_rate": 0.00016110018068374825, "loss": 0.9917, "step": 25215 }, { "epoch": 0.36, "grad_norm": 0.52734375, "learning_rate": 0.0001610803593807417, "loss": 1.0187, "step": 25220 }, { "epoch": 0.36, "grad_norm": 0.6015625, "learning_rate": 0.0001610605342491486, "loss": 0.8417, "step": 25225 }, { "epoch": 0.36, "grad_norm": 0.578125, "learning_rate": 0.00016104070529021172, "loss": 0.9645, "step": 25230 }, { "epoch": 0.36, "grad_norm": 0.52734375, "learning_rate": 0.00016102087250517388, "loss": 0.882, "step": 25235 }, { "epoch": 0.36, "grad_norm": 0.51953125, "learning_rate": 0.00016100103589527826, "loss": 1.0314, "step": 25240 }, { "epoch": 0.36, "grad_norm": 0.640625, "learning_rate": 0.00016098119546176825, "loss": 1.0143, "step": 25245 }, { "epoch": 0.36, "grad_norm": 0.61328125, "learning_rate": 0.00016096135120588744, "loss": 0.9423, "step": 25250 }, { "epoch": 0.36, "grad_norm": 0.60546875, "learning_rate": 0.00016094150312887973, "loss": 1.1706, "step": 25255 }, { "epoch": 0.36, "grad_norm": 0.62109375, "learning_rate": 0.0001609216512319892, "loss": 0.9264, "step": 25260 }, { "epoch": 0.36, "grad_norm": 0.54296875, "learning_rate": 0.00016090179551646013, "loss": 0.8544, "step": 25265 }, { "epoch": 0.36, "grad_norm": 0.78515625, "learning_rate": 0.00016088193598353724, "loss": 0.968, "step": 25270 }, { "epoch": 0.36, "grad_norm": 0.5625, "learning_rate": 0.00016086207263446518, "loss": 0.9744, "step": 25275 }, { "epoch": 0.36, "grad_norm": 0.62890625, "learning_rate": 0.00016084220547048916, "loss": 1.0384, "step": 25280 }, { "epoch": 0.36, "grad_norm": 0.6015625, "learning_rate": 0.00016082233449285437, "loss": 1.0272, "step": 25285 }, { "epoch": 0.36, "grad_norm": 0.54296875, "learning_rate": 0.00016080245970280638, "loss": 0.9706, "step": 25290 }, { "epoch": 0.36, "grad_norm": 0.59375, "learning_rate": 0.000160782581101591, "loss": 1.0, "step": 25295 }, { "epoch": 0.36, "grad_norm": 0.53125, "learning_rate": 0.00016076269869045418, "loss": 0.912, "step": 25300 }, { "epoch": 0.36, "grad_norm": 0.54296875, "learning_rate": 0.0001607428124706422, "loss": 0.8741, "step": 25305 }, { "epoch": 0.36, "grad_norm": 0.62890625, "learning_rate": 0.00016072292244340158, "loss": 0.8362, "step": 25310 }, { "epoch": 0.36, "grad_norm": 0.55859375, "learning_rate": 0.00016070302860997902, "loss": 0.8294, "step": 25315 }, { "epoch": 0.36, "grad_norm": 0.56640625, "learning_rate": 0.00016068313097162147, "loss": 0.9885, "step": 25320 }, { "epoch": 0.36, "grad_norm": 0.58203125, "learning_rate": 0.0001606632295295762, "loss": 0.8356, "step": 25325 }, { "epoch": 0.36, "grad_norm": 0.6953125, "learning_rate": 0.00016064332428509056, "loss": 1.0694, "step": 25330 }, { "epoch": 0.36, "grad_norm": 0.58984375, "learning_rate": 0.00016062341523941234, "loss": 1.0154, "step": 25335 }, { "epoch": 0.36, "grad_norm": 0.59375, "learning_rate": 0.00016060350239378935, "loss": 0.9468, "step": 25340 }, { "epoch": 0.36, "grad_norm": 0.57421875, "learning_rate": 0.00016058358574946985, "loss": 0.9269, "step": 25345 }, { "epoch": 0.36, "grad_norm": 0.57421875, "learning_rate": 0.0001605636653077022, "loss": 1.0031, "step": 25350 }, { "epoch": 0.36, "grad_norm": 0.609375, "learning_rate": 0.000160543741069735, "loss": 0.9001, "step": 25355 }, { "epoch": 0.36, "grad_norm": 0.59765625, "learning_rate": 0.0001605238130368172, "loss": 0.9295, "step": 25360 }, { "epoch": 0.36, "grad_norm": 0.5234375, "learning_rate": 0.00016050388121019782, "loss": 0.8978, "step": 25365 }, { "epoch": 0.36, "grad_norm": 0.56640625, "learning_rate": 0.00016048394559112626, "loss": 0.847, "step": 25370 }, { "epoch": 0.36, "grad_norm": 0.765625, "learning_rate": 0.00016046400618085214, "loss": 0.9054, "step": 25375 }, { "epoch": 0.36, "grad_norm": 0.5859375, "learning_rate": 0.00016044406298062526, "loss": 0.9974, "step": 25380 }, { "epoch": 0.36, "grad_norm": 0.5625, "learning_rate": 0.00016042411599169563, "loss": 1.1149, "step": 25385 }, { "epoch": 0.36, "grad_norm": 0.5859375, "learning_rate": 0.0001604041652153136, "loss": 0.9138, "step": 25390 }, { "epoch": 0.36, "grad_norm": 0.53125, "learning_rate": 0.0001603842106527297, "loss": 0.8508, "step": 25395 }, { "epoch": 0.36, "grad_norm": 0.65625, "learning_rate": 0.00016036425230519475, "loss": 0.9259, "step": 25400 }, { "epoch": 0.36, "grad_norm": 0.55078125, "learning_rate": 0.00016034429017395966, "loss": 0.8668, "step": 25405 }, { "epoch": 0.36, "grad_norm": 0.640625, "learning_rate": 0.00016032432426027577, "loss": 1.0463, "step": 25410 }, { "epoch": 0.36, "grad_norm": 0.6015625, "learning_rate": 0.00016030435456539452, "loss": 0.9397, "step": 25415 }, { "epoch": 0.36, "grad_norm": 0.58203125, "learning_rate": 0.00016028438109056762, "loss": 0.9873, "step": 25420 }, { "epoch": 0.36, "grad_norm": 0.55859375, "learning_rate": 0.00016026440383704708, "loss": 1.0089, "step": 25425 }, { "epoch": 0.36, "grad_norm": 0.51953125, "learning_rate": 0.00016024442280608507, "loss": 1.0219, "step": 25430 }, { "epoch": 0.36, "grad_norm": 0.609375, "learning_rate": 0.00016022443799893404, "loss": 0.9601, "step": 25435 }, { "epoch": 0.36, "grad_norm": 0.55078125, "learning_rate": 0.00016020444941684662, "loss": 1.0713, "step": 25440 }, { "epoch": 0.36, "grad_norm": 0.52734375, "learning_rate": 0.00016018445706107576, "loss": 0.8197, "step": 25445 }, { "epoch": 0.37, "grad_norm": 0.60546875, "learning_rate": 0.00016016446093287457, "loss": 0.9444, "step": 25450 }, { "epoch": 0.37, "grad_norm": 0.58203125, "learning_rate": 0.00016014446103349648, "loss": 0.9886, "step": 25455 }, { "epoch": 0.37, "grad_norm": 0.5234375, "learning_rate": 0.00016012445736419503, "loss": 0.8165, "step": 25460 }, { "epoch": 0.37, "grad_norm": 0.5703125, "learning_rate": 0.00016010444992622415, "loss": 1.0564, "step": 25465 }, { "epoch": 0.37, "grad_norm": 0.59375, "learning_rate": 0.00016008443872083788, "loss": 0.9417, "step": 25470 }, { "epoch": 0.37, "grad_norm": 0.59765625, "learning_rate": 0.00016006442374929058, "loss": 0.9726, "step": 25475 }, { "epoch": 0.37, "grad_norm": 0.5625, "learning_rate": 0.00016004440501283677, "loss": 0.9835, "step": 25480 }, { "epoch": 0.37, "grad_norm": 0.5625, "learning_rate": 0.00016002438251273127, "loss": 0.9092, "step": 25485 }, { "epoch": 0.37, "grad_norm": 0.53515625, "learning_rate": 0.00016000435625022913, "loss": 0.926, "step": 25490 }, { "epoch": 0.37, "grad_norm": 0.53515625, "learning_rate": 0.00015998432622658557, "loss": 0.939, "step": 25495 }, { "epoch": 0.37, "grad_norm": 0.55078125, "learning_rate": 0.00015996429244305617, "loss": 0.9014, "step": 25500 }, { "epoch": 0.37, "grad_norm": 0.54296875, "learning_rate": 0.00015994425490089659, "loss": 0.9478, "step": 25505 }, { "epoch": 0.37, "grad_norm": 0.6953125, "learning_rate": 0.00015992421360136282, "loss": 1.0787, "step": 25510 }, { "epoch": 0.37, "grad_norm": 0.58203125, "learning_rate": 0.00015990416854571115, "loss": 0.8994, "step": 25515 }, { "epoch": 0.37, "grad_norm": 0.578125, "learning_rate": 0.00015988411973519794, "loss": 0.9534, "step": 25520 }, { "epoch": 0.37, "grad_norm": 0.58984375, "learning_rate": 0.0001598640671710799, "loss": 1.0001, "step": 25525 }, { "epoch": 0.37, "grad_norm": 0.63671875, "learning_rate": 0.00015984401085461397, "loss": 0.9634, "step": 25530 }, { "epoch": 0.37, "grad_norm": 0.6015625, "learning_rate": 0.00015982395078705729, "loss": 0.9582, "step": 25535 }, { "epoch": 0.37, "grad_norm": 0.5625, "learning_rate": 0.00015980388696966723, "loss": 0.8748, "step": 25540 }, { "epoch": 0.37, "grad_norm": 0.4765625, "learning_rate": 0.0001597838194037014, "loss": 0.9613, "step": 25545 }, { "epoch": 0.37, "grad_norm": 0.53125, "learning_rate": 0.0001597637480904177, "loss": 1.0516, "step": 25550 }, { "epoch": 0.37, "grad_norm": 0.84765625, "learning_rate": 0.0001597436730310742, "loss": 0.9361, "step": 25555 }, { "epoch": 0.37, "grad_norm": 0.54296875, "learning_rate": 0.00015972359422692923, "loss": 0.9223, "step": 25560 }, { "epoch": 0.37, "grad_norm": 0.53125, "learning_rate": 0.00015970351167924138, "loss": 0.98, "step": 25565 }, { "epoch": 0.37, "grad_norm": 0.546875, "learning_rate": 0.0001596834253892694, "loss": 1.0118, "step": 25570 }, { "epoch": 0.37, "grad_norm": 0.61328125, "learning_rate": 0.00015966333535827234, "loss": 1.0899, "step": 25575 }, { "epoch": 0.37, "grad_norm": 0.51171875, "learning_rate": 0.00015964324158750947, "loss": 0.8592, "step": 25580 }, { "epoch": 0.37, "grad_norm": 0.6171875, "learning_rate": 0.0001596231440782403, "loss": 0.9544, "step": 25585 }, { "epoch": 0.37, "grad_norm": 0.53125, "learning_rate": 0.00015960304283172452, "loss": 1.0262, "step": 25590 }, { "epoch": 0.37, "grad_norm": 0.59375, "learning_rate": 0.00015958293784922218, "loss": 0.9641, "step": 25595 }, { "epoch": 0.37, "grad_norm": 0.58984375, "learning_rate": 0.0001595628291319934, "loss": 1.0267, "step": 25600 }, { "epoch": 0.37, "grad_norm": 0.609375, "learning_rate": 0.0001595427166812987, "loss": 0.9957, "step": 25605 }, { "epoch": 0.37, "grad_norm": 0.52734375, "learning_rate": 0.00015952260049839864, "loss": 1.0398, "step": 25610 }, { "epoch": 0.37, "grad_norm": 0.51953125, "learning_rate": 0.00015950248058455423, "loss": 1.001, "step": 25615 }, { "epoch": 0.37, "grad_norm": 0.55859375, "learning_rate": 0.0001594823569410266, "loss": 1.0848, "step": 25620 }, { "epoch": 0.37, "grad_norm": 0.60546875, "learning_rate": 0.00015946222956907704, "loss": 0.9839, "step": 25625 }, { "epoch": 0.37, "grad_norm": 0.54296875, "learning_rate": 0.00015944209846996722, "loss": 0.8242, "step": 25630 }, { "epoch": 0.37, "grad_norm": 0.51953125, "learning_rate": 0.00015942196364495897, "loss": 0.8995, "step": 25635 }, { "epoch": 0.37, "grad_norm": 0.63671875, "learning_rate": 0.00015940182509531435, "loss": 0.9686, "step": 25640 }, { "epoch": 0.37, "grad_norm": 0.71875, "learning_rate": 0.00015938168282229572, "loss": 1.0651, "step": 25645 }, { "epoch": 0.37, "grad_norm": 0.5859375, "learning_rate": 0.00015936153682716557, "loss": 1.0298, "step": 25650 }, { "epoch": 0.37, "grad_norm": 0.62890625, "learning_rate": 0.00015934138711118666, "loss": 0.9333, "step": 25655 }, { "epoch": 0.37, "grad_norm": 0.55859375, "learning_rate": 0.000159321233675622, "loss": 0.9606, "step": 25660 }, { "epoch": 0.37, "grad_norm": 0.53515625, "learning_rate": 0.00015930107652173492, "loss": 0.8906, "step": 25665 }, { "epoch": 0.37, "grad_norm": 0.58984375, "learning_rate": 0.00015928091565078879, "loss": 0.9417, "step": 25670 }, { "epoch": 0.37, "grad_norm": 0.63671875, "learning_rate": 0.00015926075106404737, "loss": 0.9536, "step": 25675 }, { "epoch": 0.37, "grad_norm": 0.55078125, "learning_rate": 0.00015924058276277453, "loss": 0.925, "step": 25680 }, { "epoch": 0.37, "grad_norm": 0.61328125, "learning_rate": 0.00015922041074823456, "loss": 0.987, "step": 25685 }, { "epoch": 0.37, "grad_norm": 0.52734375, "learning_rate": 0.00015920023502169174, "loss": 1.0728, "step": 25690 }, { "epoch": 0.37, "grad_norm": 0.5859375, "learning_rate": 0.00015918005558441078, "loss": 0.9732, "step": 25695 }, { "epoch": 0.37, "grad_norm": 0.6015625, "learning_rate": 0.00015915987243765657, "loss": 1.0096, "step": 25700 }, { "epoch": 0.37, "grad_norm": 0.578125, "learning_rate": 0.00015913968558269414, "loss": 0.9989, "step": 25705 }, { "epoch": 0.37, "grad_norm": 0.53125, "learning_rate": 0.0001591194950207889, "loss": 1.0489, "step": 25710 }, { "epoch": 0.37, "grad_norm": 0.6484375, "learning_rate": 0.00015909930075320633, "loss": 0.9951, "step": 25715 }, { "epoch": 0.37, "grad_norm": 0.54296875, "learning_rate": 0.00015907910278121232, "loss": 0.8418, "step": 25720 }, { "epoch": 0.37, "grad_norm": 0.55078125, "learning_rate": 0.00015905890110607285, "loss": 1.0019, "step": 25725 }, { "epoch": 0.37, "grad_norm": 0.5546875, "learning_rate": 0.00015903869572905422, "loss": 1.1367, "step": 25730 }, { "epoch": 0.37, "grad_norm": 0.56640625, "learning_rate": 0.00015901848665142288, "loss": 0.9247, "step": 25735 }, { "epoch": 0.37, "grad_norm": 0.54296875, "learning_rate": 0.00015899827387444554, "loss": 0.9569, "step": 25740 }, { "epoch": 0.37, "grad_norm": 0.53515625, "learning_rate": 0.00015897805739938927, "loss": 0.8595, "step": 25745 }, { "epoch": 0.37, "grad_norm": 0.515625, "learning_rate": 0.00015895783722752116, "loss": 0.9867, "step": 25750 }, { "epoch": 0.37, "grad_norm": 0.84375, "learning_rate": 0.00015893761336010866, "loss": 1.0285, "step": 25755 }, { "epoch": 0.37, "grad_norm": 0.578125, "learning_rate": 0.00015891738579841943, "loss": 1.0182, "step": 25760 }, { "epoch": 0.37, "grad_norm": 0.61328125, "learning_rate": 0.00015889715454372137, "loss": 0.9272, "step": 25765 }, { "epoch": 0.37, "grad_norm": 0.6328125, "learning_rate": 0.00015887691959728256, "loss": 1.0613, "step": 25770 }, { "epoch": 0.37, "grad_norm": 0.65234375, "learning_rate": 0.0001588566809603714, "loss": 0.9419, "step": 25775 }, { "epoch": 0.37, "grad_norm": 0.5859375, "learning_rate": 0.00015883643863425645, "loss": 0.8621, "step": 25780 }, { "epoch": 0.37, "grad_norm": 0.51953125, "learning_rate": 0.0001588161926202065, "loss": 0.9175, "step": 25785 }, { "epoch": 0.37, "grad_norm": 0.54296875, "learning_rate": 0.0001587959429194906, "loss": 0.9916, "step": 25790 }, { "epoch": 0.37, "grad_norm": 0.6484375, "learning_rate": 0.00015877568953337806, "loss": 1.0484, "step": 25795 }, { "epoch": 0.37, "grad_norm": 0.61328125, "learning_rate": 0.00015875543246313836, "loss": 1.063, "step": 25800 }, { "epoch": 0.37, "grad_norm": 0.53515625, "learning_rate": 0.00015873517171004125, "loss": 1.0483, "step": 25805 }, { "epoch": 0.37, "grad_norm": 0.59765625, "learning_rate": 0.00015871490727535666, "loss": 1.0646, "step": 25810 }, { "epoch": 0.37, "grad_norm": 0.64453125, "learning_rate": 0.0001586946391603548, "loss": 0.9879, "step": 25815 }, { "epoch": 0.37, "grad_norm": 0.60546875, "learning_rate": 0.0001586743673663061, "loss": 0.9607, "step": 25820 }, { "epoch": 0.37, "grad_norm": 0.53125, "learning_rate": 0.00015865409189448127, "loss": 0.9211, "step": 25825 }, { "epoch": 0.37, "grad_norm": 0.55078125, "learning_rate": 0.00015863381274615115, "loss": 1.0857, "step": 25830 }, { "epoch": 0.37, "grad_norm": 0.49609375, "learning_rate": 0.00015861352992258684, "loss": 0.8483, "step": 25835 }, { "epoch": 0.37, "grad_norm": 0.466796875, "learning_rate": 0.00015859324342505974, "loss": 1.0105, "step": 25840 }, { "epoch": 0.37, "grad_norm": 0.5625, "learning_rate": 0.0001585729532548414, "loss": 0.9075, "step": 25845 }, { "epoch": 0.37, "grad_norm": 0.6171875, "learning_rate": 0.00015855265941320366, "loss": 1.1377, "step": 25850 }, { "epoch": 0.37, "grad_norm": 0.57421875, "learning_rate": 0.00015853236190141855, "loss": 0.8974, "step": 25855 }, { "epoch": 0.37, "grad_norm": 0.54296875, "learning_rate": 0.00015851206072075829, "loss": 0.9576, "step": 25860 }, { "epoch": 0.37, "grad_norm": 0.5234375, "learning_rate": 0.00015849175587249545, "loss": 1.062, "step": 25865 }, { "epoch": 0.37, "grad_norm": 0.6328125, "learning_rate": 0.0001584714473579027, "loss": 0.9231, "step": 25870 }, { "epoch": 0.37, "grad_norm": 0.5234375, "learning_rate": 0.00015845113517825313, "loss": 0.9307, "step": 25875 }, { "epoch": 0.37, "grad_norm": 0.52734375, "learning_rate": 0.00015843081933481976, "loss": 0.942, "step": 25880 }, { "epoch": 0.37, "grad_norm": 0.54296875, "learning_rate": 0.0001584104998288761, "loss": 1.075, "step": 25885 }, { "epoch": 0.37, "grad_norm": 0.52734375, "learning_rate": 0.0001583901766616958, "loss": 0.9113, "step": 25890 }, { "epoch": 0.37, "grad_norm": 0.5078125, "learning_rate": 0.0001583698498345527, "loss": 0.8825, "step": 25895 }, { "epoch": 0.37, "grad_norm": 0.55078125, "learning_rate": 0.00015834951934872094, "loss": 0.9867, "step": 25900 }, { "epoch": 0.37, "grad_norm": 0.52734375, "learning_rate": 0.00015832918520547487, "loss": 0.9442, "step": 25905 }, { "epoch": 0.37, "grad_norm": 0.62890625, "learning_rate": 0.00015830884740608906, "loss": 1.0392, "step": 25910 }, { "epoch": 0.37, "grad_norm": 0.60546875, "learning_rate": 0.00015828850595183823, "loss": 1.052, "step": 25915 }, { "epoch": 0.37, "grad_norm": 0.546875, "learning_rate": 0.0001582681608439975, "loss": 0.8458, "step": 25920 }, { "epoch": 0.37, "grad_norm": 0.58984375, "learning_rate": 0.00015824781208384207, "loss": 0.9396, "step": 25925 }, { "epoch": 0.37, "grad_norm": 0.55078125, "learning_rate": 0.00015822745967264745, "loss": 1.1154, "step": 25930 }, { "epoch": 0.37, "grad_norm": 0.5078125, "learning_rate": 0.00015820710361168935, "loss": 0.9944, "step": 25935 }, { "epoch": 0.37, "grad_norm": 0.5859375, "learning_rate": 0.0001581867439022437, "loss": 0.8879, "step": 25940 }, { "epoch": 0.37, "grad_norm": 0.55078125, "learning_rate": 0.00015816638054558666, "loss": 0.9119, "step": 25945 }, { "epoch": 0.37, "grad_norm": 0.703125, "learning_rate": 0.00015814601354299462, "loss": 0.8787, "step": 25950 }, { "epoch": 0.37, "grad_norm": 0.55078125, "learning_rate": 0.00015812564289574427, "loss": 1.0819, "step": 25955 }, { "epoch": 0.37, "grad_norm": 0.578125, "learning_rate": 0.00015810526860511243, "loss": 0.9735, "step": 25960 }, { "epoch": 0.37, "grad_norm": 0.65625, "learning_rate": 0.00015808489067237614, "loss": 1.1039, "step": 25965 }, { "epoch": 0.37, "grad_norm": 0.578125, "learning_rate": 0.00015806450909881277, "loss": 0.9072, "step": 25970 }, { "epoch": 0.37, "grad_norm": 0.490234375, "learning_rate": 0.00015804412388569986, "loss": 0.8738, "step": 25975 }, { "epoch": 0.37, "grad_norm": 0.5703125, "learning_rate": 0.00015802373503431513, "loss": 0.9162, "step": 25980 }, { "epoch": 0.37, "grad_norm": 0.58203125, "learning_rate": 0.00015800334254593661, "loss": 0.9719, "step": 25985 }, { "epoch": 0.37, "grad_norm": 0.578125, "learning_rate": 0.00015798294642184251, "loss": 0.9771, "step": 25990 }, { "epoch": 0.37, "grad_norm": 0.5703125, "learning_rate": 0.00015796254666331131, "loss": 1.0905, "step": 25995 }, { "epoch": 0.37, "grad_norm": 0.578125, "learning_rate": 0.00015794214327162167, "loss": 0.8707, "step": 26000 }, { "epoch": 0.37, "grad_norm": 0.609375, "learning_rate": 0.00015792173624805245, "loss": 0.8273, "step": 26005 }, { "epoch": 0.37, "grad_norm": 0.5703125, "learning_rate": 0.00015790132559388291, "loss": 0.9846, "step": 26010 }, { "epoch": 0.37, "grad_norm": 0.734375, "learning_rate": 0.0001578809113103923, "loss": 0.9594, "step": 26015 }, { "epoch": 0.37, "grad_norm": 0.546875, "learning_rate": 0.00015786049339886024, "loss": 0.9845, "step": 26020 }, { "epoch": 0.37, "grad_norm": 0.494140625, "learning_rate": 0.00015784007186056656, "loss": 0.9394, "step": 26025 }, { "epoch": 0.37, "grad_norm": 0.57421875, "learning_rate": 0.00015781964669679132, "loss": 1.0991, "step": 26030 }, { "epoch": 0.37, "grad_norm": 0.5703125, "learning_rate": 0.00015779921790881474, "loss": 1.0305, "step": 26035 }, { "epoch": 0.37, "grad_norm": 0.5078125, "learning_rate": 0.0001577787854979174, "loss": 1.0352, "step": 26040 }, { "epoch": 0.37, "grad_norm": 0.4921875, "learning_rate": 0.00015775834946537995, "loss": 0.9475, "step": 26045 }, { "epoch": 0.37, "grad_norm": 0.6171875, "learning_rate": 0.0001577379098124834, "loss": 1.0782, "step": 26050 }, { "epoch": 0.37, "grad_norm": 0.52734375, "learning_rate": 0.00015771746654050887, "loss": 0.9941, "step": 26055 }, { "epoch": 0.37, "grad_norm": 0.58203125, "learning_rate": 0.00015769701965073782, "loss": 0.9338, "step": 26060 }, { "epoch": 0.37, "grad_norm": 0.59375, "learning_rate": 0.00015767656914445188, "loss": 1.0494, "step": 26065 }, { "epoch": 0.37, "grad_norm": 0.70703125, "learning_rate": 0.0001576561150229329, "loss": 0.9931, "step": 26070 }, { "epoch": 0.37, "grad_norm": 0.58203125, "learning_rate": 0.00015763565728746292, "loss": 0.8672, "step": 26075 }, { "epoch": 0.37, "grad_norm": 0.55859375, "learning_rate": 0.00015761519593932434, "loss": 0.6515, "step": 26080 }, { "epoch": 0.37, "grad_norm": 0.5546875, "learning_rate": 0.00015759473097979964, "loss": 0.9082, "step": 26085 }, { "epoch": 0.37, "grad_norm": 0.5859375, "learning_rate": 0.00015757426241017161, "loss": 0.9968, "step": 26090 }, { "epoch": 0.37, "grad_norm": 0.5546875, "learning_rate": 0.00015755379023172327, "loss": 0.8977, "step": 26095 }, { "epoch": 0.37, "grad_norm": 0.64453125, "learning_rate": 0.00015753331444573777, "loss": 1.0706, "step": 26100 }, { "epoch": 0.37, "grad_norm": 0.58203125, "learning_rate": 0.00015751283505349863, "loss": 0.8743, "step": 26105 }, { "epoch": 0.37, "grad_norm": 0.51953125, "learning_rate": 0.00015749235205628946, "loss": 1.0556, "step": 26110 }, { "epoch": 0.37, "grad_norm": 0.5390625, "learning_rate": 0.00015747186545539418, "loss": 0.9144, "step": 26115 }, { "epoch": 0.37, "grad_norm": 0.609375, "learning_rate": 0.00015745137525209694, "loss": 0.929, "step": 26120 }, { "epoch": 0.37, "grad_norm": 0.5625, "learning_rate": 0.00015743088144768209, "loss": 0.8177, "step": 26125 }, { "epoch": 0.37, "grad_norm": 0.55078125, "learning_rate": 0.00015741038404343412, "loss": 0.9324, "step": 26130 }, { "epoch": 0.37, "grad_norm": 0.52734375, "learning_rate": 0.00015738988304063792, "loss": 0.805, "step": 26135 }, { "epoch": 0.37, "grad_norm": 0.58984375, "learning_rate": 0.00015736937844057852, "loss": 0.9338, "step": 26140 }, { "epoch": 0.38, "grad_norm": 0.765625, "learning_rate": 0.00015734887024454111, "loss": 1.0461, "step": 26145 }, { "epoch": 0.38, "grad_norm": 0.66015625, "learning_rate": 0.0001573283584538112, "loss": 1.0292, "step": 26150 }, { "epoch": 0.38, "grad_norm": 0.56640625, "learning_rate": 0.0001573078430696745, "loss": 0.8662, "step": 26155 }, { "epoch": 0.38, "grad_norm": 0.53125, "learning_rate": 0.0001572873240934169, "loss": 0.9804, "step": 26160 }, { "epoch": 0.38, "grad_norm": 0.5078125, "learning_rate": 0.00015726680152632462, "loss": 1.1208, "step": 26165 }, { "epoch": 0.38, "grad_norm": 0.62109375, "learning_rate": 0.000157246275369684, "loss": 1.0409, "step": 26170 }, { "epoch": 0.38, "grad_norm": 0.5703125, "learning_rate": 0.0001572257456247816, "loss": 0.8872, "step": 26175 }, { "epoch": 0.38, "grad_norm": 0.59375, "learning_rate": 0.00015720521229290434, "loss": 0.8249, "step": 26180 }, { "epoch": 0.38, "grad_norm": 0.58203125, "learning_rate": 0.0001571846753753392, "loss": 0.7681, "step": 26185 }, { "epoch": 0.38, "grad_norm": 0.59765625, "learning_rate": 0.00015716413487337346, "loss": 0.9822, "step": 26190 }, { "epoch": 0.38, "grad_norm": 0.5234375, "learning_rate": 0.00015714359078829467, "loss": 0.9559, "step": 26195 }, { "epoch": 0.38, "grad_norm": 0.55859375, "learning_rate": 0.0001571230431213905, "loss": 0.9754, "step": 26200 }, { "epoch": 0.38, "grad_norm": 0.71875, "learning_rate": 0.00015710249187394896, "loss": 0.8946, "step": 26205 }, { "epoch": 0.38, "grad_norm": 0.62109375, "learning_rate": 0.00015708193704725817, "loss": 0.9902, "step": 26210 }, { "epoch": 0.38, "grad_norm": 0.54296875, "learning_rate": 0.0001570613786426066, "loss": 0.9934, "step": 26215 }, { "epoch": 0.38, "grad_norm": 0.56640625, "learning_rate": 0.00015704081666128276, "loss": 1.0759, "step": 26220 }, { "epoch": 0.38, "grad_norm": 0.6171875, "learning_rate": 0.00015702025110457562, "loss": 0.9217, "step": 26225 }, { "epoch": 0.38, "grad_norm": 0.5625, "learning_rate": 0.0001569996819737742, "loss": 0.9059, "step": 26230 }, { "epoch": 0.38, "grad_norm": 0.45703125, "learning_rate": 0.00015697910927016775, "loss": 0.9285, "step": 26235 }, { "epoch": 0.38, "grad_norm": 0.609375, "learning_rate": 0.00015695853299504587, "loss": 1.0308, "step": 26240 }, { "epoch": 0.38, "grad_norm": 0.53515625, "learning_rate": 0.00015693795314969825, "loss": 0.9831, "step": 26245 }, { "epoch": 0.38, "grad_norm": 0.55078125, "learning_rate": 0.00015691736973541493, "loss": 0.9238, "step": 26250 }, { "epoch": 0.38, "grad_norm": 0.53515625, "learning_rate": 0.000156896782753486, "loss": 0.9768, "step": 26255 }, { "epoch": 0.38, "grad_norm": 0.5859375, "learning_rate": 0.00015687619220520194, "loss": 0.9166, "step": 26260 }, { "epoch": 0.38, "grad_norm": 0.54296875, "learning_rate": 0.0001568555980918534, "loss": 0.8379, "step": 26265 }, { "epoch": 0.38, "grad_norm": 0.78125, "learning_rate": 0.0001568350004147312, "loss": 0.9155, "step": 26270 }, { "epoch": 0.38, "grad_norm": 0.58984375, "learning_rate": 0.00015681439917512646, "loss": 0.8975, "step": 26275 }, { "epoch": 0.38, "grad_norm": 0.61328125, "learning_rate": 0.00015679379437433046, "loss": 0.9756, "step": 26280 }, { "epoch": 0.38, "grad_norm": 0.58203125, "learning_rate": 0.00015677318601363472, "loss": 1.0602, "step": 26285 }, { "epoch": 0.38, "grad_norm": 0.57421875, "learning_rate": 0.00015675257409433107, "loss": 0.8572, "step": 26290 }, { "epoch": 0.38, "grad_norm": 0.5390625, "learning_rate": 0.00015673195861771143, "loss": 0.849, "step": 26295 }, { "epoch": 0.38, "grad_norm": 0.56640625, "learning_rate": 0.000156711339585068, "loss": 1.04, "step": 26300 }, { "epoch": 0.38, "grad_norm": 0.53515625, "learning_rate": 0.0001566907169976932, "loss": 0.9551, "step": 26305 }, { "epoch": 0.38, "grad_norm": 0.546875, "learning_rate": 0.00015667009085687972, "loss": 1.0188, "step": 26310 }, { "epoch": 0.38, "grad_norm": 0.6796875, "learning_rate": 0.0001566494611639204, "loss": 0.8871, "step": 26315 }, { "epoch": 0.38, "grad_norm": 0.5703125, "learning_rate": 0.00015662882792010828, "loss": 0.8827, "step": 26320 }, { "epoch": 0.38, "grad_norm": 0.58984375, "learning_rate": 0.00015660819112673678, "loss": 1.01, "step": 26325 }, { "epoch": 0.38, "grad_norm": 0.5625, "learning_rate": 0.0001565875507850994, "loss": 0.8856, "step": 26330 }, { "epoch": 0.38, "grad_norm": 0.59375, "learning_rate": 0.0001565669068964899, "loss": 1.0583, "step": 26335 }, { "epoch": 0.38, "grad_norm": 1.21875, "learning_rate": 0.0001565462594622022, "loss": 1.0725, "step": 26340 }, { "epoch": 0.38, "grad_norm": 0.83984375, "learning_rate": 0.00015652560848353057, "loss": 1.0632, "step": 26345 }, { "epoch": 0.38, "grad_norm": 0.5234375, "learning_rate": 0.0001565049539617695, "loss": 1.0049, "step": 26350 }, { "epoch": 0.38, "grad_norm": 0.59375, "learning_rate": 0.00015648429589821348, "loss": 1.0311, "step": 26355 }, { "epoch": 0.38, "grad_norm": 0.51953125, "learning_rate": 0.00015646363429415748, "loss": 0.9113, "step": 26360 }, { "epoch": 0.38, "grad_norm": 0.578125, "learning_rate": 0.00015644296915089657, "loss": 0.9195, "step": 26365 }, { "epoch": 0.38, "grad_norm": 0.703125, "learning_rate": 0.00015642230046972606, "loss": 0.891, "step": 26370 }, { "epoch": 0.38, "grad_norm": 0.55078125, "learning_rate": 0.00015640162825194151, "loss": 1.0039, "step": 26375 }, { "epoch": 0.38, "grad_norm": 0.70703125, "learning_rate": 0.0001563809524988387, "loss": 1.0073, "step": 26380 }, { "epoch": 0.38, "grad_norm": 0.62109375, "learning_rate": 0.00015636027321171353, "loss": 1.0067, "step": 26385 }, { "epoch": 0.38, "grad_norm": 0.56640625, "learning_rate": 0.00015633959039186227, "loss": 1.0397, "step": 26390 }, { "epoch": 0.38, "grad_norm": 0.61328125, "learning_rate": 0.0001563189040405813, "loss": 0.9546, "step": 26395 }, { "epoch": 0.38, "grad_norm": 0.921875, "learning_rate": 0.0001562982141591673, "loss": 0.8314, "step": 26400 }, { "epoch": 0.38, "grad_norm": 0.49609375, "learning_rate": 0.0001562775207489171, "loss": 0.9185, "step": 26405 }, { "epoch": 0.38, "grad_norm": 0.578125, "learning_rate": 0.0001562568238111278, "loss": 1.0778, "step": 26410 }, { "epoch": 0.38, "grad_norm": 0.5859375, "learning_rate": 0.00015623612334709675, "loss": 1.086, "step": 26415 }, { "epoch": 0.38, "grad_norm": 0.5, "learning_rate": 0.0001562154193581214, "loss": 1.0063, "step": 26420 }, { "epoch": 0.38, "grad_norm": 0.66015625, "learning_rate": 0.00015619471184549955, "loss": 0.8892, "step": 26425 }, { "epoch": 0.38, "grad_norm": 0.50390625, "learning_rate": 0.0001561740008105292, "loss": 0.8712, "step": 26430 }, { "epoch": 0.38, "grad_norm": 0.59375, "learning_rate": 0.00015615328625450848, "loss": 1.0723, "step": 26435 }, { "epoch": 0.38, "grad_norm": 0.546875, "learning_rate": 0.0001561325681787358, "loss": 0.9601, "step": 26440 }, { "epoch": 0.38, "grad_norm": 0.73828125, "learning_rate": 0.00015611184658450983, "loss": 0.9405, "step": 26445 }, { "epoch": 0.38, "grad_norm": 0.609375, "learning_rate": 0.0001560911214731294, "loss": 0.8906, "step": 26450 }, { "epoch": 0.38, "grad_norm": 0.625, "learning_rate": 0.0001560703928458936, "loss": 1.0349, "step": 26455 }, { "epoch": 0.38, "grad_norm": 0.5390625, "learning_rate": 0.00015604966070410176, "loss": 0.9879, "step": 26460 }, { "epoch": 0.38, "grad_norm": 0.6875, "learning_rate": 0.0001560289250490533, "loss": 0.9159, "step": 26465 }, { "epoch": 0.38, "grad_norm": 0.57421875, "learning_rate": 0.00015600818588204805, "loss": 0.9955, "step": 26470 }, { "epoch": 0.38, "grad_norm": 0.74609375, "learning_rate": 0.00015598744320438588, "loss": 0.9993, "step": 26475 }, { "epoch": 0.38, "grad_norm": 0.53515625, "learning_rate": 0.000155966697017367, "loss": 0.868, "step": 26480 }, { "epoch": 0.38, "grad_norm": 0.57421875, "learning_rate": 0.00015594594732229187, "loss": 0.9074, "step": 26485 }, { "epoch": 0.38, "grad_norm": 0.51171875, "learning_rate": 0.00015592519412046098, "loss": 0.9526, "step": 26490 }, { "epoch": 0.38, "grad_norm": 0.55078125, "learning_rate": 0.00015590443741317524, "loss": 0.8759, "step": 26495 }, { "epoch": 0.38, "grad_norm": 0.59765625, "learning_rate": 0.0001558836772017357, "loss": 0.9871, "step": 26500 }, { "epoch": 0.38, "grad_norm": 0.78125, "learning_rate": 0.00015586291348744364, "loss": 1.1237, "step": 26505 }, { "epoch": 0.38, "grad_norm": 0.53515625, "learning_rate": 0.00015584214627160055, "loss": 0.9317, "step": 26510 }, { "epoch": 0.38, "grad_norm": 0.62109375, "learning_rate": 0.0001558213755555081, "loss": 0.8633, "step": 26515 }, { "epoch": 0.38, "grad_norm": 0.6015625, "learning_rate": 0.0001558006013404683, "loss": 1.0021, "step": 26520 }, { "epoch": 0.38, "grad_norm": 0.625, "learning_rate": 0.0001557798236277832, "loss": 0.9625, "step": 26525 }, { "epoch": 0.38, "grad_norm": 0.55859375, "learning_rate": 0.00015575904241875525, "loss": 0.8798, "step": 26530 }, { "epoch": 0.38, "grad_norm": 0.72265625, "learning_rate": 0.00015573825771468704, "loss": 1.0856, "step": 26535 }, { "epoch": 0.38, "grad_norm": 0.58984375, "learning_rate": 0.00015571746951688136, "loss": 0.9614, "step": 26540 }, { "epoch": 0.38, "grad_norm": 0.51171875, "learning_rate": 0.00015569667782664118, "loss": 0.8792, "step": 26545 }, { "epoch": 0.38, "grad_norm": 0.5625, "learning_rate": 0.00015567588264526984, "loss": 1.0623, "step": 26550 }, { "epoch": 0.38, "grad_norm": 0.546875, "learning_rate": 0.0001556550839740708, "loss": 1.0637, "step": 26555 }, { "epoch": 0.38, "grad_norm": 0.5546875, "learning_rate": 0.00015563428181434764, "loss": 1.0017, "step": 26560 }, { "epoch": 0.38, "grad_norm": 0.51171875, "learning_rate": 0.00015561347616740436, "loss": 1.1512, "step": 26565 }, { "epoch": 0.38, "grad_norm": 0.59765625, "learning_rate": 0.00015559266703454508, "loss": 0.9056, "step": 26570 }, { "epoch": 0.38, "grad_norm": 0.5625, "learning_rate": 0.00015557185441707406, "loss": 0.9349, "step": 26575 }, { "epoch": 0.38, "grad_norm": 0.490234375, "learning_rate": 0.00015555103831629597, "loss": 0.8511, "step": 26580 }, { "epoch": 0.38, "grad_norm": 0.57421875, "learning_rate": 0.0001555302187335155, "loss": 0.9237, "step": 26585 }, { "epoch": 0.38, "grad_norm": 0.4921875, "learning_rate": 0.00015550939567003771, "loss": 0.9405, "step": 26590 }, { "epoch": 0.38, "grad_norm": 0.5703125, "learning_rate": 0.00015548856912716774, "loss": 0.9342, "step": 26595 }, { "epoch": 0.38, "grad_norm": 0.58203125, "learning_rate": 0.00015546773910621106, "loss": 0.9176, "step": 26600 }, { "epoch": 0.38, "grad_norm": 0.54296875, "learning_rate": 0.00015544690560847336, "loss": 1.0353, "step": 26605 }, { "epoch": 0.38, "grad_norm": 0.80078125, "learning_rate": 0.0001554260686352604, "loss": 0.9468, "step": 26610 }, { "epoch": 0.38, "grad_norm": 0.515625, "learning_rate": 0.0001554052281878784, "loss": 0.8511, "step": 26615 }, { "epoch": 0.38, "grad_norm": 0.546875, "learning_rate": 0.0001553843842676336, "loss": 0.9141, "step": 26620 }, { "epoch": 0.38, "grad_norm": 0.55078125, "learning_rate": 0.00015536353687583247, "loss": 1.1151, "step": 26625 }, { "epoch": 0.38, "grad_norm": 0.61328125, "learning_rate": 0.0001553426860137818, "loss": 1.0005, "step": 26630 }, { "epoch": 0.38, "grad_norm": 0.59765625, "learning_rate": 0.00015532183168278854, "loss": 1.0243, "step": 26635 }, { "epoch": 0.38, "grad_norm": 0.5078125, "learning_rate": 0.0001553009738841599, "loss": 0.9182, "step": 26640 }, { "epoch": 0.38, "grad_norm": 0.53125, "learning_rate": 0.0001552801126192032, "loss": 0.9837, "step": 26645 }, { "epoch": 0.38, "grad_norm": 0.482421875, "learning_rate": 0.0001552592478892261, "loss": 0.8765, "step": 26650 }, { "epoch": 0.38, "grad_norm": 0.76171875, "learning_rate": 0.00015523837969553644, "loss": 1.1424, "step": 26655 }, { "epoch": 0.38, "grad_norm": 0.66796875, "learning_rate": 0.00015521750803944214, "loss": 0.9813, "step": 26660 }, { "epoch": 0.38, "grad_norm": 0.5546875, "learning_rate": 0.0001551966329222516, "loss": 0.903, "step": 26665 }, { "epoch": 0.38, "grad_norm": 0.625, "learning_rate": 0.0001551757543452733, "loss": 0.9145, "step": 26670 }, { "epoch": 0.38, "grad_norm": 0.5703125, "learning_rate": 0.0001551548723098158, "loss": 1.0128, "step": 26675 }, { "epoch": 0.38, "grad_norm": 0.57421875, "learning_rate": 0.0001551339868171881, "loss": 0.8879, "step": 26680 }, { "epoch": 0.38, "grad_norm": 0.54296875, "learning_rate": 0.00015511309786869935, "loss": 1.0045, "step": 26685 }, { "epoch": 0.38, "grad_norm": 0.578125, "learning_rate": 0.00015509220546565882, "loss": 0.9818, "step": 26690 }, { "epoch": 0.38, "grad_norm": 0.56640625, "learning_rate": 0.00015507130960937612, "loss": 0.9332, "step": 26695 }, { "epoch": 0.38, "grad_norm": 0.5703125, "learning_rate": 0.00015505041030116102, "loss": 0.9466, "step": 26700 }, { "epoch": 0.38, "grad_norm": 0.625, "learning_rate": 0.00015502950754232349, "loss": 1.0231, "step": 26705 }, { "epoch": 0.38, "grad_norm": 0.59375, "learning_rate": 0.00015500860133417374, "loss": 0.9721, "step": 26710 }, { "epoch": 0.38, "grad_norm": 0.609375, "learning_rate": 0.00015498769167802222, "loss": 0.9158, "step": 26715 }, { "epoch": 0.38, "grad_norm": 0.5390625, "learning_rate": 0.0001549667785751796, "loss": 0.9728, "step": 26720 }, { "epoch": 0.38, "grad_norm": 0.50390625, "learning_rate": 0.00015494586202695665, "loss": 0.9821, "step": 26725 }, { "epoch": 0.38, "grad_norm": 0.54296875, "learning_rate": 0.00015492494203466452, "loss": 1.0645, "step": 26730 }, { "epoch": 0.38, "grad_norm": 0.51953125, "learning_rate": 0.00015490401859961445, "loss": 0.8899, "step": 26735 }, { "epoch": 0.38, "grad_norm": 0.59375, "learning_rate": 0.00015488309172311798, "loss": 0.9968, "step": 26740 }, { "epoch": 0.38, "grad_norm": 0.51171875, "learning_rate": 0.00015486216140648678, "loss": 0.873, "step": 26745 }, { "epoch": 0.38, "grad_norm": 0.69921875, "learning_rate": 0.00015484122765103286, "loss": 1.048, "step": 26750 }, { "epoch": 0.38, "grad_norm": 0.453125, "learning_rate": 0.0001548202904580683, "loss": 0.8578, "step": 26755 }, { "epoch": 0.38, "grad_norm": 0.76953125, "learning_rate": 0.00015479934982890551, "loss": 0.9117, "step": 26760 }, { "epoch": 0.38, "grad_norm": 0.53125, "learning_rate": 0.0001547784057648571, "loss": 0.9955, "step": 26765 }, { "epoch": 0.38, "grad_norm": 0.52734375, "learning_rate": 0.00015475745826723576, "loss": 0.9677, "step": 26770 }, { "epoch": 0.38, "grad_norm": 0.53515625, "learning_rate": 0.00015473650733735463, "loss": 0.9753, "step": 26775 }, { "epoch": 0.38, "grad_norm": 0.54296875, "learning_rate": 0.00015471555297652686, "loss": 0.9088, "step": 26780 }, { "epoch": 0.38, "grad_norm": 0.5625, "learning_rate": 0.0001546945951860659, "loss": 0.9215, "step": 26785 }, { "epoch": 0.38, "grad_norm": 0.5, "learning_rate": 0.00015467363396728543, "loss": 0.8744, "step": 26790 }, { "epoch": 0.38, "grad_norm": 0.5625, "learning_rate": 0.00015465266932149932, "loss": 0.9558, "step": 26795 }, { "epoch": 0.38, "grad_norm": 0.5625, "learning_rate": 0.00015463170125002166, "loss": 0.9333, "step": 26800 }, { "epoch": 0.38, "grad_norm": 0.5546875, "learning_rate": 0.00015461072975416675, "loss": 0.951, "step": 26805 }, { "epoch": 0.38, "grad_norm": 0.61328125, "learning_rate": 0.00015458975483524905, "loss": 0.8719, "step": 26810 }, { "epoch": 0.38, "grad_norm": 0.52734375, "learning_rate": 0.0001545687764945834, "loss": 0.8961, "step": 26815 }, { "epoch": 0.38, "grad_norm": 0.51953125, "learning_rate": 0.00015454779473348467, "loss": 0.9622, "step": 26820 }, { "epoch": 0.38, "grad_norm": 0.61328125, "learning_rate": 0.00015452680955326803, "loss": 0.9103, "step": 26825 }, { "epoch": 0.38, "grad_norm": 0.50390625, "learning_rate": 0.00015450582095524892, "loss": 0.9518, "step": 26830 }, { "epoch": 0.38, "grad_norm": 0.5625, "learning_rate": 0.00015448482894074282, "loss": 0.9383, "step": 26835 }, { "epoch": 0.39, "grad_norm": 0.6015625, "learning_rate": 0.00015446383351106562, "loss": 1.0018, "step": 26840 }, { "epoch": 0.39, "grad_norm": 0.515625, "learning_rate": 0.0001544428346675333, "loss": 0.9988, "step": 26845 }, { "epoch": 0.39, "grad_norm": 0.5703125, "learning_rate": 0.00015442183241146207, "loss": 0.96, "step": 26850 }, { "epoch": 0.39, "grad_norm": 0.58984375, "learning_rate": 0.00015440082674416842, "loss": 0.9955, "step": 26855 }, { "epoch": 0.39, "grad_norm": 0.5390625, "learning_rate": 0.000154379817666969, "loss": 0.9501, "step": 26860 }, { "epoch": 0.39, "grad_norm": 0.55078125, "learning_rate": 0.00015435880518118066, "loss": 1.0952, "step": 26865 }, { "epoch": 0.39, "grad_norm": 0.5703125, "learning_rate": 0.0001543377892881205, "loss": 0.866, "step": 26870 }, { "epoch": 0.39, "grad_norm": 0.59765625, "learning_rate": 0.00015431676998910586, "loss": 0.9214, "step": 26875 }, { "epoch": 0.39, "grad_norm": 0.5625, "learning_rate": 0.00015429574728545418, "loss": 1.0176, "step": 26880 }, { "epoch": 0.39, "grad_norm": 0.625, "learning_rate": 0.00015427472117848323, "loss": 0.9703, "step": 26885 }, { "epoch": 0.39, "grad_norm": 0.56640625, "learning_rate": 0.00015425369166951095, "loss": 0.9798, "step": 26890 }, { "epoch": 0.39, "grad_norm": 0.65234375, "learning_rate": 0.0001542326587598555, "loss": 0.9244, "step": 26895 }, { "epoch": 0.39, "grad_norm": 0.53515625, "learning_rate": 0.0001542116224508352, "loss": 1.0506, "step": 26900 }, { "epoch": 0.39, "grad_norm": 0.58984375, "learning_rate": 0.00015419058274376867, "loss": 0.8334, "step": 26905 }, { "epoch": 0.39, "grad_norm": 0.62890625, "learning_rate": 0.00015416953963997472, "loss": 1.0585, "step": 26910 }, { "epoch": 0.39, "grad_norm": 0.8125, "learning_rate": 0.0001541484931407723, "loss": 1.1043, "step": 26915 }, { "epoch": 0.39, "grad_norm": 0.5625, "learning_rate": 0.00015412744324748068, "loss": 0.9439, "step": 26920 }, { "epoch": 0.39, "grad_norm": 0.494140625, "learning_rate": 0.00015410638996141927, "loss": 0.8591, "step": 26925 }, { "epoch": 0.39, "grad_norm": 0.515625, "learning_rate": 0.0001540853332839077, "loss": 0.9574, "step": 26930 }, { "epoch": 0.39, "grad_norm": 0.54296875, "learning_rate": 0.00015406427321626586, "loss": 1.0304, "step": 26935 }, { "epoch": 0.39, "grad_norm": 0.53125, "learning_rate": 0.00015404320975981382, "loss": 0.8397, "step": 26940 }, { "epoch": 0.39, "grad_norm": 0.67578125, "learning_rate": 0.0001540221429158718, "loss": 1.142, "step": 26945 }, { "epoch": 0.39, "grad_norm": 0.5859375, "learning_rate": 0.00015400107268576037, "loss": 1.2038, "step": 26950 }, { "epoch": 0.39, "grad_norm": 0.5625, "learning_rate": 0.00015397999907080015, "loss": 1.0749, "step": 26955 }, { "epoch": 0.39, "grad_norm": 0.53515625, "learning_rate": 0.00015395892207231216, "loss": 0.9764, "step": 26960 }, { "epoch": 0.39, "grad_norm": 0.51171875, "learning_rate": 0.00015393784169161746, "loss": 0.8951, "step": 26965 }, { "epoch": 0.39, "grad_norm": 0.59765625, "learning_rate": 0.00015391675793003742, "loss": 1.0228, "step": 26970 }, { "epoch": 0.39, "grad_norm": 0.5625, "learning_rate": 0.00015389567078889356, "loss": 0.9191, "step": 26975 }, { "epoch": 0.39, "grad_norm": 0.6796875, "learning_rate": 0.0001538745802695077, "loss": 1.1133, "step": 26980 }, { "epoch": 0.39, "grad_norm": 0.5859375, "learning_rate": 0.00015385348637320182, "loss": 0.9428, "step": 26985 }, { "epoch": 0.39, "grad_norm": 0.62890625, "learning_rate": 0.00015383238910129804, "loss": 0.8891, "step": 26990 }, { "epoch": 0.39, "grad_norm": 0.6875, "learning_rate": 0.0001538112884551188, "loss": 0.9961, "step": 26995 }, { "epoch": 0.39, "grad_norm": 0.58984375, "learning_rate": 0.00015379018443598672, "loss": 0.9033, "step": 27000 }, { "epoch": 0.39, "grad_norm": 0.6171875, "learning_rate": 0.00015376907704522464, "loss": 0.9199, "step": 27005 }, { "epoch": 0.39, "grad_norm": 0.5546875, "learning_rate": 0.00015374796628415556, "loss": 1.0566, "step": 27010 }, { "epoch": 0.39, "grad_norm": 0.5234375, "learning_rate": 0.00015372685215410273, "loss": 1.057, "step": 27015 }, { "epoch": 0.39, "grad_norm": 0.57421875, "learning_rate": 0.00015370573465638962, "loss": 1.0201, "step": 27020 }, { "epoch": 0.39, "grad_norm": 0.53515625, "learning_rate": 0.00015368461379233986, "loss": 0.9872, "step": 27025 }, { "epoch": 0.39, "grad_norm": 0.703125, "learning_rate": 0.00015366348956327743, "loss": 0.9782, "step": 27030 }, { "epoch": 0.39, "grad_norm": 0.52734375, "learning_rate": 0.00015364236197052634, "loss": 0.9006, "step": 27035 }, { "epoch": 0.39, "grad_norm": 0.57421875, "learning_rate": 0.0001536212310154109, "loss": 0.9762, "step": 27040 }, { "epoch": 0.39, "grad_norm": 0.53125, "learning_rate": 0.00015360009669925564, "loss": 0.8898, "step": 27045 }, { "epoch": 0.39, "grad_norm": 0.54296875, "learning_rate": 0.00015357895902338523, "loss": 0.9673, "step": 27050 }, { "epoch": 0.39, "grad_norm": 0.62109375, "learning_rate": 0.00015355781798912467, "loss": 1.0248, "step": 27055 }, { "epoch": 0.39, "grad_norm": 0.6640625, "learning_rate": 0.00015353667359779908, "loss": 1.0253, "step": 27060 }, { "epoch": 0.39, "grad_norm": 0.6328125, "learning_rate": 0.00015351552585073384, "loss": 0.9211, "step": 27065 }, { "epoch": 0.39, "grad_norm": 0.55078125, "learning_rate": 0.0001534943747492545, "loss": 0.7949, "step": 27070 }, { "epoch": 0.39, "grad_norm": 0.55859375, "learning_rate": 0.0001534732202946868, "loss": 0.9069, "step": 27075 }, { "epoch": 0.39, "grad_norm": 0.5078125, "learning_rate": 0.00015345206248835674, "loss": 1.0244, "step": 27080 }, { "epoch": 0.39, "grad_norm": 0.5703125, "learning_rate": 0.00015343090133159053, "loss": 0.9724, "step": 27085 }, { "epoch": 0.39, "grad_norm": 0.5546875, "learning_rate": 0.00015340973682571459, "loss": 0.9931, "step": 27090 }, { "epoch": 0.39, "grad_norm": 0.52734375, "learning_rate": 0.00015338856897205552, "loss": 0.9437, "step": 27095 }, { "epoch": 0.39, "grad_norm": 0.609375, "learning_rate": 0.00015336739777194013, "loss": 0.9188, "step": 27100 }, { "epoch": 0.39, "grad_norm": 0.70703125, "learning_rate": 0.00015334622322669543, "loss": 1.1199, "step": 27105 }, { "epoch": 0.39, "grad_norm": 0.515625, "learning_rate": 0.00015332504533764876, "loss": 0.9538, "step": 27110 }, { "epoch": 0.39, "grad_norm": 0.48828125, "learning_rate": 0.0001533038641061275, "loss": 0.9649, "step": 27115 }, { "epoch": 0.39, "grad_norm": 0.66015625, "learning_rate": 0.00015328267953345934, "loss": 1.0045, "step": 27120 }, { "epoch": 0.39, "grad_norm": 0.5390625, "learning_rate": 0.00015326149162097212, "loss": 1.0143, "step": 27125 }, { "epoch": 0.39, "grad_norm": 0.4921875, "learning_rate": 0.00015324030036999395, "loss": 0.969, "step": 27130 }, { "epoch": 0.39, "grad_norm": 0.455078125, "learning_rate": 0.0001532191057818531, "loss": 0.9579, "step": 27135 }, { "epoch": 0.39, "grad_norm": 0.5078125, "learning_rate": 0.0001531979078578781, "loss": 0.8333, "step": 27140 }, { "epoch": 0.39, "grad_norm": 0.5078125, "learning_rate": 0.00015317670659939768, "loss": 0.941, "step": 27145 }, { "epoch": 0.39, "grad_norm": 0.59375, "learning_rate": 0.00015315550200774072, "loss": 0.9879, "step": 27150 }, { "epoch": 0.39, "grad_norm": 0.52734375, "learning_rate": 0.00015313429408423632, "loss": 0.8847, "step": 27155 }, { "epoch": 0.39, "grad_norm": 0.5703125, "learning_rate": 0.00015311308283021385, "loss": 0.9423, "step": 27160 }, { "epoch": 0.39, "grad_norm": 0.54296875, "learning_rate": 0.0001530918682470029, "loss": 0.9994, "step": 27165 }, { "epoch": 0.39, "grad_norm": 0.578125, "learning_rate": 0.00015307065033593316, "loss": 0.9144, "step": 27170 }, { "epoch": 0.39, "grad_norm": 0.458984375, "learning_rate": 0.00015304942909833463, "loss": 0.9032, "step": 27175 }, { "epoch": 0.39, "grad_norm": 0.53515625, "learning_rate": 0.00015302820453553745, "loss": 0.9647, "step": 27180 }, { "epoch": 0.39, "grad_norm": 0.59375, "learning_rate": 0.000153006976648872, "loss": 0.9738, "step": 27185 }, { "epoch": 0.39, "grad_norm": 0.50390625, "learning_rate": 0.0001529857454396689, "loss": 0.9856, "step": 27190 }, { "epoch": 0.39, "grad_norm": 0.671875, "learning_rate": 0.00015296451090925897, "loss": 0.8408, "step": 27195 }, { "epoch": 0.39, "grad_norm": 0.57421875, "learning_rate": 0.0001529432730589731, "loss": 0.9164, "step": 27200 }, { "epoch": 0.39, "grad_norm": 0.6015625, "learning_rate": 0.00015292203189014262, "loss": 1.1075, "step": 27205 }, { "epoch": 0.39, "grad_norm": 0.59375, "learning_rate": 0.0001529007874040989, "loss": 0.8321, "step": 27210 }, { "epoch": 0.39, "grad_norm": 0.5859375, "learning_rate": 0.00015287953960217357, "loss": 0.9266, "step": 27215 }, { "epoch": 0.39, "grad_norm": 0.51171875, "learning_rate": 0.0001528582884856985, "loss": 1.0955, "step": 27220 }, { "epoch": 0.39, "grad_norm": 0.53515625, "learning_rate": 0.0001528370340560057, "loss": 0.8284, "step": 27225 }, { "epoch": 0.39, "grad_norm": 0.5703125, "learning_rate": 0.0001528157763144274, "loss": 0.9035, "step": 27230 }, { "epoch": 0.39, "grad_norm": 0.51953125, "learning_rate": 0.0001527945152622961, "loss": 0.7278, "step": 27235 }, { "epoch": 0.39, "grad_norm": 0.65234375, "learning_rate": 0.00015277325090094443, "loss": 0.9253, "step": 27240 }, { "epoch": 0.39, "grad_norm": 0.49609375, "learning_rate": 0.00015275198323170535, "loss": 0.8693, "step": 27245 }, { "epoch": 0.39, "grad_norm": 0.58203125, "learning_rate": 0.00015273071225591187, "loss": 0.9612, "step": 27250 }, { "epoch": 0.39, "grad_norm": 0.54296875, "learning_rate": 0.00015270943797489724, "loss": 0.9629, "step": 27255 }, { "epoch": 0.39, "grad_norm": 0.54296875, "learning_rate": 0.00015268816038999504, "loss": 0.9402, "step": 27260 }, { "epoch": 0.39, "grad_norm": 0.53515625, "learning_rate": 0.00015266687950253894, "loss": 0.9483, "step": 27265 }, { "epoch": 0.39, "grad_norm": 0.57421875, "learning_rate": 0.00015264559531386285, "loss": 0.9645, "step": 27270 }, { "epoch": 0.39, "grad_norm": 0.53125, "learning_rate": 0.00015262430782530092, "loss": 1.0064, "step": 27275 }, { "epoch": 0.39, "grad_norm": 0.58203125, "learning_rate": 0.0001526030170381874, "loss": 0.9229, "step": 27280 }, { "epoch": 0.39, "grad_norm": 0.6015625, "learning_rate": 0.00015258172295385688, "loss": 1.114, "step": 27285 }, { "epoch": 0.39, "grad_norm": 0.55859375, "learning_rate": 0.00015256042557364405, "loss": 1.021, "step": 27290 }, { "epoch": 0.39, "grad_norm": 0.6015625, "learning_rate": 0.0001525391248988839, "loss": 1.0266, "step": 27295 }, { "epoch": 0.39, "grad_norm": 0.51953125, "learning_rate": 0.0001525178209309116, "loss": 1.1505, "step": 27300 }, { "epoch": 0.39, "grad_norm": 0.5859375, "learning_rate": 0.0001524965136710624, "loss": 0.9701, "step": 27305 }, { "epoch": 0.39, "grad_norm": 0.64453125, "learning_rate": 0.00015247520312067198, "loss": 1.1152, "step": 27310 }, { "epoch": 0.39, "grad_norm": 0.5, "learning_rate": 0.00015245388928107606, "loss": 0.8663, "step": 27315 }, { "epoch": 0.39, "grad_norm": 0.55078125, "learning_rate": 0.00015243257215361063, "loss": 0.8906, "step": 27320 }, { "epoch": 0.39, "grad_norm": 0.4921875, "learning_rate": 0.00015241125173961185, "loss": 0.9771, "step": 27325 }, { "epoch": 0.39, "grad_norm": 0.52734375, "learning_rate": 0.00015238992804041612, "loss": 0.9428, "step": 27330 }, { "epoch": 0.39, "grad_norm": 0.6015625, "learning_rate": 0.00015236860105736003, "loss": 0.9696, "step": 27335 }, { "epoch": 0.39, "grad_norm": 0.578125, "learning_rate": 0.00015234727079178038, "loss": 0.9693, "step": 27340 }, { "epoch": 0.39, "grad_norm": 0.61328125, "learning_rate": 0.00015232593724501419, "loss": 0.8225, "step": 27345 }, { "epoch": 0.39, "grad_norm": 0.59375, "learning_rate": 0.00015230460041839862, "loss": 1.0572, "step": 27350 }, { "epoch": 0.39, "grad_norm": 0.546875, "learning_rate": 0.0001522832603132712, "loss": 0.923, "step": 27355 }, { "epoch": 0.39, "grad_norm": 0.53515625, "learning_rate": 0.00015226191693096944, "loss": 0.9713, "step": 27360 }, { "epoch": 0.39, "grad_norm": 0.578125, "learning_rate": 0.00015224057027283117, "loss": 1.0176, "step": 27365 }, { "epoch": 0.39, "grad_norm": 0.62109375, "learning_rate": 0.0001522192203401945, "loss": 0.9373, "step": 27370 }, { "epoch": 0.39, "grad_norm": 0.5625, "learning_rate": 0.00015219786713439762, "loss": 1.1225, "step": 27375 }, { "epoch": 0.39, "grad_norm": 0.5625, "learning_rate": 0.00015217651065677898, "loss": 0.8477, "step": 27380 }, { "epoch": 0.39, "grad_norm": 0.53515625, "learning_rate": 0.00015215515090867722, "loss": 0.9262, "step": 27385 }, { "epoch": 0.39, "grad_norm": 0.6015625, "learning_rate": 0.0001521337878914312, "loss": 0.8049, "step": 27390 }, { "epoch": 0.39, "grad_norm": 0.5703125, "learning_rate": 0.00015211242160637997, "loss": 0.942, "step": 27395 }, { "epoch": 0.39, "grad_norm": 0.59375, "learning_rate": 0.00015209105205486284, "loss": 0.9257, "step": 27400 }, { "epoch": 0.39, "grad_norm": 0.54296875, "learning_rate": 0.00015206967923821923, "loss": 0.9311, "step": 27405 }, { "epoch": 0.39, "grad_norm": 0.62109375, "learning_rate": 0.0001520483031577888, "loss": 0.9391, "step": 27410 }, { "epoch": 0.39, "grad_norm": 0.6875, "learning_rate": 0.00015202692381491146, "loss": 0.9197, "step": 27415 }, { "epoch": 0.39, "grad_norm": 0.6640625, "learning_rate": 0.00015200554121092726, "loss": 1.159, "step": 27420 }, { "epoch": 0.39, "grad_norm": 0.58984375, "learning_rate": 0.00015198415534717653, "loss": 0.9105, "step": 27425 }, { "epoch": 0.39, "grad_norm": 0.66015625, "learning_rate": 0.00015196276622499977, "loss": 0.933, "step": 27430 }, { "epoch": 0.39, "grad_norm": 0.46484375, "learning_rate": 0.0001519413738457376, "loss": 0.8771, "step": 27435 }, { "epoch": 0.39, "grad_norm": 0.55859375, "learning_rate": 0.00015191997821073098, "loss": 0.9563, "step": 27440 }, { "epoch": 0.39, "grad_norm": 0.59375, "learning_rate": 0.00015189857932132098, "loss": 0.9751, "step": 27445 }, { "epoch": 0.39, "grad_norm": 0.53125, "learning_rate": 0.00015187717717884893, "loss": 0.8955, "step": 27450 }, { "epoch": 0.39, "grad_norm": 0.5234375, "learning_rate": 0.00015185577178465631, "loss": 0.8752, "step": 27455 }, { "epoch": 0.39, "grad_norm": 0.51171875, "learning_rate": 0.00015183436314008487, "loss": 0.8825, "step": 27460 }, { "epoch": 0.39, "grad_norm": 0.6328125, "learning_rate": 0.00015181295124647653, "loss": 0.9836, "step": 27465 }, { "epoch": 0.39, "grad_norm": 0.51953125, "learning_rate": 0.00015179153610517338, "loss": 1.0022, "step": 27470 }, { "epoch": 0.39, "grad_norm": 0.625, "learning_rate": 0.00015177011771751777, "loss": 0.8779, "step": 27475 }, { "epoch": 0.39, "grad_norm": 0.640625, "learning_rate": 0.0001517486960848522, "loss": 0.9912, "step": 27480 }, { "epoch": 0.39, "grad_norm": 0.53125, "learning_rate": 0.00015172727120851947, "loss": 0.9619, "step": 27485 }, { "epoch": 0.39, "grad_norm": 0.6171875, "learning_rate": 0.0001517058430898624, "loss": 1.092, "step": 27490 }, { "epoch": 0.39, "grad_norm": 0.609375, "learning_rate": 0.00015168441173022426, "loss": 0.928, "step": 27495 }, { "epoch": 0.39, "grad_norm": 0.53125, "learning_rate": 0.00015166297713094828, "loss": 0.8879, "step": 27500 }, { "epoch": 0.39, "grad_norm": 0.5703125, "learning_rate": 0.0001516415392933781, "loss": 1.0288, "step": 27505 }, { "epoch": 0.39, "grad_norm": 0.5234375, "learning_rate": 0.00015162009821885738, "loss": 0.9421, "step": 27510 }, { "epoch": 0.39, "grad_norm": 0.57421875, "learning_rate": 0.00015159865390873014, "loss": 0.9683, "step": 27515 }, { "epoch": 0.39, "grad_norm": 0.5703125, "learning_rate": 0.0001515772063643405, "loss": 0.8493, "step": 27520 }, { "epoch": 0.39, "grad_norm": 0.498046875, "learning_rate": 0.00015155575558703282, "loss": 1.0029, "step": 27525 }, { "epoch": 0.39, "grad_norm": 0.5625, "learning_rate": 0.00015153430157815168, "loss": 0.9784, "step": 27530 }, { "epoch": 0.39, "grad_norm": 0.5859375, "learning_rate": 0.0001515128443390418, "loss": 0.9032, "step": 27535 }, { "epoch": 0.4, "grad_norm": 0.6328125, "learning_rate": 0.00015149138387104817, "loss": 0.9295, "step": 27540 }, { "epoch": 0.4, "grad_norm": 0.578125, "learning_rate": 0.00015146992017551597, "loss": 0.9388, "step": 27545 }, { "epoch": 0.4, "grad_norm": 0.56640625, "learning_rate": 0.00015144845325379053, "loss": 0.9346, "step": 27550 }, { "epoch": 0.4, "grad_norm": 0.59375, "learning_rate": 0.00015142698310721748, "loss": 1.0505, "step": 27555 }, { "epoch": 0.4, "grad_norm": 0.5625, "learning_rate": 0.00015140550973714251, "loss": 1.0346, "step": 27560 }, { "epoch": 0.4, "grad_norm": 0.62109375, "learning_rate": 0.0001513840331449117, "loss": 0.9626, "step": 27565 }, { "epoch": 0.4, "grad_norm": 0.4765625, "learning_rate": 0.00015136255333187115, "loss": 1.002, "step": 27570 }, { "epoch": 0.4, "grad_norm": 0.46484375, "learning_rate": 0.00015134107029936725, "loss": 0.8865, "step": 27575 }, { "epoch": 0.4, "grad_norm": 0.578125, "learning_rate": 0.0001513195840487466, "loss": 0.9616, "step": 27580 }, { "epoch": 0.4, "grad_norm": 0.57421875, "learning_rate": 0.00015129809458135597, "loss": 0.9437, "step": 27585 }, { "epoch": 0.4, "grad_norm": 0.54296875, "learning_rate": 0.00015127660189854237, "loss": 1.066, "step": 27590 }, { "epoch": 0.4, "grad_norm": 0.5625, "learning_rate": 0.00015125510600165295, "loss": 0.8881, "step": 27595 }, { "epoch": 0.4, "grad_norm": 0.478515625, "learning_rate": 0.00015123360689203507, "loss": 0.8513, "step": 27600 }, { "epoch": 0.4, "grad_norm": 0.55078125, "learning_rate": 0.00015121210457103642, "loss": 0.9547, "step": 27605 }, { "epoch": 0.4, "grad_norm": 0.5859375, "learning_rate": 0.00015119059904000466, "loss": 0.9742, "step": 27610 }, { "epoch": 0.4, "grad_norm": 0.58984375, "learning_rate": 0.00015116909030028793, "loss": 0.8072, "step": 27615 }, { "epoch": 0.4, "grad_norm": 0.56640625, "learning_rate": 0.0001511475783532343, "loss": 0.8026, "step": 27620 }, { "epoch": 0.4, "grad_norm": 0.52734375, "learning_rate": 0.0001511260632001922, "loss": 0.9882, "step": 27625 }, { "epoch": 0.4, "grad_norm": 0.54296875, "learning_rate": 0.00015110454484251027, "loss": 0.8986, "step": 27630 }, { "epoch": 0.4, "grad_norm": 0.61328125, "learning_rate": 0.0001510830232815372, "loss": 0.9226, "step": 27635 }, { "epoch": 0.4, "grad_norm": 0.53125, "learning_rate": 0.00015106149851862213, "loss": 0.954, "step": 27640 }, { "epoch": 0.4, "grad_norm": 0.52734375, "learning_rate": 0.00015103997055511414, "loss": 0.9185, "step": 27645 }, { "epoch": 0.4, "grad_norm": 0.5703125, "learning_rate": 0.00015101843939236263, "loss": 0.8623, "step": 27650 }, { "epoch": 0.4, "grad_norm": 0.515625, "learning_rate": 0.0001509969050317173, "loss": 1.0706, "step": 27655 }, { "epoch": 0.4, "grad_norm": 0.5625, "learning_rate": 0.00015097536747452785, "loss": 1.0242, "step": 27660 }, { "epoch": 0.4, "grad_norm": 0.498046875, "learning_rate": 0.00015095382672214428, "loss": 0.8957, "step": 27665 }, { "epoch": 0.4, "grad_norm": 0.60546875, "learning_rate": 0.00015093228277591688, "loss": 0.8766, "step": 27670 }, { "epoch": 0.4, "grad_norm": 0.5078125, "learning_rate": 0.00015091073563719596, "loss": 0.874, "step": 27675 }, { "epoch": 0.4, "grad_norm": 0.53125, "learning_rate": 0.00015088918530733217, "loss": 0.8828, "step": 27680 }, { "epoch": 0.4, "grad_norm": 0.58984375, "learning_rate": 0.00015086763178767627, "loss": 0.8816, "step": 27685 }, { "epoch": 0.4, "grad_norm": 0.5, "learning_rate": 0.00015084607507957924, "loss": 0.8533, "step": 27690 }, { "epoch": 0.4, "grad_norm": 0.54296875, "learning_rate": 0.00015082451518439238, "loss": 0.9149, "step": 27695 }, { "epoch": 0.4, "grad_norm": 0.58984375, "learning_rate": 0.000150802952103467, "loss": 1.0048, "step": 27700 }, { "epoch": 0.4, "grad_norm": 0.58203125, "learning_rate": 0.0001507813858381547, "loss": 0.9428, "step": 27705 }, { "epoch": 0.4, "grad_norm": 0.53125, "learning_rate": 0.00015075981638980733, "loss": 0.8444, "step": 27710 }, { "epoch": 0.4, "grad_norm": 0.578125, "learning_rate": 0.0001507382437597768, "loss": 0.9639, "step": 27715 }, { "epoch": 0.4, "grad_norm": 0.58984375, "learning_rate": 0.00015071666794941544, "loss": 0.9995, "step": 27720 }, { "epoch": 0.4, "grad_norm": 0.515625, "learning_rate": 0.00015069508896007553, "loss": 0.9617, "step": 27725 }, { "epoch": 0.4, "grad_norm": 0.55859375, "learning_rate": 0.00015067350679310971, "loss": 0.901, "step": 27730 }, { "epoch": 0.4, "grad_norm": 0.6953125, "learning_rate": 0.00015065192144987077, "loss": 1.0665, "step": 27735 }, { "epoch": 0.4, "grad_norm": 0.52734375, "learning_rate": 0.00015063033293171173, "loss": 1.0087, "step": 27740 }, { "epoch": 0.4, "grad_norm": 0.58984375, "learning_rate": 0.00015060874123998575, "loss": 1.0189, "step": 27745 }, { "epoch": 0.4, "grad_norm": 0.55078125, "learning_rate": 0.00015058714637604623, "loss": 0.858, "step": 27750 }, { "epoch": 0.4, "grad_norm": 0.55859375, "learning_rate": 0.00015056554834124675, "loss": 0.9466, "step": 27755 }, { "epoch": 0.4, "grad_norm": 0.57421875, "learning_rate": 0.0001505439471369411, "loss": 0.9885, "step": 27760 }, { "epoch": 0.4, "grad_norm": 0.55078125, "learning_rate": 0.0001505223427644833, "loss": 0.9319, "step": 27765 }, { "epoch": 0.4, "grad_norm": 0.76171875, "learning_rate": 0.00015050073522522751, "loss": 1.2127, "step": 27770 }, { "epoch": 0.4, "grad_norm": 0.5546875, "learning_rate": 0.00015047912452052813, "loss": 1.0039, "step": 27775 }, { "epoch": 0.4, "grad_norm": 0.51953125, "learning_rate": 0.00015045751065173972, "loss": 0.9694, "step": 27780 }, { "epoch": 0.4, "grad_norm": 0.5859375, "learning_rate": 0.00015043589362021708, "loss": 1.0176, "step": 27785 }, { "epoch": 0.4, "grad_norm": 0.57421875, "learning_rate": 0.0001504142734273152, "loss": 1.0114, "step": 27790 }, { "epoch": 0.4, "grad_norm": 0.55078125, "learning_rate": 0.0001503926500743892, "loss": 0.8299, "step": 27795 }, { "epoch": 0.4, "grad_norm": 0.5546875, "learning_rate": 0.00015037102356279457, "loss": 0.961, "step": 27800 }, { "epoch": 0.4, "grad_norm": 0.67578125, "learning_rate": 0.00015034939389388678, "loss": 1.0191, "step": 27805 }, { "epoch": 0.4, "grad_norm": 0.67578125, "learning_rate": 0.0001503277610690216, "loss": 0.8792, "step": 27810 }, { "epoch": 0.4, "grad_norm": 0.56640625, "learning_rate": 0.0001503061250895551, "loss": 0.9465, "step": 27815 }, { "epoch": 0.4, "grad_norm": 0.55859375, "learning_rate": 0.00015028448595684336, "loss": 1.059, "step": 27820 }, { "epoch": 0.4, "grad_norm": 0.6171875, "learning_rate": 0.00015026284367224276, "loss": 0.9291, "step": 27825 }, { "epoch": 0.4, "grad_norm": 0.58203125, "learning_rate": 0.00015024119823710987, "loss": 0.9444, "step": 27830 }, { "epoch": 0.4, "grad_norm": 0.447265625, "learning_rate": 0.00015021954965280148, "loss": 0.8182, "step": 27835 }, { "epoch": 0.4, "grad_norm": 0.5546875, "learning_rate": 0.0001501978979206745, "loss": 0.9046, "step": 27840 }, { "epoch": 0.4, "grad_norm": 0.54296875, "learning_rate": 0.0001501762430420861, "loss": 1.1551, "step": 27845 }, { "epoch": 0.4, "grad_norm": 0.53515625, "learning_rate": 0.00015015458501839367, "loss": 0.9656, "step": 27850 }, { "epoch": 0.4, "grad_norm": 0.5625, "learning_rate": 0.00015013292385095475, "loss": 0.9725, "step": 27855 }, { "epoch": 0.4, "grad_norm": 0.484375, "learning_rate": 0.000150111259541127, "loss": 0.9957, "step": 27860 }, { "epoch": 0.4, "grad_norm": 0.55078125, "learning_rate": 0.00015008959209026848, "loss": 1.0478, "step": 27865 }, { "epoch": 0.4, "grad_norm": 0.5546875, "learning_rate": 0.0001500679214997373, "loss": 0.9713, "step": 27870 }, { "epoch": 0.4, "grad_norm": 0.56640625, "learning_rate": 0.0001500462477708917, "loss": 0.948, "step": 27875 }, { "epoch": 0.4, "grad_norm": 0.5234375, "learning_rate": 0.00015002457090509033, "loss": 1.055, "step": 27880 }, { "epoch": 0.4, "grad_norm": 0.5078125, "learning_rate": 0.0001500028909036919, "loss": 0.9306, "step": 27885 }, { "epoch": 0.4, "grad_norm": 0.5625, "learning_rate": 0.0001499812077680553, "loss": 0.8554, "step": 27890 }, { "epoch": 0.4, "grad_norm": 0.5703125, "learning_rate": 0.0001499595214995397, "loss": 0.9094, "step": 27895 }, { "epoch": 0.4, "grad_norm": 0.50390625, "learning_rate": 0.00014993783209950437, "loss": 0.9488, "step": 27900 }, { "epoch": 0.4, "grad_norm": 0.5234375, "learning_rate": 0.00014991613956930885, "loss": 0.8167, "step": 27905 }, { "epoch": 0.4, "grad_norm": 0.466796875, "learning_rate": 0.00014989444391031283, "loss": 0.8708, "step": 27910 }, { "epoch": 0.4, "grad_norm": 0.55859375, "learning_rate": 0.00014987274512387631, "loss": 0.9122, "step": 27915 }, { "epoch": 0.4, "grad_norm": 0.60546875, "learning_rate": 0.00014985104321135927, "loss": 1.0521, "step": 27920 }, { "epoch": 0.4, "grad_norm": 0.6015625, "learning_rate": 0.00014982933817412207, "loss": 1.0315, "step": 27925 }, { "epoch": 0.4, "grad_norm": 0.62890625, "learning_rate": 0.00014980763001352522, "loss": 1.0003, "step": 27930 }, { "epoch": 0.4, "grad_norm": 0.5703125, "learning_rate": 0.00014978591873092938, "loss": 0.9905, "step": 27935 }, { "epoch": 0.4, "grad_norm": 0.56640625, "learning_rate": 0.00014976420432769545, "loss": 0.9145, "step": 27940 }, { "epoch": 0.4, "grad_norm": 0.54296875, "learning_rate": 0.00014974248680518453, "loss": 0.8594, "step": 27945 }, { "epoch": 0.4, "grad_norm": 0.625, "learning_rate": 0.00014972076616475785, "loss": 0.9862, "step": 27950 }, { "epoch": 0.4, "grad_norm": 0.5703125, "learning_rate": 0.00014969904240777696, "loss": 1.0253, "step": 27955 }, { "epoch": 0.4, "grad_norm": 0.6328125, "learning_rate": 0.0001496773155356035, "loss": 0.9986, "step": 27960 }, { "epoch": 0.4, "grad_norm": 0.5859375, "learning_rate": 0.0001496555855495993, "loss": 1.0846, "step": 27965 }, { "epoch": 0.4, "grad_norm": 0.6875, "learning_rate": 0.00014963385245112643, "loss": 0.9269, "step": 27970 }, { "epoch": 0.4, "grad_norm": 0.53515625, "learning_rate": 0.00014961211624154715, "loss": 1.0928, "step": 27975 }, { "epoch": 0.4, "grad_norm": 0.5625, "learning_rate": 0.00014959037692222396, "loss": 0.9256, "step": 27980 }, { "epoch": 0.4, "grad_norm": 0.5546875, "learning_rate": 0.00014956863449451947, "loss": 0.8836, "step": 27985 }, { "epoch": 0.4, "grad_norm": 0.53515625, "learning_rate": 0.0001495468889597965, "loss": 0.9389, "step": 27990 }, { "epoch": 0.4, "grad_norm": 0.50390625, "learning_rate": 0.0001495251403194181, "loss": 0.932, "step": 27995 }, { "epoch": 0.4, "grad_norm": 0.55859375, "learning_rate": 0.00014950338857474751, "loss": 1.0042, "step": 28000 }, { "epoch": 0.4, "grad_norm": 0.5390625, "learning_rate": 0.00014948163372714812, "loss": 0.9303, "step": 28005 }, { "epoch": 0.4, "grad_norm": 0.59765625, "learning_rate": 0.0001494598757779836, "loss": 1.0224, "step": 28010 }, { "epoch": 0.4, "grad_norm": 0.53515625, "learning_rate": 0.0001494381147286177, "loss": 0.9624, "step": 28015 }, { "epoch": 0.4, "grad_norm": 0.5625, "learning_rate": 0.0001494163505804145, "loss": 1.1779, "step": 28020 }, { "epoch": 0.4, "grad_norm": 0.54296875, "learning_rate": 0.00014939458333473814, "loss": 0.8101, "step": 28025 }, { "epoch": 0.4, "grad_norm": 0.5234375, "learning_rate": 0.00014937281299295306, "loss": 0.8563, "step": 28030 }, { "epoch": 0.4, "grad_norm": 0.515625, "learning_rate": 0.00014935103955642385, "loss": 0.9414, "step": 28035 }, { "epoch": 0.4, "grad_norm": 0.5625, "learning_rate": 0.00014932926302651525, "loss": 1.0134, "step": 28040 }, { "epoch": 0.4, "grad_norm": 0.53125, "learning_rate": 0.00014930748340459223, "loss": 1.0407, "step": 28045 }, { "epoch": 0.4, "grad_norm": 0.53515625, "learning_rate": 0.00014928570069202004, "loss": 0.9893, "step": 28050 }, { "epoch": 0.4, "grad_norm": 0.59765625, "learning_rate": 0.000149263914890164, "loss": 0.8799, "step": 28055 }, { "epoch": 0.4, "grad_norm": 0.69140625, "learning_rate": 0.00014924212600038962, "loss": 1.0822, "step": 28060 }, { "epoch": 0.4, "grad_norm": 0.53515625, "learning_rate": 0.00014922033402406273, "loss": 0.9435, "step": 28065 }, { "epoch": 0.4, "grad_norm": 0.5390625, "learning_rate": 0.00014919853896254925, "loss": 0.9677, "step": 28070 }, { "epoch": 0.4, "grad_norm": 0.58203125, "learning_rate": 0.00014917674081721532, "loss": 0.9603, "step": 28075 }, { "epoch": 0.4, "grad_norm": 0.66015625, "learning_rate": 0.00014915493958942726, "loss": 0.8813, "step": 28080 }, { "epoch": 0.4, "grad_norm": 0.6171875, "learning_rate": 0.0001491331352805516, "loss": 0.9499, "step": 28085 }, { "epoch": 0.4, "grad_norm": 0.5390625, "learning_rate": 0.00014911132789195507, "loss": 1.0815, "step": 28090 }, { "epoch": 0.4, "grad_norm": 0.6015625, "learning_rate": 0.0001490895174250046, "loss": 0.9011, "step": 28095 }, { "epoch": 0.4, "grad_norm": 0.53515625, "learning_rate": 0.00014906770388106722, "loss": 0.9064, "step": 28100 }, { "epoch": 0.4, "grad_norm": 0.58203125, "learning_rate": 0.0001490458872615103, "loss": 0.936, "step": 28105 }, { "epoch": 0.4, "grad_norm": 0.58984375, "learning_rate": 0.00014902406756770131, "loss": 1.0428, "step": 28110 }, { "epoch": 0.4, "grad_norm": 0.640625, "learning_rate": 0.00014900224480100794, "loss": 1.0521, "step": 28115 }, { "epoch": 0.4, "grad_norm": 0.466796875, "learning_rate": 0.00014898041896279805, "loss": 0.8189, "step": 28120 }, { "epoch": 0.4, "grad_norm": 0.5859375, "learning_rate": 0.0001489585900544397, "loss": 0.8784, "step": 28125 }, { "epoch": 0.4, "grad_norm": 0.59765625, "learning_rate": 0.00014893675807730117, "loss": 0.9528, "step": 28130 }, { "epoch": 0.4, "grad_norm": 0.4765625, "learning_rate": 0.0001489149230327509, "loss": 1.0401, "step": 28135 }, { "epoch": 0.4, "grad_norm": 0.6015625, "learning_rate": 0.00014889308492215756, "loss": 0.9957, "step": 28140 }, { "epoch": 0.4, "grad_norm": 0.62109375, "learning_rate": 0.00014887124374688999, "loss": 1.0581, "step": 28145 }, { "epoch": 0.4, "grad_norm": 0.63671875, "learning_rate": 0.00014884939950831716, "loss": 0.9572, "step": 28150 }, { "epoch": 0.4, "grad_norm": 0.62109375, "learning_rate": 0.00014882755220780837, "loss": 0.8592, "step": 28155 }, { "epoch": 0.4, "grad_norm": 0.515625, "learning_rate": 0.000148805701846733, "loss": 0.8136, "step": 28160 }, { "epoch": 0.4, "grad_norm": 0.6171875, "learning_rate": 0.0001487838484264606, "loss": 0.9009, "step": 28165 }, { "epoch": 0.4, "grad_norm": 0.5625, "learning_rate": 0.0001487619919483611, "loss": 0.9096, "step": 28170 }, { "epoch": 0.4, "grad_norm": 0.68359375, "learning_rate": 0.00014874013241380436, "loss": 0.9505, "step": 28175 }, { "epoch": 0.4, "grad_norm": 0.625, "learning_rate": 0.00014871826982416062, "loss": 0.9748, "step": 28180 }, { "epoch": 0.4, "grad_norm": 0.69140625, "learning_rate": 0.00014869640418080024, "loss": 1.0135, "step": 28185 }, { "epoch": 0.4, "grad_norm": 0.515625, "learning_rate": 0.00014867453548509377, "loss": 0.9244, "step": 28190 }, { "epoch": 0.4, "grad_norm": 0.578125, "learning_rate": 0.00014865266373841204, "loss": 1.0503, "step": 28195 }, { "epoch": 0.4, "grad_norm": 0.66015625, "learning_rate": 0.00014863078894212587, "loss": 0.9883, "step": 28200 }, { "epoch": 0.4, "grad_norm": 0.5703125, "learning_rate": 0.00014860891109760646, "loss": 1.0211, "step": 28205 }, { "epoch": 0.4, "grad_norm": 0.458984375, "learning_rate": 0.0001485870302062252, "loss": 0.9979, "step": 28210 }, { "epoch": 0.4, "grad_norm": 0.55078125, "learning_rate": 0.0001485651462693535, "loss": 0.9707, "step": 28215 }, { "epoch": 0.4, "grad_norm": 0.51953125, "learning_rate": 0.00014854325928836314, "loss": 0.8423, "step": 28220 }, { "epoch": 0.4, "grad_norm": 0.5234375, "learning_rate": 0.00014852136926462602, "loss": 0.878, "step": 28225 }, { "epoch": 0.4, "grad_norm": 0.5234375, "learning_rate": 0.0001484994761995142, "loss": 0.9854, "step": 28230 }, { "epoch": 0.41, "grad_norm": 0.6171875, "learning_rate": 0.00014847758009439995, "loss": 1.0014, "step": 28235 }, { "epoch": 0.41, "grad_norm": 0.57421875, "learning_rate": 0.00014845568095065578, "loss": 0.9161, "step": 28240 }, { "epoch": 0.41, "grad_norm": 0.5546875, "learning_rate": 0.00014843377876965437, "loss": 0.9832, "step": 28245 }, { "epoch": 0.41, "grad_norm": 0.54296875, "learning_rate": 0.00014841187355276852, "loss": 1.0405, "step": 28250 }, { "epoch": 0.41, "grad_norm": 0.54296875, "learning_rate": 0.0001483899653013713, "loss": 0.9107, "step": 28255 }, { "epoch": 0.41, "grad_norm": 0.54296875, "learning_rate": 0.00014836805401683597, "loss": 0.843, "step": 28260 }, { "epoch": 0.41, "grad_norm": 0.498046875, "learning_rate": 0.00014834613970053588, "loss": 0.9375, "step": 28265 }, { "epoch": 0.41, "grad_norm": 0.62890625, "learning_rate": 0.0001483242223538447, "loss": 1.0245, "step": 28270 }, { "epoch": 0.41, "grad_norm": 0.58984375, "learning_rate": 0.00014830230197813627, "loss": 0.9011, "step": 28275 }, { "epoch": 0.41, "grad_norm": 0.58203125, "learning_rate": 0.00014828037857478451, "loss": 0.8082, "step": 28280 }, { "epoch": 0.41, "grad_norm": 0.51171875, "learning_rate": 0.00014825845214516364, "loss": 0.9006, "step": 28285 }, { "epoch": 0.41, "grad_norm": 0.58984375, "learning_rate": 0.00014823652269064803, "loss": 0.9635, "step": 28290 }, { "epoch": 0.41, "grad_norm": 0.59375, "learning_rate": 0.00014821459021261224, "loss": 0.9608, "step": 28295 }, { "epoch": 0.41, "grad_norm": 1.0625, "learning_rate": 0.00014819265471243103, "loss": 1.0338, "step": 28300 }, { "epoch": 0.41, "grad_norm": 0.57421875, "learning_rate": 0.00014817071619147932, "loss": 1.0203, "step": 28305 }, { "epoch": 0.41, "grad_norm": 0.55078125, "learning_rate": 0.00014814877465113227, "loss": 1.1001, "step": 28310 }, { "epoch": 0.41, "grad_norm": 0.5390625, "learning_rate": 0.00014812683009276517, "loss": 0.944, "step": 28315 }, { "epoch": 0.41, "grad_norm": 0.59765625, "learning_rate": 0.00014810488251775357, "loss": 1.0509, "step": 28320 }, { "epoch": 0.41, "grad_norm": 0.51171875, "learning_rate": 0.00014808293192747313, "loss": 0.9797, "step": 28325 }, { "epoch": 0.41, "grad_norm": 0.66796875, "learning_rate": 0.00014806097832329975, "loss": 0.88, "step": 28330 }, { "epoch": 0.41, "grad_norm": 0.62890625, "learning_rate": 0.00014803902170660953, "loss": 1.0054, "step": 28335 }, { "epoch": 0.41, "grad_norm": 0.5859375, "learning_rate": 0.0001480170620787787, "loss": 1.0066, "step": 28340 }, { "epoch": 0.41, "grad_norm": 0.54296875, "learning_rate": 0.00014799509944118374, "loss": 1.0437, "step": 28345 }, { "epoch": 0.41, "grad_norm": 0.58984375, "learning_rate": 0.00014797313379520132, "loss": 0.9357, "step": 28350 }, { "epoch": 0.41, "grad_norm": 0.55078125, "learning_rate": 0.00014795116514220818, "loss": 0.9221, "step": 28355 }, { "epoch": 0.41, "grad_norm": 0.56640625, "learning_rate": 0.00014792919348358144, "loss": 0.913, "step": 28360 }, { "epoch": 0.41, "grad_norm": 0.5078125, "learning_rate": 0.00014790721882069823, "loss": 0.8986, "step": 28365 }, { "epoch": 0.41, "grad_norm": 0.66796875, "learning_rate": 0.000147885241154936, "loss": 0.9824, "step": 28370 }, { "epoch": 0.41, "grad_norm": 0.62890625, "learning_rate": 0.00014786326048767232, "loss": 0.9492, "step": 28375 }, { "epoch": 0.41, "grad_norm": 0.5546875, "learning_rate": 0.000147841276820285, "loss": 0.865, "step": 28380 }, { "epoch": 0.41, "grad_norm": 0.546875, "learning_rate": 0.0001478192901541519, "loss": 1.1203, "step": 28385 }, { "epoch": 0.41, "grad_norm": 0.55859375, "learning_rate": 0.00014779730049065124, "loss": 0.8508, "step": 28390 }, { "epoch": 0.41, "grad_norm": 0.55078125, "learning_rate": 0.00014777530783116136, "loss": 0.9426, "step": 28395 }, { "epoch": 0.41, "grad_norm": 0.5, "learning_rate": 0.00014775331217706077, "loss": 1.1074, "step": 28400 }, { "epoch": 0.41, "grad_norm": 0.60546875, "learning_rate": 0.0001477313135297282, "loss": 0.9565, "step": 28405 }, { "epoch": 0.41, "grad_norm": 0.53125, "learning_rate": 0.00014770931189054252, "loss": 0.9497, "step": 28410 }, { "epoch": 0.41, "grad_norm": 0.5625, "learning_rate": 0.00014768730726088286, "loss": 0.8835, "step": 28415 }, { "epoch": 0.41, "grad_norm": 0.59375, "learning_rate": 0.00014766529964212844, "loss": 1.0526, "step": 28420 }, { "epoch": 0.41, "grad_norm": 0.484375, "learning_rate": 0.00014764328903565875, "loss": 0.9905, "step": 28425 }, { "epoch": 0.41, "grad_norm": 0.50390625, "learning_rate": 0.0001476212754428535, "loss": 0.9627, "step": 28430 }, { "epoch": 0.41, "grad_norm": 0.6171875, "learning_rate": 0.00014759925886509241, "loss": 0.8762, "step": 28435 }, { "epoch": 0.41, "grad_norm": 0.63671875, "learning_rate": 0.00014757723930375555, "loss": 0.9423, "step": 28440 }, { "epoch": 0.41, "grad_norm": 0.5546875, "learning_rate": 0.0001475552167602232, "loss": 1.1222, "step": 28445 }, { "epoch": 0.41, "grad_norm": 0.64453125, "learning_rate": 0.00014753319123587567, "loss": 1.0916, "step": 28450 }, { "epoch": 0.41, "grad_norm": 0.50390625, "learning_rate": 0.00014751116273209358, "loss": 0.9933, "step": 28455 }, { "epoch": 0.41, "grad_norm": 0.55859375, "learning_rate": 0.00014748913125025773, "loss": 0.9168, "step": 28460 }, { "epoch": 0.41, "grad_norm": 0.56640625, "learning_rate": 0.000147467096791749, "loss": 0.9554, "step": 28465 }, { "epoch": 0.41, "grad_norm": 0.5078125, "learning_rate": 0.00014744505935794858, "loss": 0.8522, "step": 28470 }, { "epoch": 0.41, "grad_norm": 0.5625, "learning_rate": 0.00014742301895023785, "loss": 1.1012, "step": 28475 }, { "epoch": 0.41, "grad_norm": 0.51171875, "learning_rate": 0.00014740097556999824, "loss": 0.9742, "step": 28480 }, { "epoch": 0.41, "grad_norm": 0.5625, "learning_rate": 0.00014737892921861155, "loss": 1.1805, "step": 28485 }, { "epoch": 0.41, "grad_norm": 0.56640625, "learning_rate": 0.00014735687989745957, "loss": 0.9158, "step": 28490 }, { "epoch": 0.41, "grad_norm": 0.59765625, "learning_rate": 0.0001473348276079244, "loss": 1.0106, "step": 28495 }, { "epoch": 0.41, "grad_norm": 0.56640625, "learning_rate": 0.00014731277235138839, "loss": 1.0168, "step": 28500 }, { "epoch": 0.41, "grad_norm": 0.60546875, "learning_rate": 0.0001472907141292339, "loss": 0.8944, "step": 28505 }, { "epoch": 0.41, "grad_norm": 0.5390625, "learning_rate": 0.00014726865294284358, "loss": 0.8638, "step": 28510 }, { "epoch": 0.41, "grad_norm": 0.52734375, "learning_rate": 0.00014724658879360027, "loss": 0.7973, "step": 28515 }, { "epoch": 0.41, "grad_norm": 0.486328125, "learning_rate": 0.00014722452168288694, "loss": 0.7767, "step": 28520 }, { "epoch": 0.41, "grad_norm": 0.5859375, "learning_rate": 0.00014720245161208682, "loss": 0.9628, "step": 28525 }, { "epoch": 0.41, "grad_norm": 0.53515625, "learning_rate": 0.0001471803785825833, "loss": 0.9037, "step": 28530 }, { "epoch": 0.41, "grad_norm": 0.5703125, "learning_rate": 0.00014715830259575988, "loss": 1.0658, "step": 28535 }, { "epoch": 0.41, "grad_norm": 0.8046875, "learning_rate": 0.00014713622365300038, "loss": 0.971, "step": 28540 }, { "epoch": 0.41, "grad_norm": 0.51171875, "learning_rate": 0.00014711414175568865, "loss": 1.1279, "step": 28545 }, { "epoch": 0.41, "grad_norm": 0.58984375, "learning_rate": 0.00014709205690520888, "loss": 0.9409, "step": 28550 }, { "epoch": 0.41, "grad_norm": 0.51171875, "learning_rate": 0.00014706996910294535, "loss": 1.0696, "step": 28555 }, { "epoch": 0.41, "grad_norm": 0.62890625, "learning_rate": 0.00014704787835028257, "loss": 0.8597, "step": 28560 }, { "epoch": 0.41, "grad_norm": 0.5859375, "learning_rate": 0.00014702578464860516, "loss": 0.9773, "step": 28565 }, { "epoch": 0.41, "grad_norm": 0.6171875, "learning_rate": 0.00014700368799929804, "loss": 0.9193, "step": 28570 }, { "epoch": 0.41, "grad_norm": 0.54296875, "learning_rate": 0.00014698158840374619, "loss": 0.9545, "step": 28575 }, { "epoch": 0.41, "grad_norm": 0.5390625, "learning_rate": 0.0001469594858633349, "loss": 0.9189, "step": 28580 }, { "epoch": 0.41, "grad_norm": 0.63671875, "learning_rate": 0.00014693738037944954, "loss": 0.9593, "step": 28585 }, { "epoch": 0.41, "grad_norm": 0.6015625, "learning_rate": 0.00014691527195347573, "loss": 0.9682, "step": 28590 }, { "epoch": 0.41, "grad_norm": 0.5, "learning_rate": 0.00014689316058679922, "loss": 0.8859, "step": 28595 }, { "epoch": 0.41, "grad_norm": 0.55078125, "learning_rate": 0.000146871046280806, "loss": 1.1101, "step": 28600 }, { "epoch": 0.41, "grad_norm": 0.58203125, "learning_rate": 0.00014684892903688224, "loss": 0.9146, "step": 28605 }, { "epoch": 0.41, "grad_norm": 0.490234375, "learning_rate": 0.00014682680885641424, "loss": 0.989, "step": 28610 }, { "epoch": 0.41, "grad_norm": 0.5703125, "learning_rate": 0.00014680468574078853, "loss": 0.9693, "step": 28615 }, { "epoch": 0.41, "grad_norm": 0.55078125, "learning_rate": 0.00014678255969139184, "loss": 0.9098, "step": 28620 }, { "epoch": 0.41, "grad_norm": 0.53125, "learning_rate": 0.00014676043070961097, "loss": 0.9663, "step": 28625 }, { "epoch": 0.41, "grad_norm": 0.494140625, "learning_rate": 0.0001467382987968331, "loss": 0.8683, "step": 28630 }, { "epoch": 0.41, "grad_norm": 0.5546875, "learning_rate": 0.0001467161639544454, "loss": 0.9827, "step": 28635 }, { "epoch": 0.41, "grad_norm": 0.56640625, "learning_rate": 0.00014669402618383535, "loss": 1.0172, "step": 28640 }, { "epoch": 0.41, "grad_norm": 0.5703125, "learning_rate": 0.00014667188548639056, "loss": 1.003, "step": 28645 }, { "epoch": 0.41, "grad_norm": 0.65234375, "learning_rate": 0.00014664974186349883, "loss": 1.0974, "step": 28650 }, { "epoch": 0.41, "grad_norm": 0.51171875, "learning_rate": 0.00014662759531654812, "loss": 0.8167, "step": 28655 }, { "epoch": 0.41, "grad_norm": 0.51953125, "learning_rate": 0.0001466054458469267, "loss": 0.959, "step": 28660 }, { "epoch": 0.41, "grad_norm": 0.6484375, "learning_rate": 0.00014658329345602282, "loss": 1.1323, "step": 28665 }, { "epoch": 0.41, "grad_norm": 0.5546875, "learning_rate": 0.00014656113814522502, "loss": 0.9222, "step": 28670 }, { "epoch": 0.41, "grad_norm": 0.48828125, "learning_rate": 0.0001465389799159221, "loss": 0.926, "step": 28675 }, { "epoch": 0.41, "grad_norm": 0.52734375, "learning_rate": 0.00014651681876950287, "loss": 1.0413, "step": 28680 }, { "epoch": 0.41, "grad_norm": 0.6328125, "learning_rate": 0.0001464946547073565, "loss": 0.9231, "step": 28685 }, { "epoch": 0.41, "grad_norm": 0.5625, "learning_rate": 0.00014647248773087219, "loss": 0.9352, "step": 28690 }, { "epoch": 0.41, "grad_norm": 0.546875, "learning_rate": 0.00014645031784143946, "loss": 0.8001, "step": 28695 }, { "epoch": 0.41, "grad_norm": 0.54296875, "learning_rate": 0.00014642814504044787, "loss": 0.9051, "step": 28700 }, { "epoch": 0.41, "grad_norm": 0.53515625, "learning_rate": 0.0001464059693292873, "loss": 0.9376, "step": 28705 }, { "epoch": 0.41, "grad_norm": 0.60546875, "learning_rate": 0.00014638379070934767, "loss": 1.1241, "step": 28710 }, { "epoch": 0.41, "grad_norm": 0.5390625, "learning_rate": 0.00014636160918201927, "loss": 0.9326, "step": 28715 }, { "epoch": 0.41, "grad_norm": 0.59375, "learning_rate": 0.0001463394247486924, "loss": 0.9006, "step": 28720 }, { "epoch": 0.41, "grad_norm": 0.484375, "learning_rate": 0.00014631723741075759, "loss": 1.0671, "step": 28725 }, { "epoch": 0.41, "grad_norm": 0.53515625, "learning_rate": 0.00014629504716960558, "loss": 1.1036, "step": 28730 }, { "epoch": 0.41, "grad_norm": 0.6875, "learning_rate": 0.0001462728540266273, "loss": 1.0322, "step": 28735 }, { "epoch": 0.41, "grad_norm": 0.62109375, "learning_rate": 0.00014625065798321382, "loss": 0.9795, "step": 28740 }, { "epoch": 0.41, "grad_norm": 0.57421875, "learning_rate": 0.00014622845904075643, "loss": 0.9386, "step": 28745 }, { "epoch": 0.41, "grad_norm": 0.6171875, "learning_rate": 0.00014620625720064657, "loss": 1.0119, "step": 28750 }, { "epoch": 0.41, "grad_norm": 0.640625, "learning_rate": 0.00014618405246427592, "loss": 1.0182, "step": 28755 }, { "epoch": 0.41, "grad_norm": 0.6328125, "learning_rate": 0.00014616184483303622, "loss": 0.9796, "step": 28760 }, { "epoch": 0.41, "grad_norm": 0.5078125, "learning_rate": 0.00014613963430831948, "loss": 0.9254, "step": 28765 }, { "epoch": 0.41, "grad_norm": 0.5234375, "learning_rate": 0.000146117420891518, "loss": 0.8252, "step": 28770 }, { "epoch": 0.41, "grad_norm": 0.62890625, "learning_rate": 0.00014609520458402404, "loss": 0.8693, "step": 28775 }, { "epoch": 0.41, "grad_norm": 0.478515625, "learning_rate": 0.0001460729853872301, "loss": 0.9317, "step": 28780 }, { "epoch": 0.41, "grad_norm": 0.53125, "learning_rate": 0.000146050763302529, "loss": 0.8887, "step": 28785 }, { "epoch": 0.41, "grad_norm": 0.5859375, "learning_rate": 0.00014602853833131361, "loss": 0.9777, "step": 28790 }, { "epoch": 0.41, "grad_norm": 0.57421875, "learning_rate": 0.00014600631047497698, "loss": 0.9747, "step": 28795 }, { "epoch": 0.41, "grad_norm": 0.55859375, "learning_rate": 0.00014598407973491248, "loss": 1.0291, "step": 28800 }, { "epoch": 0.41, "grad_norm": 0.6953125, "learning_rate": 0.00014596184611251345, "loss": 0.9252, "step": 28805 }, { "epoch": 0.41, "grad_norm": 0.5234375, "learning_rate": 0.00014593960960917354, "loss": 0.8716, "step": 28810 }, { "epoch": 0.41, "grad_norm": 0.64453125, "learning_rate": 0.00014591737022628663, "loss": 1.051, "step": 28815 }, { "epoch": 0.41, "grad_norm": 0.5625, "learning_rate": 0.0001458951279652466, "loss": 0.9231, "step": 28820 }, { "epoch": 0.41, "grad_norm": 0.63671875, "learning_rate": 0.00014587288282744774, "loss": 0.9758, "step": 28825 }, { "epoch": 0.41, "grad_norm": 0.61328125, "learning_rate": 0.0001458506348142843, "loss": 0.9687, "step": 28830 }, { "epoch": 0.41, "grad_norm": 0.451171875, "learning_rate": 0.00014582838392715087, "loss": 0.8625, "step": 28835 }, { "epoch": 0.41, "grad_norm": 0.56640625, "learning_rate": 0.00014580613016744213, "loss": 0.9746, "step": 28840 }, { "epoch": 0.41, "grad_norm": 0.51953125, "learning_rate": 0.00014578387353655296, "loss": 0.9406, "step": 28845 }, { "epoch": 0.41, "grad_norm": 0.53515625, "learning_rate": 0.0001457616140358785, "loss": 0.8996, "step": 28850 }, { "epoch": 0.41, "grad_norm": 0.578125, "learning_rate": 0.00014573935166681392, "loss": 0.9932, "step": 28855 }, { "epoch": 0.41, "grad_norm": 0.5859375, "learning_rate": 0.00014571708643075468, "loss": 0.8784, "step": 28860 }, { "epoch": 0.41, "grad_norm": 0.5625, "learning_rate": 0.0001456948183290964, "loss": 1.002, "step": 28865 }, { "epoch": 0.41, "grad_norm": 0.55859375, "learning_rate": 0.0001456725473632349, "loss": 0.9869, "step": 28870 }, { "epoch": 0.41, "grad_norm": 0.609375, "learning_rate": 0.00014565027353456608, "loss": 0.9852, "step": 28875 }, { "epoch": 0.41, "grad_norm": 0.5625, "learning_rate": 0.00014562799684448617, "loss": 0.8989, "step": 28880 }, { "epoch": 0.41, "grad_norm": 0.5703125, "learning_rate": 0.00014560571729439138, "loss": 0.9583, "step": 28885 }, { "epoch": 0.41, "grad_norm": 0.55859375, "learning_rate": 0.00014558343488567835, "loss": 0.9679, "step": 28890 }, { "epoch": 0.41, "grad_norm": 0.63671875, "learning_rate": 0.00014556114961974368, "loss": 1.1131, "step": 28895 }, { "epoch": 0.41, "grad_norm": 0.56640625, "learning_rate": 0.00014553886149798424, "loss": 1.0294, "step": 28900 }, { "epoch": 0.41, "grad_norm": 0.60546875, "learning_rate": 0.00014551657052179712, "loss": 0.971, "step": 28905 }, { "epoch": 0.41, "grad_norm": 0.6796875, "learning_rate": 0.00014549427669257955, "loss": 0.9676, "step": 28910 }, { "epoch": 0.41, "grad_norm": 0.6171875, "learning_rate": 0.00014547198001172885, "loss": 0.9163, "step": 28915 }, { "epoch": 0.41, "grad_norm": 0.53125, "learning_rate": 0.00014544968048064267, "loss": 1.026, "step": 28920 }, { "epoch": 0.41, "grad_norm": 0.58984375, "learning_rate": 0.00014542737810071879, "loss": 0.8377, "step": 28925 }, { "epoch": 0.41, "grad_norm": 0.54296875, "learning_rate": 0.00014540507287335506, "loss": 0.9355, "step": 28930 }, { "epoch": 0.42, "grad_norm": 0.6015625, "learning_rate": 0.0001453827647999497, "loss": 0.8847, "step": 28935 }, { "epoch": 0.42, "grad_norm": 0.484375, "learning_rate": 0.00014536045388190093, "loss": 1.0317, "step": 28940 }, { "epoch": 0.42, "grad_norm": 0.578125, "learning_rate": 0.0001453381401206072, "loss": 0.8708, "step": 28945 }, { "epoch": 0.42, "grad_norm": 0.55859375, "learning_rate": 0.0001453158235174673, "loss": 0.9982, "step": 28950 }, { "epoch": 0.42, "grad_norm": 0.64453125, "learning_rate": 0.00014529350407387995, "loss": 0.9613, "step": 28955 }, { "epoch": 0.42, "grad_norm": 0.5390625, "learning_rate": 0.00014527118179124415, "loss": 0.8559, "step": 28960 }, { "epoch": 0.42, "grad_norm": 0.478515625, "learning_rate": 0.00014524885667095914, "loss": 1.0255, "step": 28965 }, { "epoch": 0.42, "grad_norm": 0.58203125, "learning_rate": 0.00014522652871442425, "loss": 0.9109, "step": 28970 }, { "epoch": 0.42, "grad_norm": 0.54296875, "learning_rate": 0.000145204197923039, "loss": 0.8796, "step": 28975 }, { "epoch": 0.42, "grad_norm": 0.65625, "learning_rate": 0.0001451818642982032, "loss": 1.0481, "step": 28980 }, { "epoch": 0.42, "grad_norm": 0.5703125, "learning_rate": 0.00014515952784131665, "loss": 0.9254, "step": 28985 }, { "epoch": 0.42, "grad_norm": 0.578125, "learning_rate": 0.00014513718855377949, "loss": 1.0509, "step": 28990 }, { "epoch": 0.42, "grad_norm": 0.490234375, "learning_rate": 0.0001451148464369919, "loss": 0.9491, "step": 28995 }, { "epoch": 0.42, "grad_norm": 0.5390625, "learning_rate": 0.00014509250149235438, "loss": 0.9705, "step": 29000 }, { "epoch": 0.42, "grad_norm": 0.50390625, "learning_rate": 0.00014507015372126753, "loss": 0.9703, "step": 29005 }, { "epoch": 0.42, "grad_norm": 0.640625, "learning_rate": 0.00014504780312513208, "loss": 1.0045, "step": 29010 }, { "epoch": 0.42, "grad_norm": 0.5, "learning_rate": 0.00014502544970534906, "loss": 0.9571, "step": 29015 }, { "epoch": 0.42, "grad_norm": 0.671875, "learning_rate": 0.00014500309346331954, "loss": 1.0786, "step": 29020 }, { "epoch": 0.42, "grad_norm": 0.59765625, "learning_rate": 0.00014498073440044487, "loss": 0.9729, "step": 29025 }, { "epoch": 0.42, "grad_norm": 0.640625, "learning_rate": 0.00014495837251812655, "loss": 0.9124, "step": 29030 }, { "epoch": 0.42, "grad_norm": 0.5625, "learning_rate": 0.00014493600781776626, "loss": 0.8797, "step": 29035 }, { "epoch": 0.42, "grad_norm": 0.51171875, "learning_rate": 0.0001449136403007658, "loss": 0.8734, "step": 29040 }, { "epoch": 0.42, "grad_norm": 0.5625, "learning_rate": 0.0001448912699685272, "loss": 1.0155, "step": 29045 }, { "epoch": 0.42, "grad_norm": 0.59375, "learning_rate": 0.00014486889682245268, "loss": 0.9959, "step": 29050 }, { "epoch": 0.42, "grad_norm": 0.48046875, "learning_rate": 0.0001448465208639446, "loss": 0.9737, "step": 29055 }, { "epoch": 0.42, "grad_norm": 0.5234375, "learning_rate": 0.00014482414209440555, "loss": 0.7726, "step": 29060 }, { "epoch": 0.42, "grad_norm": 0.486328125, "learning_rate": 0.0001448017605152382, "loss": 0.8841, "step": 29065 }, { "epoch": 0.42, "grad_norm": 0.53515625, "learning_rate": 0.0001447793761278455, "loss": 0.8884, "step": 29070 }, { "epoch": 0.42, "grad_norm": 0.53515625, "learning_rate": 0.00014475698893363044, "loss": 0.896, "step": 29075 }, { "epoch": 0.42, "grad_norm": 0.5625, "learning_rate": 0.00014473459893399638, "loss": 1.1103, "step": 29080 }, { "epoch": 0.42, "grad_norm": 0.59375, "learning_rate": 0.00014471220613034675, "loss": 1.0756, "step": 29085 }, { "epoch": 0.42, "grad_norm": 0.65234375, "learning_rate": 0.00014468981052408506, "loss": 1.0535, "step": 29090 }, { "epoch": 0.42, "grad_norm": 0.515625, "learning_rate": 0.00014466741211661516, "loss": 0.8998, "step": 29095 }, { "epoch": 0.42, "grad_norm": 0.458984375, "learning_rate": 0.000144645010909341, "loss": 0.9208, "step": 29100 }, { "epoch": 0.42, "grad_norm": 1.0078125, "learning_rate": 0.00014462260690366668, "loss": 1.0528, "step": 29105 }, { "epoch": 0.42, "grad_norm": 0.61328125, "learning_rate": 0.0001446002001009966, "loss": 1.0644, "step": 29110 }, { "epoch": 0.42, "grad_norm": 0.578125, "learning_rate": 0.0001445777905027352, "loss": 0.9569, "step": 29115 }, { "epoch": 0.42, "grad_norm": 0.55859375, "learning_rate": 0.00014455537811028702, "loss": 0.9952, "step": 29120 }, { "epoch": 0.42, "grad_norm": 0.6484375, "learning_rate": 0.00014453296292505708, "loss": 1.0159, "step": 29125 }, { "epoch": 0.42, "grad_norm": 0.61328125, "learning_rate": 0.00014451054494845025, "loss": 0.8709, "step": 29130 }, { "epoch": 0.42, "grad_norm": 0.59375, "learning_rate": 0.0001444881241818718, "loss": 0.92, "step": 29135 }, { "epoch": 0.42, "grad_norm": 0.6640625, "learning_rate": 0.00014446570062672705, "loss": 1.0585, "step": 29140 }, { "epoch": 0.42, "grad_norm": 0.53515625, "learning_rate": 0.00014444327428442155, "loss": 1.0007, "step": 29145 }, { "epoch": 0.42, "grad_norm": 0.60546875, "learning_rate": 0.000144420845156361, "loss": 0.9254, "step": 29150 }, { "epoch": 0.42, "grad_norm": 0.6640625, "learning_rate": 0.0001443984132439513, "loss": 0.9686, "step": 29155 }, { "epoch": 0.42, "grad_norm": 0.57421875, "learning_rate": 0.00014437597854859846, "loss": 0.9258, "step": 29160 }, { "epoch": 0.42, "grad_norm": 0.5234375, "learning_rate": 0.00014435354107170876, "loss": 0.8689, "step": 29165 }, { "epoch": 0.42, "grad_norm": 0.6328125, "learning_rate": 0.00014433110081468859, "loss": 0.9698, "step": 29170 }, { "epoch": 0.42, "grad_norm": 0.65625, "learning_rate": 0.00014430865777894454, "loss": 1.0518, "step": 29175 }, { "epoch": 0.42, "grad_norm": 0.58203125, "learning_rate": 0.00014428621196588336, "loss": 0.9741, "step": 29180 }, { "epoch": 0.42, "grad_norm": 0.609375, "learning_rate": 0.00014426376337691195, "loss": 0.8741, "step": 29185 }, { "epoch": 0.42, "grad_norm": 0.5859375, "learning_rate": 0.00014424131201343747, "loss": 0.9883, "step": 29190 }, { "epoch": 0.42, "grad_norm": 0.60546875, "learning_rate": 0.00014421885787686714, "loss": 0.9213, "step": 29195 }, { "epoch": 0.42, "grad_norm": 0.58984375, "learning_rate": 0.00014419640096860844, "loss": 0.9058, "step": 29200 }, { "epoch": 0.42, "grad_norm": 0.54296875, "learning_rate": 0.000144173941290069, "loss": 0.892, "step": 29205 }, { "epoch": 0.42, "grad_norm": 0.6796875, "learning_rate": 0.0001441514788426566, "loss": 0.9142, "step": 29210 }, { "epoch": 0.42, "grad_norm": 0.6171875, "learning_rate": 0.00014412901362777922, "loss": 1.0063, "step": 29215 }, { "epoch": 0.42, "grad_norm": 0.60546875, "learning_rate": 0.00014410654564684505, "loss": 1.0145, "step": 29220 }, { "epoch": 0.42, "grad_norm": 0.57421875, "learning_rate": 0.00014408407490126233, "loss": 0.9077, "step": 29225 }, { "epoch": 0.42, "grad_norm": 0.5546875, "learning_rate": 0.00014406160139243958, "loss": 1.0044, "step": 29230 }, { "epoch": 0.42, "grad_norm": 0.69921875, "learning_rate": 0.0001440391251217855, "loss": 1.1243, "step": 29235 }, { "epoch": 0.42, "grad_norm": 0.5859375, "learning_rate": 0.00014401664609070889, "loss": 1.1431, "step": 29240 }, { "epoch": 0.42, "grad_norm": 0.69140625, "learning_rate": 0.00014399416430061875, "loss": 0.789, "step": 29245 }, { "epoch": 0.42, "grad_norm": 0.5703125, "learning_rate": 0.0001439716797529243, "loss": 0.8925, "step": 29250 }, { "epoch": 0.42, "grad_norm": 0.53515625, "learning_rate": 0.0001439491924490349, "loss": 0.9749, "step": 29255 }, { "epoch": 0.42, "grad_norm": 0.6640625, "learning_rate": 0.00014392670239036004, "loss": 1.0446, "step": 29260 }, { "epoch": 0.42, "grad_norm": 0.60546875, "learning_rate": 0.00014390420957830947, "loss": 0.8864, "step": 29265 }, { "epoch": 0.42, "grad_norm": 0.61328125, "learning_rate": 0.00014388171401429306, "loss": 0.9329, "step": 29270 }, { "epoch": 0.42, "grad_norm": 0.59375, "learning_rate": 0.00014385921569972079, "loss": 0.843, "step": 29275 }, { "epoch": 0.42, "grad_norm": 0.62890625, "learning_rate": 0.00014383671463600293, "loss": 0.8605, "step": 29280 }, { "epoch": 0.42, "grad_norm": 0.67578125, "learning_rate": 0.00014381421082454988, "loss": 0.9682, "step": 29285 }, { "epoch": 0.42, "grad_norm": 0.5390625, "learning_rate": 0.0001437917042667722, "loss": 0.8813, "step": 29290 }, { "epoch": 0.42, "grad_norm": 0.578125, "learning_rate": 0.00014376919496408063, "loss": 1.0273, "step": 29295 }, { "epoch": 0.42, "grad_norm": 0.59375, "learning_rate": 0.00014374668291788608, "loss": 1.1245, "step": 29300 }, { "epoch": 0.42, "grad_norm": 0.55859375, "learning_rate": 0.0001437241681295996, "loss": 0.9684, "step": 29305 }, { "epoch": 0.42, "grad_norm": 0.75, "learning_rate": 0.00014370165060063246, "loss": 0.858, "step": 29310 }, { "epoch": 0.42, "grad_norm": 0.55078125, "learning_rate": 0.0001436791303323961, "loss": 0.9644, "step": 29315 }, { "epoch": 0.42, "grad_norm": 0.5546875, "learning_rate": 0.0001436566073263021, "loss": 1.017, "step": 29320 }, { "epoch": 0.42, "grad_norm": 0.5390625, "learning_rate": 0.00014363408158376225, "loss": 0.9339, "step": 29325 }, { "epoch": 0.42, "grad_norm": 0.6015625, "learning_rate": 0.00014361155310618846, "loss": 1.0671, "step": 29330 }, { "epoch": 0.42, "grad_norm": 0.6171875, "learning_rate": 0.00014358902189499283, "loss": 0.9671, "step": 29335 }, { "epoch": 0.42, "grad_norm": 0.57421875, "learning_rate": 0.00014356648795158766, "loss": 0.9803, "step": 29340 }, { "epoch": 0.42, "grad_norm": 0.609375, "learning_rate": 0.00014354395127738544, "loss": 1.1102, "step": 29345 }, { "epoch": 0.42, "grad_norm": 0.7109375, "learning_rate": 0.00014352141187379876, "loss": 1.1862, "step": 29350 }, { "epoch": 0.42, "grad_norm": 0.5625, "learning_rate": 0.0001434988697422404, "loss": 0.9587, "step": 29355 }, { "epoch": 0.42, "grad_norm": 0.546875, "learning_rate": 0.00014347632488412337, "loss": 1.0016, "step": 29360 }, { "epoch": 0.42, "grad_norm": 0.7578125, "learning_rate": 0.00014345377730086076, "loss": 0.9902, "step": 29365 }, { "epoch": 0.42, "grad_norm": 0.578125, "learning_rate": 0.00014343122699386589, "loss": 0.9437, "step": 29370 }, { "epoch": 0.42, "grad_norm": 0.60546875, "learning_rate": 0.00014340867396455228, "loss": 0.9887, "step": 29375 }, { "epoch": 0.42, "grad_norm": 0.6171875, "learning_rate": 0.0001433861182143335, "loss": 0.8093, "step": 29380 }, { "epoch": 0.42, "grad_norm": 0.57421875, "learning_rate": 0.00014336355974462346, "loss": 1.0003, "step": 29385 }, { "epoch": 0.42, "grad_norm": 0.5, "learning_rate": 0.00014334099855683606, "loss": 0.871, "step": 29390 }, { "epoch": 0.42, "grad_norm": 0.53125, "learning_rate": 0.00014331843465238556, "loss": 1.0591, "step": 29395 }, { "epoch": 0.42, "grad_norm": 0.5703125, "learning_rate": 0.00014329586803268625, "loss": 0.9311, "step": 29400 }, { "epoch": 0.42, "grad_norm": 0.54296875, "learning_rate": 0.0001432732986991526, "loss": 0.9105, "step": 29405 }, { "epoch": 0.42, "grad_norm": 0.55859375, "learning_rate": 0.00014325072665319927, "loss": 0.9017, "step": 29410 }, { "epoch": 0.42, "grad_norm": 0.68359375, "learning_rate": 0.00014322815189624118, "loss": 1.0109, "step": 29415 }, { "epoch": 0.42, "grad_norm": 0.58203125, "learning_rate": 0.00014320557442969325, "loss": 0.9553, "step": 29420 }, { "epoch": 0.42, "grad_norm": 0.546875, "learning_rate": 0.00014318299425497075, "loss": 0.9934, "step": 29425 }, { "epoch": 0.42, "grad_norm": 0.65625, "learning_rate": 0.00014316041137348896, "loss": 1.0326, "step": 29430 }, { "epoch": 0.42, "grad_norm": 0.51953125, "learning_rate": 0.00014313782578666346, "loss": 0.8656, "step": 29435 }, { "epoch": 0.42, "grad_norm": 0.59765625, "learning_rate": 0.0001431152374959099, "loss": 1.0418, "step": 29440 }, { "epoch": 0.42, "grad_norm": 0.59765625, "learning_rate": 0.00014309264650264414, "loss": 1.0044, "step": 29445 }, { "epoch": 0.42, "grad_norm": 0.546875, "learning_rate": 0.0001430700528082822, "loss": 0.8477, "step": 29450 }, { "epoch": 0.42, "grad_norm": 0.515625, "learning_rate": 0.00014304745641424035, "loss": 1.0616, "step": 29455 }, { "epoch": 0.42, "grad_norm": 0.59375, "learning_rate": 0.00014302485732193486, "loss": 0.9678, "step": 29460 }, { "epoch": 0.42, "grad_norm": 0.50390625, "learning_rate": 0.00014300225553278234, "loss": 0.8745, "step": 29465 }, { "epoch": 0.42, "grad_norm": 0.5390625, "learning_rate": 0.00014297965104819946, "loss": 0.8087, "step": 29470 }, { "epoch": 0.42, "grad_norm": 0.6875, "learning_rate": 0.00014295704386960308, "loss": 1.0838, "step": 29475 }, { "epoch": 0.42, "grad_norm": 0.5859375, "learning_rate": 0.0001429344339984103, "loss": 0.9963, "step": 29480 }, { "epoch": 0.42, "grad_norm": 0.59765625, "learning_rate": 0.00014291182143603826, "loss": 0.8762, "step": 29485 }, { "epoch": 0.42, "grad_norm": 0.54296875, "learning_rate": 0.0001428892061839044, "loss": 0.839, "step": 29490 }, { "epoch": 0.42, "grad_norm": 0.59375, "learning_rate": 0.00014286658824342626, "loss": 0.8925, "step": 29495 }, { "epoch": 0.42, "grad_norm": 0.412109375, "learning_rate": 0.00014284396761602152, "loss": 0.8431, "step": 29500 }, { "epoch": 0.42, "grad_norm": 0.490234375, "learning_rate": 0.00014282134430310814, "loss": 0.9033, "step": 29505 }, { "epoch": 0.42, "grad_norm": 0.59375, "learning_rate": 0.00014279871830610413, "loss": 0.9588, "step": 29510 }, { "epoch": 0.42, "grad_norm": 0.59765625, "learning_rate": 0.00014277608962642765, "loss": 0.9106, "step": 29515 }, { "epoch": 0.42, "grad_norm": 0.53515625, "learning_rate": 0.00014275345826549718, "loss": 0.951, "step": 29520 }, { "epoch": 0.42, "grad_norm": 0.51171875, "learning_rate": 0.0001427308242247313, "loss": 0.8358, "step": 29525 }, { "epoch": 0.42, "grad_norm": 0.609375, "learning_rate": 0.00014270818750554862, "loss": 1.019, "step": 29530 }, { "epoch": 0.42, "grad_norm": 0.578125, "learning_rate": 0.00014268554810936818, "loss": 1.0055, "step": 29535 }, { "epoch": 0.42, "grad_norm": 0.498046875, "learning_rate": 0.00014266290603760892, "loss": 0.8804, "step": 29540 }, { "epoch": 0.42, "grad_norm": 0.5234375, "learning_rate": 0.00014264026129169014, "loss": 0.9749, "step": 29545 }, { "epoch": 0.42, "grad_norm": 0.52734375, "learning_rate": 0.0001426176138730312, "loss": 0.8841, "step": 29550 }, { "epoch": 0.42, "grad_norm": 0.490234375, "learning_rate": 0.0001425949637830517, "loss": 0.943, "step": 29555 }, { "epoch": 0.42, "grad_norm": 0.6796875, "learning_rate": 0.00014257231102317137, "loss": 1.0354, "step": 29560 }, { "epoch": 0.42, "grad_norm": 0.5546875, "learning_rate": 0.00014254965559481005, "loss": 0.8461, "step": 29565 }, { "epoch": 0.42, "grad_norm": 0.58203125, "learning_rate": 0.0001425269974993879, "loss": 0.959, "step": 29570 }, { "epoch": 0.42, "grad_norm": 0.58203125, "learning_rate": 0.00014250433673832513, "loss": 1.0894, "step": 29575 }, { "epoch": 0.42, "grad_norm": 0.5234375, "learning_rate": 0.0001424816733130421, "loss": 0.9041, "step": 29580 }, { "epoch": 0.42, "grad_norm": 0.59765625, "learning_rate": 0.0001424590072249594, "loss": 0.9017, "step": 29585 }, { "epoch": 0.42, "grad_norm": 0.5, "learning_rate": 0.00014243633847549778, "loss": 0.9902, "step": 29590 }, { "epoch": 0.42, "grad_norm": 0.59375, "learning_rate": 0.0001424136670660781, "loss": 1.0025, "step": 29595 }, { "epoch": 0.42, "grad_norm": 0.5859375, "learning_rate": 0.0001423909929981215, "loss": 0.8774, "step": 29600 }, { "epoch": 0.42, "grad_norm": 0.61328125, "learning_rate": 0.00014236831627304918, "loss": 0.9068, "step": 29605 }, { "epoch": 0.42, "grad_norm": 0.59375, "learning_rate": 0.00014234563689228253, "loss": 0.8929, "step": 29610 }, { "epoch": 0.42, "grad_norm": 0.52734375, "learning_rate": 0.00014232295485724314, "loss": 0.9433, "step": 29615 }, { "epoch": 0.42, "grad_norm": 0.53515625, "learning_rate": 0.00014230027016935273, "loss": 0.8688, "step": 29620 }, { "epoch": 0.42, "grad_norm": 0.59765625, "learning_rate": 0.0001422775828300332, "loss": 0.9854, "step": 29625 }, { "epoch": 0.43, "grad_norm": 0.5078125, "learning_rate": 0.00014225489284070667, "loss": 0.9152, "step": 29630 }, { "epoch": 0.43, "grad_norm": 0.5078125, "learning_rate": 0.0001422322002027953, "loss": 0.9432, "step": 29635 }, { "epoch": 0.43, "grad_norm": 0.60546875, "learning_rate": 0.0001422095049177215, "loss": 1.1606, "step": 29640 }, { "epoch": 0.43, "grad_norm": 0.58984375, "learning_rate": 0.00014218680698690792, "loss": 0.9514, "step": 29645 }, { "epoch": 0.43, "grad_norm": 0.58203125, "learning_rate": 0.00014216410641177717, "loss": 0.87, "step": 29650 }, { "epoch": 0.43, "grad_norm": 0.50390625, "learning_rate": 0.00014214140319375224, "loss": 0.9848, "step": 29655 }, { "epoch": 0.43, "grad_norm": 0.5625, "learning_rate": 0.00014211869733425615, "loss": 0.904, "step": 29660 }, { "epoch": 0.43, "grad_norm": 0.69140625, "learning_rate": 0.00014209598883471215, "loss": 0.9153, "step": 29665 }, { "epoch": 0.43, "grad_norm": 0.55859375, "learning_rate": 0.00014207327769654365, "loss": 0.9631, "step": 29670 }, { "epoch": 0.43, "grad_norm": 0.6328125, "learning_rate": 0.00014205056392117414, "loss": 1.0254, "step": 29675 }, { "epoch": 0.43, "grad_norm": 0.59765625, "learning_rate": 0.00014202784751002744, "loss": 0.898, "step": 29680 }, { "epoch": 0.43, "grad_norm": 0.60546875, "learning_rate": 0.00014200512846452735, "loss": 1.1391, "step": 29685 }, { "epoch": 0.43, "grad_norm": 0.59375, "learning_rate": 0.000141982406786098, "loss": 0.9663, "step": 29690 }, { "epoch": 0.43, "grad_norm": 0.50390625, "learning_rate": 0.00014195968247616357, "loss": 0.9682, "step": 29695 }, { "epoch": 0.43, "grad_norm": 0.63671875, "learning_rate": 0.00014193695553614845, "loss": 1.0978, "step": 29700 }, { "epoch": 0.43, "grad_norm": 0.56640625, "learning_rate": 0.00014191422596747716, "loss": 0.8681, "step": 29705 }, { "epoch": 0.43, "grad_norm": 0.57421875, "learning_rate": 0.00014189149377157452, "loss": 0.8885, "step": 29710 }, { "epoch": 0.43, "grad_norm": 0.55078125, "learning_rate": 0.0001418687589498653, "loss": 1.0428, "step": 29715 }, { "epoch": 0.43, "grad_norm": 0.62109375, "learning_rate": 0.00014184602150377456, "loss": 0.814, "step": 29720 }, { "epoch": 0.43, "grad_norm": 0.5703125, "learning_rate": 0.0001418232814347276, "loss": 0.9806, "step": 29725 }, { "epoch": 0.43, "grad_norm": 0.546875, "learning_rate": 0.00014180053874414967, "loss": 0.907, "step": 29730 }, { "epoch": 0.43, "grad_norm": 0.5234375, "learning_rate": 0.00014177779343346638, "loss": 0.9666, "step": 29735 }, { "epoch": 0.43, "grad_norm": 0.578125, "learning_rate": 0.00014175504550410342, "loss": 1.0781, "step": 29740 }, { "epoch": 0.43, "grad_norm": 0.578125, "learning_rate": 0.00014173229495748665, "loss": 0.9353, "step": 29745 }, { "epoch": 0.43, "grad_norm": 0.546875, "learning_rate": 0.00014170954179504213, "loss": 0.982, "step": 29750 }, { "epoch": 0.43, "grad_norm": 0.6484375, "learning_rate": 0.00014168678601819598, "loss": 0.9841, "step": 29755 }, { "epoch": 0.43, "grad_norm": 0.5625, "learning_rate": 0.00014166402762837464, "loss": 0.9569, "step": 29760 }, { "epoch": 0.43, "grad_norm": 0.54296875, "learning_rate": 0.00014164126662700457, "loss": 1.0546, "step": 29765 }, { "epoch": 0.43, "grad_norm": 0.57421875, "learning_rate": 0.0001416185030155125, "loss": 0.869, "step": 29770 }, { "epoch": 0.43, "grad_norm": 0.55078125, "learning_rate": 0.00014159573679532525, "loss": 0.9614, "step": 29775 }, { "epoch": 0.43, "grad_norm": 0.55859375, "learning_rate": 0.00014157296796786989, "loss": 0.9047, "step": 29780 }, { "epoch": 0.43, "grad_norm": 0.5234375, "learning_rate": 0.0001415501965345735, "loss": 0.9159, "step": 29785 }, { "epoch": 0.43, "grad_norm": 0.65234375, "learning_rate": 0.00014152742249686347, "loss": 0.896, "step": 29790 }, { "epoch": 0.43, "grad_norm": 0.5, "learning_rate": 0.00014150464585616733, "loss": 0.9605, "step": 29795 }, { "epoch": 0.43, "grad_norm": 0.5390625, "learning_rate": 0.0001414818666139127, "loss": 0.9356, "step": 29800 }, { "epoch": 0.43, "grad_norm": 0.55859375, "learning_rate": 0.00014145908477152742, "loss": 0.9055, "step": 29805 }, { "epoch": 0.43, "grad_norm": 0.671875, "learning_rate": 0.00014143630033043952, "loss": 1.1045, "step": 29810 }, { "epoch": 0.43, "grad_norm": 0.58984375, "learning_rate": 0.00014141351329207706, "loss": 1.0658, "step": 29815 }, { "epoch": 0.43, "grad_norm": 0.58984375, "learning_rate": 0.0001413907236578685, "loss": 0.9089, "step": 29820 }, { "epoch": 0.43, "grad_norm": 0.59765625, "learning_rate": 0.00014136793142924218, "loss": 1.1593, "step": 29825 }, { "epoch": 0.43, "grad_norm": 0.5703125, "learning_rate": 0.00014134513660762682, "loss": 0.8836, "step": 29830 }, { "epoch": 0.43, "grad_norm": 0.515625, "learning_rate": 0.00014132233919445124, "loss": 0.8684, "step": 29835 }, { "epoch": 0.43, "grad_norm": 0.6328125, "learning_rate": 0.00014129953919114435, "loss": 0.9364, "step": 29840 }, { "epoch": 0.43, "grad_norm": 0.640625, "learning_rate": 0.0001412767365991353, "loss": 1.017, "step": 29845 }, { "epoch": 0.43, "grad_norm": 0.53515625, "learning_rate": 0.00014125393141985342, "loss": 0.9966, "step": 29850 }, { "epoch": 0.43, "grad_norm": 0.52734375, "learning_rate": 0.00014123112365472808, "loss": 1.0908, "step": 29855 }, { "epoch": 0.43, "grad_norm": 0.59375, "learning_rate": 0.000141208313305189, "loss": 0.9723, "step": 29860 }, { "epoch": 0.43, "grad_norm": 0.5859375, "learning_rate": 0.0001411855003726659, "loss": 0.927, "step": 29865 }, { "epoch": 0.43, "grad_norm": 0.62890625, "learning_rate": 0.0001411626848585887, "loss": 0.9745, "step": 29870 }, { "epoch": 0.43, "grad_norm": 0.4921875, "learning_rate": 0.0001411398667643876, "loss": 1.08, "step": 29875 }, { "epoch": 0.43, "grad_norm": 0.5234375, "learning_rate": 0.00014111704609149275, "loss": 0.7581, "step": 29880 }, { "epoch": 0.43, "grad_norm": 0.51171875, "learning_rate": 0.0001410942228413346, "loss": 0.9267, "step": 29885 }, { "epoch": 0.43, "grad_norm": 0.59765625, "learning_rate": 0.0001410713970153438, "loss": 0.961, "step": 29890 }, { "epoch": 0.43, "grad_norm": 0.5703125, "learning_rate": 0.00014104856861495103, "loss": 0.9943, "step": 29895 }, { "epoch": 0.43, "grad_norm": 0.58203125, "learning_rate": 0.0001410257376415873, "loss": 0.9378, "step": 29900 }, { "epoch": 0.43, "grad_norm": 0.5703125, "learning_rate": 0.00014100290409668352, "loss": 0.9556, "step": 29905 }, { "epoch": 0.43, "grad_norm": 0.5703125, "learning_rate": 0.00014098006798167104, "loss": 1.1233, "step": 29910 }, { "epoch": 0.43, "grad_norm": 0.451171875, "learning_rate": 0.00014095722929798122, "loss": 0.7322, "step": 29915 }, { "epoch": 0.43, "grad_norm": 0.60546875, "learning_rate": 0.00014093438804704567, "loss": 0.925, "step": 29920 }, { "epoch": 0.43, "grad_norm": 0.703125, "learning_rate": 0.00014091154423029602, "loss": 0.9481, "step": 29925 }, { "epoch": 0.43, "grad_norm": 0.6015625, "learning_rate": 0.00014088869784916418, "loss": 0.9175, "step": 29930 }, { "epoch": 0.43, "grad_norm": 0.53515625, "learning_rate": 0.0001408658489050822, "loss": 0.9337, "step": 29935 }, { "epoch": 0.43, "grad_norm": 0.56640625, "learning_rate": 0.00014084299739948225, "loss": 0.8176, "step": 29940 }, { "epoch": 0.43, "grad_norm": 0.62109375, "learning_rate": 0.00014082014333379673, "loss": 0.9413, "step": 29945 }, { "epoch": 0.43, "grad_norm": 0.54296875, "learning_rate": 0.0001407972867094581, "loss": 0.8769, "step": 29950 }, { "epoch": 0.43, "grad_norm": 0.5234375, "learning_rate": 0.00014077442752789913, "loss": 0.8575, "step": 29955 }, { "epoch": 0.43, "grad_norm": 0.578125, "learning_rate": 0.0001407515657905526, "loss": 0.9906, "step": 29960 }, { "epoch": 0.43, "grad_norm": 0.609375, "learning_rate": 0.00014072870149885148, "loss": 0.9162, "step": 29965 }, { "epoch": 0.43, "grad_norm": 0.57421875, "learning_rate": 0.00014070583465422898, "loss": 0.9681, "step": 29970 }, { "epoch": 0.43, "grad_norm": 0.69921875, "learning_rate": 0.00014068296525811838, "loss": 0.9826, "step": 29975 }, { "epoch": 0.43, "grad_norm": 0.64453125, "learning_rate": 0.0001406600933119532, "loss": 0.9846, "step": 29980 }, { "epoch": 0.43, "grad_norm": 0.5703125, "learning_rate": 0.00014063721881716707, "loss": 0.9939, "step": 29985 }, { "epoch": 0.43, "grad_norm": 0.52734375, "learning_rate": 0.00014061434177519376, "loss": 0.9893, "step": 29990 }, { "epoch": 0.43, "grad_norm": 0.66796875, "learning_rate": 0.00014059146218746728, "loss": 0.979, "step": 29995 }, { "epoch": 0.43, "grad_norm": 0.53125, "learning_rate": 0.00014056858005542173, "loss": 0.9693, "step": 30000 }, { "epoch": 0.43, "grad_norm": 0.57421875, "learning_rate": 0.00014054569538049134, "loss": 1.0135, "step": 30005 }, { "epoch": 0.43, "grad_norm": 0.5703125, "learning_rate": 0.0001405228081641106, "loss": 0.9971, "step": 30010 }, { "epoch": 0.43, "grad_norm": 0.65625, "learning_rate": 0.0001404999184077141, "loss": 1.0018, "step": 30015 }, { "epoch": 0.43, "grad_norm": 0.6328125, "learning_rate": 0.00014047702611273658, "loss": 0.9322, "step": 30020 }, { "epoch": 0.43, "grad_norm": 0.66796875, "learning_rate": 0.000140454131280613, "loss": 0.9966, "step": 30025 }, { "epoch": 0.43, "grad_norm": 0.58984375, "learning_rate": 0.00014043123391277836, "loss": 1.0644, "step": 30030 }, { "epoch": 0.43, "grad_norm": 0.5546875, "learning_rate": 0.00014040833401066793, "loss": 1.1402, "step": 30035 }, { "epoch": 0.43, "grad_norm": 0.60546875, "learning_rate": 0.00014038543157571712, "loss": 0.9642, "step": 30040 }, { "epoch": 0.43, "grad_norm": 0.62890625, "learning_rate": 0.00014036252660936142, "loss": 0.9608, "step": 30045 }, { "epoch": 0.43, "grad_norm": 0.53515625, "learning_rate": 0.00014033961911303665, "loss": 0.9696, "step": 30050 }, { "epoch": 0.43, "grad_norm": 0.51953125, "learning_rate": 0.00014031670908817856, "loss": 0.9089, "step": 30055 }, { "epoch": 0.43, "grad_norm": 0.6015625, "learning_rate": 0.00014029379653622326, "loss": 0.993, "step": 30060 }, { "epoch": 0.43, "grad_norm": 0.63671875, "learning_rate": 0.0001402708814586069, "loss": 0.8867, "step": 30065 }, { "epoch": 0.43, "grad_norm": 0.515625, "learning_rate": 0.0001402479638567658, "loss": 0.9627, "step": 30070 }, { "epoch": 0.43, "grad_norm": 0.66796875, "learning_rate": 0.0001402250437321365, "loss": 0.9215, "step": 30075 }, { "epoch": 0.43, "grad_norm": 0.65234375, "learning_rate": 0.00014020212108615564, "loss": 0.9507, "step": 30080 }, { "epoch": 0.43, "grad_norm": 0.5703125, "learning_rate": 0.00014017919592026006, "loss": 0.9175, "step": 30085 }, { "epoch": 0.43, "grad_norm": 0.5546875, "learning_rate": 0.0001401562682358867, "loss": 0.9142, "step": 30090 }, { "epoch": 0.43, "grad_norm": 0.546875, "learning_rate": 0.00014013333803447275, "loss": 1.0266, "step": 30095 }, { "epoch": 0.43, "grad_norm": 0.546875, "learning_rate": 0.00014011040531745542, "loss": 0.8784, "step": 30100 }, { "epoch": 0.43, "grad_norm": 0.51171875, "learning_rate": 0.0001400874700862722, "loss": 0.8888, "step": 30105 }, { "epoch": 0.43, "grad_norm": 0.55078125, "learning_rate": 0.00014006453234236075, "loss": 1.0052, "step": 30110 }, { "epoch": 0.43, "grad_norm": 0.48046875, "learning_rate": 0.00014004159208715874, "loss": 0.9737, "step": 30115 }, { "epoch": 0.43, "grad_norm": 0.859375, "learning_rate": 0.00014001864932210417, "loss": 1.165, "step": 30120 }, { "epoch": 0.43, "grad_norm": 0.56640625, "learning_rate": 0.00013999570404863503, "loss": 1.0133, "step": 30125 }, { "epoch": 0.43, "grad_norm": 0.5390625, "learning_rate": 0.00013997275626818965, "loss": 0.9089, "step": 30130 }, { "epoch": 0.43, "grad_norm": 0.6015625, "learning_rate": 0.0001399498059822064, "loss": 1.0132, "step": 30135 }, { "epoch": 0.43, "grad_norm": 0.55859375, "learning_rate": 0.00013992685319212376, "loss": 0.8784, "step": 30140 }, { "epoch": 0.43, "grad_norm": 0.5078125, "learning_rate": 0.00013990389789938053, "loss": 0.9172, "step": 30145 }, { "epoch": 0.43, "grad_norm": 0.55859375, "learning_rate": 0.0001398809401054155, "loss": 1.0431, "step": 30150 }, { "epoch": 0.43, "grad_norm": 0.5, "learning_rate": 0.00013985797981166774, "loss": 1.0053, "step": 30155 }, { "epoch": 0.43, "grad_norm": 0.62890625, "learning_rate": 0.0001398350170195764, "loss": 1.0129, "step": 30160 }, { "epoch": 0.43, "grad_norm": 0.6328125, "learning_rate": 0.00013981205173058082, "loss": 1.0763, "step": 30165 }, { "epoch": 0.43, "grad_norm": 0.72265625, "learning_rate": 0.00013978908394612053, "loss": 1.1376, "step": 30170 }, { "epoch": 0.43, "grad_norm": 0.59375, "learning_rate": 0.00013976611366763514, "loss": 0.9536, "step": 30175 }, { "epoch": 0.43, "grad_norm": 0.56640625, "learning_rate": 0.0001397431408965644, "loss": 1.0176, "step": 30180 }, { "epoch": 0.43, "grad_norm": 0.52734375, "learning_rate": 0.00013972016563434838, "loss": 1.008, "step": 30185 }, { "epoch": 0.43, "grad_norm": 0.58203125, "learning_rate": 0.00013969718788242713, "loss": 1.0744, "step": 30190 }, { "epoch": 0.43, "grad_norm": 0.6171875, "learning_rate": 0.00013967420764224092, "loss": 0.8735, "step": 30195 }, { "epoch": 0.43, "grad_norm": 0.61328125, "learning_rate": 0.0001396512249152302, "loss": 1.1065, "step": 30200 }, { "epoch": 0.43, "grad_norm": 0.54296875, "learning_rate": 0.00013962823970283553, "loss": 0.9289, "step": 30205 }, { "epoch": 0.43, "grad_norm": 0.5625, "learning_rate": 0.00013960525200649765, "loss": 1.0908, "step": 30210 }, { "epoch": 0.43, "grad_norm": 0.56640625, "learning_rate": 0.00013958226182765753, "loss": 1.0867, "step": 30215 }, { "epoch": 0.43, "grad_norm": 0.59375, "learning_rate": 0.0001395592691677561, "loss": 0.9663, "step": 30220 }, { "epoch": 0.43, "grad_norm": 0.65234375, "learning_rate": 0.00013953627402823465, "loss": 0.9168, "step": 30225 }, { "epoch": 0.43, "grad_norm": 0.5546875, "learning_rate": 0.0001395132764105345, "loss": 0.8632, "step": 30230 }, { "epoch": 0.43, "grad_norm": 0.494140625, "learning_rate": 0.00013949027631609718, "loss": 0.9526, "step": 30235 }, { "epoch": 0.43, "grad_norm": 0.53125, "learning_rate": 0.0001394672737463644, "loss": 0.9847, "step": 30240 }, { "epoch": 0.43, "grad_norm": 0.58203125, "learning_rate": 0.00013944426870277793, "loss": 0.9977, "step": 30245 }, { "epoch": 0.43, "grad_norm": 0.5625, "learning_rate": 0.00013942126118677973, "loss": 0.8099, "step": 30250 }, { "epoch": 0.43, "grad_norm": 0.59765625, "learning_rate": 0.00013939825119981206, "loss": 0.9267, "step": 30255 }, { "epoch": 0.43, "grad_norm": 0.53125, "learning_rate": 0.0001393752387433171, "loss": 0.9748, "step": 30260 }, { "epoch": 0.43, "grad_norm": 0.486328125, "learning_rate": 0.00013935222381873728, "loss": 1.0083, "step": 30265 }, { "epoch": 0.43, "grad_norm": 0.609375, "learning_rate": 0.00013932920642751535, "loss": 0.9495, "step": 30270 }, { "epoch": 0.43, "grad_norm": 0.71484375, "learning_rate": 0.0001393061865710939, "loss": 0.913, "step": 30275 }, { "epoch": 0.43, "grad_norm": 0.5703125, "learning_rate": 0.00013928316425091593, "loss": 0.9324, "step": 30280 }, { "epoch": 0.43, "grad_norm": 0.5078125, "learning_rate": 0.00013926013946842449, "loss": 0.8735, "step": 30285 }, { "epoch": 0.43, "grad_norm": 0.51953125, "learning_rate": 0.00013923711222506277, "loss": 0.903, "step": 30290 }, { "epoch": 0.43, "grad_norm": 0.5703125, "learning_rate": 0.00013921408252227422, "loss": 0.935, "step": 30295 }, { "epoch": 0.43, "grad_norm": 0.78125, "learning_rate": 0.00013919105036150226, "loss": 1.0949, "step": 30300 }, { "epoch": 0.43, "grad_norm": 0.56640625, "learning_rate": 0.00013916801574419068, "loss": 0.8978, "step": 30305 }, { "epoch": 0.43, "grad_norm": 0.57421875, "learning_rate": 0.00013914497867178322, "loss": 1.004, "step": 30310 }, { "epoch": 0.43, "grad_norm": 0.953125, "learning_rate": 0.00013912193914572391, "loss": 1.0612, "step": 30315 }, { "epoch": 0.43, "grad_norm": 0.5703125, "learning_rate": 0.00013909889716745693, "loss": 0.9002, "step": 30320 }, { "epoch": 0.43, "grad_norm": 0.59375, "learning_rate": 0.00013907585273842656, "loss": 0.8754, "step": 30325 }, { "epoch": 0.44, "grad_norm": 0.58984375, "learning_rate": 0.00013905280586007718, "loss": 0.9143, "step": 30330 }, { "epoch": 0.44, "grad_norm": 0.44921875, "learning_rate": 0.0001390297565338535, "loss": 0.6927, "step": 30335 }, { "epoch": 0.44, "grad_norm": 0.55078125, "learning_rate": 0.00013900670476120022, "loss": 0.9526, "step": 30340 }, { "epoch": 0.44, "grad_norm": 0.625, "learning_rate": 0.00013898365054356226, "loss": 0.9771, "step": 30345 }, { "epoch": 0.44, "grad_norm": 0.6171875, "learning_rate": 0.00013896059388238466, "loss": 0.9911, "step": 30350 }, { "epoch": 0.44, "grad_norm": 0.52734375, "learning_rate": 0.00013893753477911268, "loss": 0.9632, "step": 30355 }, { "epoch": 0.44, "grad_norm": 0.57421875, "learning_rate": 0.0001389144732351917, "loss": 1.0988, "step": 30360 }, { "epoch": 0.44, "grad_norm": 0.60546875, "learning_rate": 0.0001388914092520672, "loss": 0.9119, "step": 30365 }, { "epoch": 0.44, "grad_norm": 0.5546875, "learning_rate": 0.00013886834283118483, "loss": 0.8508, "step": 30370 }, { "epoch": 0.44, "grad_norm": 0.5546875, "learning_rate": 0.0001388452739739905, "loss": 0.9863, "step": 30375 }, { "epoch": 0.44, "grad_norm": 0.60546875, "learning_rate": 0.0001388222026819302, "loss": 0.9309, "step": 30380 }, { "epoch": 0.44, "grad_norm": 0.6484375, "learning_rate": 0.00013879912895644995, "loss": 0.9081, "step": 30385 }, { "epoch": 0.44, "grad_norm": 0.578125, "learning_rate": 0.00013877605279899612, "loss": 0.9678, "step": 30390 }, { "epoch": 0.44, "grad_norm": 0.55859375, "learning_rate": 0.00013875297421101518, "loss": 0.8776, "step": 30395 }, { "epoch": 0.44, "grad_norm": 0.53125, "learning_rate": 0.00013872989319395367, "loss": 0.8552, "step": 30400 }, { "epoch": 0.44, "grad_norm": 0.55078125, "learning_rate": 0.0001387068097492583, "loss": 0.9758, "step": 30405 }, { "epoch": 0.44, "grad_norm": 0.60546875, "learning_rate": 0.00013868372387837604, "loss": 0.9454, "step": 30410 }, { "epoch": 0.44, "grad_norm": 0.60546875, "learning_rate": 0.00013866063558275394, "loss": 1.0248, "step": 30415 }, { "epoch": 0.44, "grad_norm": 0.58984375, "learning_rate": 0.00013863754486383913, "loss": 1.0927, "step": 30420 }, { "epoch": 0.44, "grad_norm": 0.60546875, "learning_rate": 0.00013861445172307904, "loss": 0.9265, "step": 30425 }, { "epoch": 0.44, "grad_norm": 0.6015625, "learning_rate": 0.0001385913561619211, "loss": 1.1097, "step": 30430 }, { "epoch": 0.44, "grad_norm": 0.58984375, "learning_rate": 0.00013856825818181302, "loss": 1.0002, "step": 30435 }, { "epoch": 0.44, "grad_norm": 0.65234375, "learning_rate": 0.0001385451577842026, "loss": 1.1563, "step": 30440 }, { "epoch": 0.44, "grad_norm": 0.5078125, "learning_rate": 0.00013852205497053775, "loss": 0.8349, "step": 30445 }, { "epoch": 0.44, "grad_norm": 0.6171875, "learning_rate": 0.00013849894974226666, "loss": 0.9589, "step": 30450 }, { "epoch": 0.44, "grad_norm": 0.5234375, "learning_rate": 0.00013847584210083754, "loss": 1.0755, "step": 30455 }, { "epoch": 0.44, "grad_norm": 0.58203125, "learning_rate": 0.00013845273204769884, "loss": 0.9441, "step": 30460 }, { "epoch": 0.44, "grad_norm": 0.5859375, "learning_rate": 0.00013842961958429907, "loss": 1.0065, "step": 30465 }, { "epoch": 0.44, "grad_norm": 0.61328125, "learning_rate": 0.000138406504712087, "loss": 0.9354, "step": 30470 }, { "epoch": 0.44, "grad_norm": 0.62109375, "learning_rate": 0.00013838338743251148, "loss": 0.9546, "step": 30475 }, { "epoch": 0.44, "grad_norm": 0.578125, "learning_rate": 0.0001383602677470215, "loss": 0.9654, "step": 30480 }, { "epoch": 0.44, "grad_norm": 0.60546875, "learning_rate": 0.00013833714565706626, "loss": 0.9255, "step": 30485 }, { "epoch": 0.44, "grad_norm": 0.54296875, "learning_rate": 0.00013831402116409506, "loss": 0.9128, "step": 30490 }, { "epoch": 0.44, "grad_norm": 0.51171875, "learning_rate": 0.00013829089426955736, "loss": 0.8833, "step": 30495 }, { "epoch": 0.44, "grad_norm": 0.546875, "learning_rate": 0.0001382677649749028, "loss": 0.9549, "step": 30500 }, { "epoch": 0.44, "grad_norm": 0.67578125, "learning_rate": 0.0001382446332815812, "loss": 0.9081, "step": 30505 }, { "epoch": 0.44, "grad_norm": 0.53515625, "learning_rate": 0.0001382214991910424, "loss": 0.9331, "step": 30510 }, { "epoch": 0.44, "grad_norm": 0.58984375, "learning_rate": 0.0001381983627047365, "loss": 0.9937, "step": 30515 }, { "epoch": 0.44, "grad_norm": 0.5625, "learning_rate": 0.0001381752238241137, "loss": 0.8444, "step": 30520 }, { "epoch": 0.44, "grad_norm": 0.58984375, "learning_rate": 0.0001381520825506244, "loss": 1.0127, "step": 30525 }, { "epoch": 0.44, "grad_norm": 0.5, "learning_rate": 0.00013812893888571917, "loss": 0.9867, "step": 30530 }, { "epoch": 0.44, "grad_norm": 0.59765625, "learning_rate": 0.00013810579283084855, "loss": 0.9169, "step": 30535 }, { "epoch": 0.44, "grad_norm": 0.58203125, "learning_rate": 0.00013808264438746346, "loss": 0.915, "step": 30540 }, { "epoch": 0.44, "grad_norm": 0.59375, "learning_rate": 0.00013805949355701487, "loss": 0.9577, "step": 30545 }, { "epoch": 0.44, "grad_norm": 0.46484375, "learning_rate": 0.00013803634034095384, "loss": 0.9792, "step": 30550 }, { "epoch": 0.44, "grad_norm": 0.59375, "learning_rate": 0.00013801318474073167, "loss": 0.9618, "step": 30555 }, { "epoch": 0.44, "grad_norm": 0.5390625, "learning_rate": 0.00013799002675779983, "loss": 0.8944, "step": 30560 }, { "epoch": 0.44, "grad_norm": 0.5234375, "learning_rate": 0.00013796686639360982, "loss": 0.919, "step": 30565 }, { "epoch": 0.44, "grad_norm": 0.53125, "learning_rate": 0.00013794370364961342, "loss": 1.0096, "step": 30570 }, { "epoch": 0.44, "grad_norm": 0.69921875, "learning_rate": 0.00013792053852726242, "loss": 1.1821, "step": 30575 }, { "epoch": 0.44, "grad_norm": 0.67578125, "learning_rate": 0.00013789737102800888, "loss": 1.1147, "step": 30580 }, { "epoch": 0.44, "grad_norm": 0.50390625, "learning_rate": 0.000137874201153305, "loss": 1.0756, "step": 30585 }, { "epoch": 0.44, "grad_norm": 0.57421875, "learning_rate": 0.00013785102890460303, "loss": 0.9199, "step": 30590 }, { "epoch": 0.44, "grad_norm": 0.515625, "learning_rate": 0.00013782785428335546, "loss": 0.9286, "step": 30595 }, { "epoch": 0.44, "grad_norm": 0.62109375, "learning_rate": 0.0001378046772910149, "loss": 0.9718, "step": 30600 }, { "epoch": 0.44, "grad_norm": 0.83984375, "learning_rate": 0.0001377814979290341, "loss": 1.0316, "step": 30605 }, { "epoch": 0.44, "grad_norm": 0.51171875, "learning_rate": 0.00013775831619886603, "loss": 0.9671, "step": 30610 }, { "epoch": 0.44, "grad_norm": 0.6015625, "learning_rate": 0.0001377351321019636, "loss": 0.9524, "step": 30615 }, { "epoch": 0.44, "grad_norm": 0.5078125, "learning_rate": 0.00013771194563978024, "loss": 0.9654, "step": 30620 }, { "epoch": 0.44, "grad_norm": 0.5546875, "learning_rate": 0.00013768875681376908, "loss": 0.9857, "step": 30625 }, { "epoch": 0.44, "grad_norm": 0.58203125, "learning_rate": 0.00013766556562538375, "loss": 1.0038, "step": 30630 }, { "epoch": 0.44, "grad_norm": 0.57421875, "learning_rate": 0.0001376423720760779, "loss": 0.9687, "step": 30635 }, { "epoch": 0.44, "grad_norm": 0.5625, "learning_rate": 0.00013761917616730523, "loss": 0.9222, "step": 30640 }, { "epoch": 0.44, "grad_norm": 0.58984375, "learning_rate": 0.00013759597790051982, "loss": 0.9985, "step": 30645 }, { "epoch": 0.44, "grad_norm": 0.62109375, "learning_rate": 0.00013757277727717565, "loss": 1.0839, "step": 30650 }, { "epoch": 0.44, "grad_norm": 0.5625, "learning_rate": 0.000137549574298727, "loss": 0.9799, "step": 30655 }, { "epoch": 0.44, "grad_norm": 0.52734375, "learning_rate": 0.0001375263689666283, "loss": 0.9521, "step": 30660 }, { "epoch": 0.44, "grad_norm": 0.494140625, "learning_rate": 0.00013750316128233406, "loss": 0.9973, "step": 30665 }, { "epoch": 0.44, "grad_norm": 0.67578125, "learning_rate": 0.00013747995124729892, "loss": 0.8904, "step": 30670 }, { "epoch": 0.44, "grad_norm": 0.482421875, "learning_rate": 0.00013745673886297782, "loss": 0.9553, "step": 30675 }, { "epoch": 0.44, "grad_norm": 0.5546875, "learning_rate": 0.0001374335241308256, "loss": 1.0507, "step": 30680 }, { "epoch": 0.44, "grad_norm": 0.5859375, "learning_rate": 0.00013741030705229746, "loss": 0.9111, "step": 30685 }, { "epoch": 0.44, "grad_norm": 0.5546875, "learning_rate": 0.00013738708762884872, "loss": 0.9156, "step": 30690 }, { "epoch": 0.44, "grad_norm": 0.67578125, "learning_rate": 0.00013736386586193472, "loss": 1.0744, "step": 30695 }, { "epoch": 0.44, "grad_norm": 0.54296875, "learning_rate": 0.000137340641753011, "loss": 1.0734, "step": 30700 }, { "epoch": 0.44, "grad_norm": 0.55078125, "learning_rate": 0.0001373174153035334, "loss": 0.8312, "step": 30705 }, { "epoch": 0.44, "grad_norm": 0.43359375, "learning_rate": 0.00013729418651495766, "loss": 0.829, "step": 30710 }, { "epoch": 0.44, "grad_norm": 0.6328125, "learning_rate": 0.00013727095538873987, "loss": 0.8859, "step": 30715 }, { "epoch": 0.44, "grad_norm": 0.4609375, "learning_rate": 0.00013724772192633616, "loss": 0.8469, "step": 30720 }, { "epoch": 0.44, "grad_norm": 0.59375, "learning_rate": 0.00013722448612920278, "loss": 0.9399, "step": 30725 }, { "epoch": 0.44, "grad_norm": 0.474609375, "learning_rate": 0.0001372012479987962, "loss": 0.8732, "step": 30730 }, { "epoch": 0.44, "grad_norm": 0.59375, "learning_rate": 0.00013717800753657307, "loss": 0.8908, "step": 30735 }, { "epoch": 0.44, "grad_norm": 0.6640625, "learning_rate": 0.0001371547647439901, "loss": 0.9621, "step": 30740 }, { "epoch": 0.44, "grad_norm": 0.546875, "learning_rate": 0.00013713151962250412, "loss": 1.0011, "step": 30745 }, { "epoch": 0.44, "grad_norm": 0.6640625, "learning_rate": 0.0001371082721735722, "loss": 1.0021, "step": 30750 }, { "epoch": 0.44, "grad_norm": 0.49609375, "learning_rate": 0.00013708502239865152, "loss": 0.9714, "step": 30755 }, { "epoch": 0.44, "grad_norm": 0.578125, "learning_rate": 0.00013706177029919943, "loss": 1.1514, "step": 30760 }, { "epoch": 0.44, "grad_norm": 0.50390625, "learning_rate": 0.0001370385158766733, "loss": 0.8423, "step": 30765 }, { "epoch": 0.44, "grad_norm": 0.5390625, "learning_rate": 0.00013701525913253088, "loss": 0.9267, "step": 30770 }, { "epoch": 0.44, "grad_norm": 0.6484375, "learning_rate": 0.00013699200006822985, "loss": 0.9194, "step": 30775 }, { "epoch": 0.44, "grad_norm": 0.515625, "learning_rate": 0.0001369687386852281, "loss": 0.9124, "step": 30780 }, { "epoch": 0.44, "grad_norm": 0.6015625, "learning_rate": 0.00013694547498498372, "loss": 0.9982, "step": 30785 }, { "epoch": 0.44, "grad_norm": 0.53125, "learning_rate": 0.0001369222089689549, "loss": 0.8867, "step": 30790 }, { "epoch": 0.44, "grad_norm": 0.58203125, "learning_rate": 0.00013689894063859996, "loss": 0.9391, "step": 30795 }, { "epoch": 0.44, "grad_norm": 0.609375, "learning_rate": 0.0001368756699953774, "loss": 0.7938, "step": 30800 }, { "epoch": 0.44, "grad_norm": 0.62890625, "learning_rate": 0.00013685239704074582, "loss": 1.0281, "step": 30805 }, { "epoch": 0.44, "grad_norm": 0.59765625, "learning_rate": 0.00013682912177616404, "loss": 1.002, "step": 30810 }, { "epoch": 0.44, "grad_norm": 0.56640625, "learning_rate": 0.00013680584420309097, "loss": 0.8366, "step": 30815 }, { "epoch": 0.44, "grad_norm": 0.53515625, "learning_rate": 0.00013678256432298567, "loss": 0.981, "step": 30820 }, { "epoch": 0.44, "grad_norm": 0.640625, "learning_rate": 0.00013675928213730736, "loss": 1.0917, "step": 30825 }, { "epoch": 0.44, "grad_norm": 0.51953125, "learning_rate": 0.00013673599764751535, "loss": 0.9331, "step": 30830 }, { "epoch": 0.44, "grad_norm": 0.6484375, "learning_rate": 0.00013671271085506916, "loss": 0.9488, "step": 30835 }, { "epoch": 0.44, "grad_norm": 0.625, "learning_rate": 0.00013668942176142847, "loss": 0.9893, "step": 30840 }, { "epoch": 0.44, "grad_norm": 0.58203125, "learning_rate": 0.00013666613036805302, "loss": 0.8505, "step": 30845 }, { "epoch": 0.44, "grad_norm": 0.57421875, "learning_rate": 0.0001366428366764028, "loss": 1.0239, "step": 30850 }, { "epoch": 0.44, "grad_norm": 0.5390625, "learning_rate": 0.0001366195406879378, "loss": 0.9631, "step": 30855 }, { "epoch": 0.44, "grad_norm": 0.52734375, "learning_rate": 0.00013659624240411827, "loss": 0.8366, "step": 30860 }, { "epoch": 0.44, "grad_norm": 0.53125, "learning_rate": 0.00013657294182640463, "loss": 0.9888, "step": 30865 }, { "epoch": 0.44, "grad_norm": 0.51953125, "learning_rate": 0.0001365496389562573, "loss": 0.9006, "step": 30870 }, { "epoch": 0.44, "grad_norm": 0.6484375, "learning_rate": 0.00013652633379513703, "loss": 0.9934, "step": 30875 }, { "epoch": 0.44, "grad_norm": 0.48828125, "learning_rate": 0.00013650302634450454, "loss": 0.9593, "step": 30880 }, { "epoch": 0.44, "grad_norm": 0.5703125, "learning_rate": 0.00013647971660582075, "loss": 1.047, "step": 30885 }, { "epoch": 0.44, "grad_norm": 0.59765625, "learning_rate": 0.0001364564045805468, "loss": 0.9426, "step": 30890 }, { "epoch": 0.44, "grad_norm": 0.5625, "learning_rate": 0.00013643309027014389, "loss": 1.0609, "step": 30895 }, { "epoch": 0.44, "grad_norm": 0.609375, "learning_rate": 0.0001364097736760734, "loss": 0.8709, "step": 30900 }, { "epoch": 0.44, "grad_norm": 0.61328125, "learning_rate": 0.0001363864547997968, "loss": 0.9918, "step": 30905 }, { "epoch": 0.44, "grad_norm": 0.5234375, "learning_rate": 0.0001363631336427758, "loss": 0.8467, "step": 30910 }, { "epoch": 0.44, "grad_norm": 0.54296875, "learning_rate": 0.00013633981020647214, "loss": 0.9783, "step": 30915 }, { "epoch": 0.44, "grad_norm": 0.451171875, "learning_rate": 0.0001363164844923478, "loss": 1.0742, "step": 30920 }, { "epoch": 0.44, "grad_norm": 0.58203125, "learning_rate": 0.00013629315650186488, "loss": 0.929, "step": 30925 }, { "epoch": 0.44, "grad_norm": 0.53125, "learning_rate": 0.00013626982623648554, "loss": 0.8576, "step": 30930 }, { "epoch": 0.44, "grad_norm": 0.58984375, "learning_rate": 0.0001362464936976722, "loss": 0.873, "step": 30935 }, { "epoch": 0.44, "grad_norm": 0.5546875, "learning_rate": 0.00013622315888688736, "loss": 0.9349, "step": 30940 }, { "epoch": 0.44, "grad_norm": 0.57421875, "learning_rate": 0.00013619982180559364, "loss": 0.9348, "step": 30945 }, { "epoch": 0.44, "grad_norm": 0.43359375, "learning_rate": 0.00013617648245525392, "loss": 0.8357, "step": 30950 }, { "epoch": 0.44, "grad_norm": 0.66015625, "learning_rate": 0.00013615314083733102, "loss": 0.9747, "step": 30955 }, { "epoch": 0.44, "grad_norm": 0.5703125, "learning_rate": 0.00013612979695328808, "loss": 0.9304, "step": 30960 }, { "epoch": 0.44, "grad_norm": 0.640625, "learning_rate": 0.00013610645080458833, "loss": 0.9786, "step": 30965 }, { "epoch": 0.44, "grad_norm": 0.54296875, "learning_rate": 0.00013608310239269513, "loss": 0.8328, "step": 30970 }, { "epoch": 0.44, "grad_norm": 0.6328125, "learning_rate": 0.00013605975171907195, "loss": 0.8963, "step": 30975 }, { "epoch": 0.44, "grad_norm": 0.703125, "learning_rate": 0.0001360363987851825, "loss": 1.0268, "step": 30980 }, { "epoch": 0.44, "grad_norm": 0.57421875, "learning_rate": 0.00013601304359249053, "loss": 0.9399, "step": 30985 }, { "epoch": 0.44, "grad_norm": 0.5390625, "learning_rate": 0.00013598968614245995, "loss": 0.9845, "step": 30990 }, { "epoch": 0.44, "grad_norm": 0.53125, "learning_rate": 0.00013596632643655485, "loss": 0.7482, "step": 30995 }, { "epoch": 0.44, "grad_norm": 0.57421875, "learning_rate": 0.00013594296447623947, "loss": 0.95, "step": 31000 }, { "epoch": 0.44, "grad_norm": 0.59765625, "learning_rate": 0.00013591960026297813, "loss": 0.8751, "step": 31005 }, { "epoch": 0.44, "grad_norm": 0.53125, "learning_rate": 0.00013589623379823532, "loss": 0.85, "step": 31010 }, { "epoch": 0.44, "grad_norm": 0.56640625, "learning_rate": 0.00013587286508347574, "loss": 0.8601, "step": 31015 }, { "epoch": 0.44, "grad_norm": 0.58984375, "learning_rate": 0.0001358494941201641, "loss": 0.7568, "step": 31020 }, { "epoch": 0.45, "grad_norm": 0.9921875, "learning_rate": 0.0001358261209097653, "loss": 0.9786, "step": 31025 }, { "epoch": 0.45, "grad_norm": 0.5078125, "learning_rate": 0.0001358027454537445, "loss": 0.9896, "step": 31030 }, { "epoch": 0.45, "grad_norm": 0.5859375, "learning_rate": 0.00013577936775356676, "loss": 1.0181, "step": 31035 }, { "epoch": 0.45, "grad_norm": 0.49609375, "learning_rate": 0.0001357559878106976, "loss": 0.8901, "step": 31040 }, { "epoch": 0.45, "grad_norm": 0.55078125, "learning_rate": 0.00013573260562660232, "loss": 1.1547, "step": 31045 }, { "epoch": 0.45, "grad_norm": 0.5546875, "learning_rate": 0.00013570922120274666, "loss": 1.0983, "step": 31050 }, { "epoch": 0.45, "grad_norm": 0.52734375, "learning_rate": 0.00013568583454059632, "loss": 0.9642, "step": 31055 }, { "epoch": 0.45, "grad_norm": 0.61328125, "learning_rate": 0.00013566244564161727, "loss": 0.9549, "step": 31060 }, { "epoch": 0.45, "grad_norm": 0.57421875, "learning_rate": 0.00013563905450727546, "loss": 1.0332, "step": 31065 }, { "epoch": 0.45, "grad_norm": 0.5546875, "learning_rate": 0.00013561566113903716, "loss": 1.0243, "step": 31070 }, { "epoch": 0.45, "grad_norm": 0.5390625, "learning_rate": 0.00013559226553836866, "loss": 0.9611, "step": 31075 }, { "epoch": 0.45, "grad_norm": 0.53515625, "learning_rate": 0.00013556886770673638, "loss": 0.9881, "step": 31080 }, { "epoch": 0.45, "grad_norm": 0.609375, "learning_rate": 0.00013554546764560702, "loss": 0.8778, "step": 31085 }, { "epoch": 0.45, "grad_norm": 0.5390625, "learning_rate": 0.00013552206535644723, "loss": 0.9477, "step": 31090 }, { "epoch": 0.45, "grad_norm": 0.609375, "learning_rate": 0.00013549866084072392, "loss": 0.8397, "step": 31095 }, { "epoch": 0.45, "grad_norm": 0.482421875, "learning_rate": 0.00013547525409990415, "loss": 0.9632, "step": 31100 }, { "epoch": 0.45, "grad_norm": 0.4375, "learning_rate": 0.000135451845135455, "loss": 0.7316, "step": 31105 }, { "epoch": 0.45, "grad_norm": 0.5703125, "learning_rate": 0.0001354284339488439, "loss": 1.0173, "step": 31110 }, { "epoch": 0.45, "grad_norm": 0.57421875, "learning_rate": 0.00013540502054153815, "loss": 0.8948, "step": 31115 }, { "epoch": 0.45, "grad_norm": 0.54296875, "learning_rate": 0.0001353816049150054, "loss": 0.8243, "step": 31120 }, { "epoch": 0.45, "grad_norm": 0.490234375, "learning_rate": 0.00013535818707071336, "loss": 0.8822, "step": 31125 }, { "epoch": 0.45, "grad_norm": 0.54296875, "learning_rate": 0.0001353347670101299, "loss": 0.958, "step": 31130 }, { "epoch": 0.45, "grad_norm": 0.55078125, "learning_rate": 0.00013531134473472297, "loss": 0.906, "step": 31135 }, { "epoch": 0.45, "grad_norm": 0.62109375, "learning_rate": 0.00013528792024596075, "loss": 0.9097, "step": 31140 }, { "epoch": 0.45, "grad_norm": 0.51953125, "learning_rate": 0.0001352644935453115, "loss": 1.0661, "step": 31145 }, { "epoch": 0.45, "grad_norm": 0.6484375, "learning_rate": 0.00013524106463424365, "loss": 0.9701, "step": 31150 }, { "epoch": 0.45, "grad_norm": 0.55859375, "learning_rate": 0.00013521763351422573, "loss": 1.0976, "step": 31155 }, { "epoch": 0.45, "grad_norm": 0.55078125, "learning_rate": 0.0001351942001867264, "loss": 1.0888, "step": 31160 }, { "epoch": 0.45, "grad_norm": 0.65625, "learning_rate": 0.00013517076465321455, "loss": 0.9993, "step": 31165 }, { "epoch": 0.45, "grad_norm": 0.65234375, "learning_rate": 0.00013514732691515907, "loss": 1.0969, "step": 31170 }, { "epoch": 0.45, "grad_norm": 0.5390625, "learning_rate": 0.00013512388697402912, "loss": 0.9507, "step": 31175 }, { "epoch": 0.45, "grad_norm": 0.57421875, "learning_rate": 0.00013510044483129398, "loss": 1.1196, "step": 31180 }, { "epoch": 0.45, "grad_norm": 0.5390625, "learning_rate": 0.00013507700048842296, "loss": 0.8669, "step": 31185 }, { "epoch": 0.45, "grad_norm": 0.5859375, "learning_rate": 0.00013505355394688562, "loss": 1.0395, "step": 31190 }, { "epoch": 0.45, "grad_norm": 0.57421875, "learning_rate": 0.00013503010520815156, "loss": 1.0378, "step": 31195 }, { "epoch": 0.45, "grad_norm": 0.5625, "learning_rate": 0.0001350066542736906, "loss": 1.0775, "step": 31200 }, { "epoch": 0.45, "grad_norm": 0.58203125, "learning_rate": 0.00013498320114497273, "loss": 0.8533, "step": 31205 }, { "epoch": 0.45, "grad_norm": 0.55078125, "learning_rate": 0.00013495974582346793, "loss": 0.9821, "step": 31210 }, { "epoch": 0.45, "grad_norm": 0.546875, "learning_rate": 0.00013493628831064646, "loss": 0.9691, "step": 31215 }, { "epoch": 0.45, "grad_norm": 0.53515625, "learning_rate": 0.00013491282860797864, "loss": 0.9099, "step": 31220 }, { "epoch": 0.45, "grad_norm": 0.53125, "learning_rate": 0.00013488936671693496, "loss": 0.9978, "step": 31225 }, { "epoch": 0.45, "grad_norm": 0.53125, "learning_rate": 0.00013486590263898603, "loss": 0.9646, "step": 31230 }, { "epoch": 0.45, "grad_norm": 0.55859375, "learning_rate": 0.00013484243637560265, "loss": 0.8978, "step": 31235 }, { "epoch": 0.45, "grad_norm": 0.60546875, "learning_rate": 0.00013481896792825565, "loss": 0.8367, "step": 31240 }, { "epoch": 0.45, "grad_norm": 0.5859375, "learning_rate": 0.0001347954972984161, "loss": 0.9076, "step": 31245 }, { "epoch": 0.45, "grad_norm": 0.62109375, "learning_rate": 0.00013477202448755515, "loss": 1.0487, "step": 31250 }, { "epoch": 0.45, "grad_norm": 0.51953125, "learning_rate": 0.00013474854949714407, "loss": 0.7774, "step": 31255 }, { "epoch": 0.45, "grad_norm": 0.66796875, "learning_rate": 0.00013472507232865436, "loss": 1.0968, "step": 31260 }, { "epoch": 0.45, "grad_norm": 0.63671875, "learning_rate": 0.00013470159298355758, "loss": 1.0582, "step": 31265 }, { "epoch": 0.45, "grad_norm": 0.5859375, "learning_rate": 0.0001346781114633254, "loss": 1.0763, "step": 31270 }, { "epoch": 0.45, "grad_norm": 0.609375, "learning_rate": 0.00013465462776942973, "loss": 1.0131, "step": 31275 }, { "epoch": 0.45, "grad_norm": 0.53515625, "learning_rate": 0.0001346311419033425, "loss": 0.9431, "step": 31280 }, { "epoch": 0.45, "grad_norm": 0.55859375, "learning_rate": 0.00013460765386653583, "loss": 0.9907, "step": 31285 }, { "epoch": 0.45, "grad_norm": 0.53515625, "learning_rate": 0.00013458416366048204, "loss": 0.9854, "step": 31290 }, { "epoch": 0.45, "grad_norm": 0.65625, "learning_rate": 0.00013456067128665347, "loss": 1.0112, "step": 31295 }, { "epoch": 0.45, "grad_norm": 0.5625, "learning_rate": 0.0001345371767465227, "loss": 1.0002, "step": 31300 }, { "epoch": 0.45, "grad_norm": 0.498046875, "learning_rate": 0.00013451368004156232, "loss": 0.9572, "step": 31305 }, { "epoch": 0.45, "grad_norm": 0.625, "learning_rate": 0.00013449018117324516, "loss": 0.9806, "step": 31310 }, { "epoch": 0.45, "grad_norm": 0.5703125, "learning_rate": 0.00013446668014304418, "loss": 0.9183, "step": 31315 }, { "epoch": 0.45, "grad_norm": 0.52734375, "learning_rate": 0.00013444317695243243, "loss": 0.8783, "step": 31320 }, { "epoch": 0.45, "grad_norm": 0.53515625, "learning_rate": 0.0001344196716028831, "loss": 1.051, "step": 31325 }, { "epoch": 0.45, "grad_norm": 0.52734375, "learning_rate": 0.0001343961640958696, "loss": 0.8848, "step": 31330 }, { "epoch": 0.45, "grad_norm": 0.50390625, "learning_rate": 0.0001343726544328653, "loss": 0.9742, "step": 31335 }, { "epoch": 0.45, "grad_norm": 0.5859375, "learning_rate": 0.0001343491426153439, "loss": 1.0643, "step": 31340 }, { "epoch": 0.45, "grad_norm": 0.546875, "learning_rate": 0.00013432562864477916, "loss": 0.8818, "step": 31345 }, { "epoch": 0.45, "grad_norm": 0.640625, "learning_rate": 0.0001343021125226449, "loss": 0.9056, "step": 31350 }, { "epoch": 0.45, "grad_norm": 0.546875, "learning_rate": 0.00013427859425041514, "loss": 0.929, "step": 31355 }, { "epoch": 0.45, "grad_norm": 0.55078125, "learning_rate": 0.00013425507382956405, "loss": 0.8192, "step": 31360 }, { "epoch": 0.45, "grad_norm": 0.55078125, "learning_rate": 0.00013423155126156592, "loss": 1.0017, "step": 31365 }, { "epoch": 0.45, "grad_norm": 0.65234375, "learning_rate": 0.0001342080265478952, "loss": 0.981, "step": 31370 }, { "epoch": 0.45, "grad_norm": 0.57421875, "learning_rate": 0.00013418449969002636, "loss": 0.9689, "step": 31375 }, { "epoch": 0.45, "grad_norm": 0.5546875, "learning_rate": 0.00013416097068943422, "loss": 0.8402, "step": 31380 }, { "epoch": 0.45, "grad_norm": 0.62890625, "learning_rate": 0.0001341374395475935, "loss": 0.9719, "step": 31385 }, { "epoch": 0.45, "grad_norm": 0.53125, "learning_rate": 0.00013411390626597917, "loss": 1.0305, "step": 31390 }, { "epoch": 0.45, "grad_norm": 0.498046875, "learning_rate": 0.00013409037084606635, "loss": 0.9859, "step": 31395 }, { "epoch": 0.45, "grad_norm": 0.54296875, "learning_rate": 0.0001340668332893303, "loss": 1.1799, "step": 31400 }, { "epoch": 0.45, "grad_norm": 0.640625, "learning_rate": 0.0001340432935972463, "loss": 1.0158, "step": 31405 }, { "epoch": 0.45, "grad_norm": 0.48828125, "learning_rate": 0.0001340197517712899, "loss": 0.8952, "step": 31410 }, { "epoch": 0.45, "grad_norm": 0.5546875, "learning_rate": 0.0001339962078129367, "loss": 0.9101, "step": 31415 }, { "epoch": 0.45, "grad_norm": 0.6484375, "learning_rate": 0.00013397266172366246, "loss": 1.0594, "step": 31420 }, { "epoch": 0.45, "grad_norm": 0.50390625, "learning_rate": 0.00013394911350494314, "loss": 0.8013, "step": 31425 }, { "epoch": 0.45, "grad_norm": 0.5625, "learning_rate": 0.0001339255631582547, "loss": 0.9715, "step": 31430 }, { "epoch": 0.45, "grad_norm": 0.5078125, "learning_rate": 0.0001339020106850733, "loss": 0.9856, "step": 31435 }, { "epoch": 0.45, "grad_norm": 0.49609375, "learning_rate": 0.00013387845608687528, "loss": 0.9213, "step": 31440 }, { "epoch": 0.45, "grad_norm": 0.625, "learning_rate": 0.00013385489936513703, "loss": 1.1167, "step": 31445 }, { "epoch": 0.45, "grad_norm": 0.515625, "learning_rate": 0.00013383134052133517, "loss": 0.9305, "step": 31450 }, { "epoch": 0.45, "grad_norm": 0.5546875, "learning_rate": 0.00013380777955694634, "loss": 0.9467, "step": 31455 }, { "epoch": 0.45, "grad_norm": 0.6015625, "learning_rate": 0.00013378421647344737, "loss": 0.8589, "step": 31460 }, { "epoch": 0.45, "grad_norm": 0.515625, "learning_rate": 0.00013376065127231523, "loss": 0.9975, "step": 31465 }, { "epoch": 0.45, "grad_norm": 0.5390625, "learning_rate": 0.00013373708395502705, "loss": 0.9272, "step": 31470 }, { "epoch": 0.45, "grad_norm": 0.5625, "learning_rate": 0.00013371351452306, "loss": 1.0828, "step": 31475 }, { "epoch": 0.45, "grad_norm": 0.5234375, "learning_rate": 0.00013368994297789145, "loss": 0.9316, "step": 31480 }, { "epoch": 0.45, "grad_norm": 0.5546875, "learning_rate": 0.00013366636932099892, "loss": 1.1062, "step": 31485 }, { "epoch": 0.45, "grad_norm": 0.6015625, "learning_rate": 0.00013364279355386003, "loss": 0.975, "step": 31490 }, { "epoch": 0.45, "grad_norm": 0.48828125, "learning_rate": 0.0001336192156779525, "loss": 0.8687, "step": 31495 }, { "epoch": 0.45, "grad_norm": 0.62890625, "learning_rate": 0.00013359563569475424, "loss": 1.0445, "step": 31500 }, { "epoch": 0.45, "grad_norm": 0.52734375, "learning_rate": 0.0001335720536057433, "loss": 0.8226, "step": 31505 }, { "epoch": 0.45, "grad_norm": 0.61328125, "learning_rate": 0.0001335484694123978, "loss": 0.8799, "step": 31510 }, { "epoch": 0.45, "grad_norm": 0.55859375, "learning_rate": 0.00013352488311619602, "loss": 0.9934, "step": 31515 }, { "epoch": 0.45, "grad_norm": 0.5078125, "learning_rate": 0.00013350129471861638, "loss": 0.7989, "step": 31520 }, { "epoch": 0.45, "grad_norm": 0.6015625, "learning_rate": 0.00013347770422113744, "loss": 1.0236, "step": 31525 }, { "epoch": 0.45, "grad_norm": 0.5703125, "learning_rate": 0.00013345411162523786, "loss": 0.9755, "step": 31530 }, { "epoch": 0.45, "grad_norm": 0.59375, "learning_rate": 0.00013343051693239647, "loss": 1.1008, "step": 31535 }, { "epoch": 0.45, "grad_norm": 0.59765625, "learning_rate": 0.00013340692014409222, "loss": 0.9241, "step": 31540 }, { "epoch": 0.45, "grad_norm": 0.58203125, "learning_rate": 0.00013338332126180415, "loss": 1.1343, "step": 31545 }, { "epoch": 0.45, "grad_norm": 0.578125, "learning_rate": 0.00013335972028701148, "loss": 0.9986, "step": 31550 }, { "epoch": 0.45, "grad_norm": 0.55859375, "learning_rate": 0.00013333611722119357, "loss": 0.9672, "step": 31555 }, { "epoch": 0.45, "grad_norm": 0.54296875, "learning_rate": 0.00013331251206582983, "loss": 1.0488, "step": 31560 }, { "epoch": 0.45, "grad_norm": 0.6875, "learning_rate": 0.00013328890482239994, "loss": 1.0026, "step": 31565 }, { "epoch": 0.45, "grad_norm": 0.609375, "learning_rate": 0.00013326529549238352, "loss": 1.0177, "step": 31570 }, { "epoch": 0.45, "grad_norm": 0.56640625, "learning_rate": 0.00013324168407726056, "loss": 0.9506, "step": 31575 }, { "epoch": 0.45, "grad_norm": 0.6328125, "learning_rate": 0.00013321807057851094, "loss": 0.9342, "step": 31580 }, { "epoch": 0.45, "grad_norm": 0.56640625, "learning_rate": 0.00013319445499761486, "loss": 1.0342, "step": 31585 }, { "epoch": 0.45, "grad_norm": 0.52734375, "learning_rate": 0.00013317083733605252, "loss": 1.0099, "step": 31590 }, { "epoch": 0.45, "grad_norm": 0.5234375, "learning_rate": 0.0001331472175953043, "loss": 0.9359, "step": 31595 }, { "epoch": 0.45, "grad_norm": 0.5546875, "learning_rate": 0.00013312359577685072, "loss": 0.971, "step": 31600 }, { "epoch": 0.45, "grad_norm": 0.546875, "learning_rate": 0.00013309997188217247, "loss": 0.93, "step": 31605 }, { "epoch": 0.45, "grad_norm": 0.51953125, "learning_rate": 0.00013307634591275028, "loss": 0.9013, "step": 31610 }, { "epoch": 0.45, "grad_norm": 0.54296875, "learning_rate": 0.00013305271787006503, "loss": 0.9941, "step": 31615 }, { "epoch": 0.45, "grad_norm": 0.6328125, "learning_rate": 0.0001330290877555978, "loss": 1.1095, "step": 31620 }, { "epoch": 0.45, "grad_norm": 0.50390625, "learning_rate": 0.0001330054555708297, "loss": 0.9006, "step": 31625 }, { "epoch": 0.45, "grad_norm": 0.515625, "learning_rate": 0.00013298182131724211, "loss": 0.9277, "step": 31630 }, { "epoch": 0.45, "grad_norm": 0.640625, "learning_rate": 0.00013295818499631636, "loss": 1.009, "step": 31635 }, { "epoch": 0.45, "grad_norm": 0.5234375, "learning_rate": 0.00013293454660953403, "loss": 0.9823, "step": 31640 }, { "epoch": 0.45, "grad_norm": 0.55078125, "learning_rate": 0.00013291090615837685, "loss": 1.0153, "step": 31645 }, { "epoch": 0.45, "grad_norm": 0.50390625, "learning_rate": 0.00013288726364432652, "loss": 0.8506, "step": 31650 }, { "epoch": 0.45, "grad_norm": 0.73828125, "learning_rate": 0.00013286361906886512, "loss": 1.0333, "step": 31655 }, { "epoch": 0.45, "grad_norm": 0.57421875, "learning_rate": 0.00013283997243347464, "loss": 1.1103, "step": 31660 }, { "epoch": 0.45, "grad_norm": 0.578125, "learning_rate": 0.00013281632373963727, "loss": 0.9147, "step": 31665 }, { "epoch": 0.45, "grad_norm": 0.61328125, "learning_rate": 0.00013279267298883535, "loss": 0.9931, "step": 31670 }, { "epoch": 0.45, "grad_norm": 0.59375, "learning_rate": 0.00013276902018255132, "loss": 1.0037, "step": 31675 }, { "epoch": 0.45, "grad_norm": 0.609375, "learning_rate": 0.0001327453653222678, "loss": 0.9983, "step": 31680 }, { "epoch": 0.45, "grad_norm": 0.61328125, "learning_rate": 0.00013272170840946754, "loss": 1.1003, "step": 31685 }, { "epoch": 0.45, "grad_norm": 0.5390625, "learning_rate": 0.00013269804944563327, "loss": 1.095, "step": 31690 }, { "epoch": 0.45, "grad_norm": 0.59375, "learning_rate": 0.00013267438843224803, "loss": 1.0487, "step": 31695 }, { "epoch": 0.45, "grad_norm": 0.64453125, "learning_rate": 0.00013265072537079492, "loss": 1.1067, "step": 31700 }, { "epoch": 0.45, "grad_norm": 0.56640625, "learning_rate": 0.00013262706026275712, "loss": 0.8894, "step": 31705 }, { "epoch": 0.45, "grad_norm": 0.59375, "learning_rate": 0.00013260339310961806, "loss": 0.8737, "step": 31710 }, { "epoch": 0.45, "grad_norm": 0.462890625, "learning_rate": 0.0001325797239128612, "loss": 0.7964, "step": 31715 }, { "epoch": 0.46, "grad_norm": 0.59375, "learning_rate": 0.00013255605267397012, "loss": 0.8535, "step": 31720 }, { "epoch": 0.46, "grad_norm": 0.6484375, "learning_rate": 0.0001325323793944286, "loss": 1.0003, "step": 31725 }, { "epoch": 0.46, "grad_norm": 0.625, "learning_rate": 0.00013250870407572045, "loss": 0.8312, "step": 31730 }, { "epoch": 0.46, "grad_norm": 0.59765625, "learning_rate": 0.00013248502671932971, "loss": 1.0002, "step": 31735 }, { "epoch": 0.46, "grad_norm": 0.55859375, "learning_rate": 0.00013246134732674056, "loss": 0.912, "step": 31740 }, { "epoch": 0.46, "grad_norm": 0.58984375, "learning_rate": 0.0001324376658994371, "loss": 0.928, "step": 31745 }, { "epoch": 0.46, "grad_norm": 0.6640625, "learning_rate": 0.00013241398243890386, "loss": 1.0814, "step": 31750 }, { "epoch": 0.46, "grad_norm": 0.4140625, "learning_rate": 0.00013239029694662527, "loss": 0.7331, "step": 31755 }, { "epoch": 0.46, "grad_norm": 0.5625, "learning_rate": 0.00013236660942408596, "loss": 0.9826, "step": 31760 }, { "epoch": 0.46, "grad_norm": 0.56640625, "learning_rate": 0.00013234291987277076, "loss": 0.8716, "step": 31765 }, { "epoch": 0.46, "grad_norm": 0.5234375, "learning_rate": 0.00013231922829416443, "loss": 0.8813, "step": 31770 }, { "epoch": 0.46, "grad_norm": 0.546875, "learning_rate": 0.0001322955346897521, "loss": 1.0243, "step": 31775 }, { "epoch": 0.46, "grad_norm": 0.5703125, "learning_rate": 0.0001322718390610189, "loss": 0.9718, "step": 31780 }, { "epoch": 0.46, "grad_norm": 0.5703125, "learning_rate": 0.00013224814140945003, "loss": 0.838, "step": 31785 }, { "epoch": 0.46, "grad_norm": 0.55859375, "learning_rate": 0.00013222444173653097, "loss": 0.8757, "step": 31790 }, { "epoch": 0.46, "grad_norm": 0.58203125, "learning_rate": 0.0001322007400437472, "loss": 0.9052, "step": 31795 }, { "epoch": 0.46, "grad_norm": 0.54296875, "learning_rate": 0.00013217703633258433, "loss": 0.9327, "step": 31800 }, { "epoch": 0.46, "grad_norm": 0.5546875, "learning_rate": 0.00013215333060452816, "loss": 0.9738, "step": 31805 }, { "epoch": 0.46, "grad_norm": 0.546875, "learning_rate": 0.00013212962286106468, "loss": 0.8292, "step": 31810 }, { "epoch": 0.46, "grad_norm": 0.59375, "learning_rate": 0.00013210591310367978, "loss": 0.9225, "step": 31815 }, { "epoch": 0.46, "grad_norm": 0.7109375, "learning_rate": 0.00013208220133385974, "loss": 1.0917, "step": 31820 }, { "epoch": 0.46, "grad_norm": 0.63671875, "learning_rate": 0.00013205848755309073, "loss": 0.9889, "step": 31825 }, { "epoch": 0.46, "grad_norm": 0.5234375, "learning_rate": 0.00013203477176285924, "loss": 0.9825, "step": 31830 }, { "epoch": 0.46, "grad_norm": 0.59765625, "learning_rate": 0.00013201105396465178, "loss": 1.0294, "step": 31835 }, { "epoch": 0.46, "grad_norm": 0.75, "learning_rate": 0.00013198733415995494, "loss": 1.1127, "step": 31840 }, { "epoch": 0.46, "grad_norm": 0.58984375, "learning_rate": 0.00013196361235025562, "loss": 0.8949, "step": 31845 }, { "epoch": 0.46, "grad_norm": 0.5625, "learning_rate": 0.00013193988853704068, "loss": 0.845, "step": 31850 }, { "epoch": 0.46, "grad_norm": 0.5390625, "learning_rate": 0.00013191616272179713, "loss": 0.8679, "step": 31855 }, { "epoch": 0.46, "grad_norm": 0.60546875, "learning_rate": 0.00013189243490601215, "loss": 0.9756, "step": 31860 }, { "epoch": 0.46, "grad_norm": 0.53515625, "learning_rate": 0.00013186870509117302, "loss": 1.0327, "step": 31865 }, { "epoch": 0.46, "grad_norm": 0.5859375, "learning_rate": 0.00013184497327876717, "loss": 0.9826, "step": 31870 }, { "epoch": 0.46, "grad_norm": 0.5703125, "learning_rate": 0.00013182123947028216, "loss": 0.7791, "step": 31875 }, { "epoch": 0.46, "grad_norm": 0.578125, "learning_rate": 0.00013179750366720556, "loss": 0.9861, "step": 31880 }, { "epoch": 0.46, "grad_norm": 0.6015625, "learning_rate": 0.00013177376587102522, "loss": 0.9623, "step": 31885 }, { "epoch": 0.46, "grad_norm": 0.5625, "learning_rate": 0.0001317500260832291, "loss": 0.9592, "step": 31890 }, { "epoch": 0.46, "grad_norm": 0.55859375, "learning_rate": 0.00013172628430530513, "loss": 0.9209, "step": 31895 }, { "epoch": 0.46, "grad_norm": 0.5390625, "learning_rate": 0.00013170254053874157, "loss": 0.7683, "step": 31900 }, { "epoch": 0.46, "grad_norm": 0.515625, "learning_rate": 0.00013167879478502665, "loss": 0.9936, "step": 31905 }, { "epoch": 0.46, "grad_norm": 0.53515625, "learning_rate": 0.00013165504704564876, "loss": 1.0267, "step": 31910 }, { "epoch": 0.46, "grad_norm": 0.5234375, "learning_rate": 0.00013163129732209652, "loss": 0.9512, "step": 31915 }, { "epoch": 0.46, "grad_norm": 0.60546875, "learning_rate": 0.0001316075456158585, "loss": 0.9878, "step": 31920 }, { "epoch": 0.46, "grad_norm": 0.5234375, "learning_rate": 0.00013158379192842353, "loss": 1.0096, "step": 31925 }, { "epoch": 0.46, "grad_norm": 0.5078125, "learning_rate": 0.00013156003626128054, "loss": 0.9902, "step": 31930 }, { "epoch": 0.46, "grad_norm": 0.52734375, "learning_rate": 0.00013153627861591847, "loss": 0.7962, "step": 31935 }, { "epoch": 0.46, "grad_norm": 0.59765625, "learning_rate": 0.00013151251899382662, "loss": 0.873, "step": 31940 }, { "epoch": 0.46, "grad_norm": 0.62890625, "learning_rate": 0.00013148875739649413, "loss": 0.9694, "step": 31945 }, { "epoch": 0.46, "grad_norm": 0.6875, "learning_rate": 0.00013146499382541048, "loss": 1.0138, "step": 31950 }, { "epoch": 0.46, "grad_norm": 0.53515625, "learning_rate": 0.00013144122828206523, "loss": 0.9132, "step": 31955 }, { "epoch": 0.46, "grad_norm": 0.53515625, "learning_rate": 0.00013141746076794793, "loss": 0.9452, "step": 31960 }, { "epoch": 0.46, "grad_norm": 0.5859375, "learning_rate": 0.0001313936912845484, "loss": 1.0403, "step": 31965 }, { "epoch": 0.46, "grad_norm": 0.5625, "learning_rate": 0.00013136991983335656, "loss": 0.9956, "step": 31970 }, { "epoch": 0.46, "grad_norm": 0.6484375, "learning_rate": 0.00013134614641586244, "loss": 0.8793, "step": 31975 }, { "epoch": 0.46, "grad_norm": 0.57421875, "learning_rate": 0.00013132237103355613, "loss": 0.9511, "step": 31980 }, { "epoch": 0.46, "grad_norm": 0.57421875, "learning_rate": 0.00013129859368792794, "loss": 0.9065, "step": 31985 }, { "epoch": 0.46, "grad_norm": 0.54296875, "learning_rate": 0.00013127481438046824, "loss": 0.9021, "step": 31990 }, { "epoch": 0.46, "grad_norm": 0.5625, "learning_rate": 0.00013125103311266756, "loss": 0.982, "step": 31995 }, { "epoch": 0.46, "grad_norm": 0.45703125, "learning_rate": 0.00013122724988601656, "loss": 0.8726, "step": 32000 }, { "epoch": 0.46, "grad_norm": 0.609375, "learning_rate": 0.00013120346470200594, "loss": 0.845, "step": 32005 }, { "epoch": 0.46, "grad_norm": 0.53515625, "learning_rate": 0.00013117967756212667, "loss": 1.1373, "step": 32010 }, { "epoch": 0.46, "grad_norm": 0.55859375, "learning_rate": 0.00013115588846786963, "loss": 0.9724, "step": 32015 }, { "epoch": 0.46, "grad_norm": 0.56640625, "learning_rate": 0.00013113209742072606, "loss": 1.0094, "step": 32020 }, { "epoch": 0.46, "grad_norm": 0.609375, "learning_rate": 0.00013110830442218714, "loss": 0.9913, "step": 32025 }, { "epoch": 0.46, "grad_norm": 0.5234375, "learning_rate": 0.0001310845094737443, "loss": 0.919, "step": 32030 }, { "epoch": 0.46, "grad_norm": 0.67578125, "learning_rate": 0.00013106071257688897, "loss": 1.0586, "step": 32035 }, { "epoch": 0.46, "grad_norm": 0.546875, "learning_rate": 0.0001310369137331128, "loss": 1.0326, "step": 32040 }, { "epoch": 0.46, "grad_norm": 0.54296875, "learning_rate": 0.00013101311294390756, "loss": 0.996, "step": 32045 }, { "epoch": 0.46, "grad_norm": 0.58984375, "learning_rate": 0.00013098931021076506, "loss": 1.1005, "step": 32050 }, { "epoch": 0.46, "grad_norm": 0.640625, "learning_rate": 0.00013096550553517734, "loss": 0.9351, "step": 32055 }, { "epoch": 0.46, "grad_norm": 0.56640625, "learning_rate": 0.0001309416989186364, "loss": 0.8718, "step": 32060 }, { "epoch": 0.46, "grad_norm": 0.56640625, "learning_rate": 0.0001309178903626346, "loss": 0.9508, "step": 32065 }, { "epoch": 0.46, "grad_norm": 0.56640625, "learning_rate": 0.00013089407986866414, "loss": 0.9767, "step": 32070 }, { "epoch": 0.46, "grad_norm": 0.484375, "learning_rate": 0.00013087026743821765, "loss": 0.9856, "step": 32075 }, { "epoch": 0.46, "grad_norm": 0.56640625, "learning_rate": 0.00013084645307278762, "loss": 0.9379, "step": 32080 }, { "epoch": 0.46, "grad_norm": 0.609375, "learning_rate": 0.00013082263677386674, "loss": 0.9073, "step": 32085 }, { "epoch": 0.46, "grad_norm": 0.515625, "learning_rate": 0.00013079881854294792, "loss": 0.7946, "step": 32090 }, { "epoch": 0.46, "grad_norm": 0.5546875, "learning_rate": 0.00013077499838152405, "loss": 0.9327, "step": 32095 }, { "epoch": 0.46, "grad_norm": 0.66796875, "learning_rate": 0.00013075117629108825, "loss": 0.963, "step": 32100 }, { "epoch": 0.46, "grad_norm": 0.5859375, "learning_rate": 0.0001307273522731337, "loss": 1.0912, "step": 32105 }, { "epoch": 0.46, "grad_norm": 0.75, "learning_rate": 0.0001307035263291537, "loss": 1.0582, "step": 32110 }, { "epoch": 0.46, "grad_norm": 0.52734375, "learning_rate": 0.0001306796984606417, "loss": 1.0168, "step": 32115 }, { "epoch": 0.46, "grad_norm": 0.56640625, "learning_rate": 0.00013065586866909128, "loss": 0.9312, "step": 32120 }, { "epoch": 0.46, "grad_norm": 0.625, "learning_rate": 0.00013063203695599606, "loss": 1.0982, "step": 32125 }, { "epoch": 0.46, "grad_norm": 1.09375, "learning_rate": 0.0001306082033228499, "loss": 1.0406, "step": 32130 }, { "epoch": 0.46, "grad_norm": 0.609375, "learning_rate": 0.00013058436777114673, "loss": 1.037, "step": 32135 }, { "epoch": 0.46, "grad_norm": 0.58203125, "learning_rate": 0.0001305605303023805, "loss": 0.8963, "step": 32140 }, { "epoch": 0.46, "grad_norm": 0.453125, "learning_rate": 0.00013053669091804546, "loss": 0.9866, "step": 32145 }, { "epoch": 0.46, "grad_norm": 0.61328125, "learning_rate": 0.00013051284961963585, "loss": 1.0073, "step": 32150 }, { "epoch": 0.46, "grad_norm": 0.6640625, "learning_rate": 0.00013048900640864606, "loss": 1.0038, "step": 32155 }, { "epoch": 0.46, "grad_norm": 0.5390625, "learning_rate": 0.00013046516128657065, "loss": 1.0067, "step": 32160 }, { "epoch": 0.46, "grad_norm": 0.52734375, "learning_rate": 0.00013044131425490418, "loss": 0.9225, "step": 32165 }, { "epoch": 0.46, "grad_norm": 0.54296875, "learning_rate": 0.0001304174653151415, "loss": 0.8713, "step": 32170 }, { "epoch": 0.46, "grad_norm": 0.50390625, "learning_rate": 0.00013039361446877745, "loss": 0.8281, "step": 32175 }, { "epoch": 0.46, "grad_norm": 0.54296875, "learning_rate": 0.000130369761717307, "loss": 0.9131, "step": 32180 }, { "epoch": 0.46, "grad_norm": 0.58984375, "learning_rate": 0.00013034590706222538, "loss": 0.891, "step": 32185 }, { "epoch": 0.46, "grad_norm": 0.5703125, "learning_rate": 0.0001303220505050277, "loss": 1.1046, "step": 32190 }, { "epoch": 0.46, "grad_norm": 0.5703125, "learning_rate": 0.00013029819204720932, "loss": 1.0849, "step": 32195 }, { "epoch": 0.46, "grad_norm": 0.5703125, "learning_rate": 0.0001302743316902658, "loss": 0.9852, "step": 32200 }, { "epoch": 0.46, "grad_norm": 0.58203125, "learning_rate": 0.00013025046943569268, "loss": 0.9682, "step": 32205 }, { "epoch": 0.46, "grad_norm": 0.6328125, "learning_rate": 0.00013022660528498568, "loss": 0.937, "step": 32210 }, { "epoch": 0.46, "grad_norm": 0.51171875, "learning_rate": 0.00013020273923964064, "loss": 0.8642, "step": 32215 }, { "epoch": 0.46, "grad_norm": 0.609375, "learning_rate": 0.00013017887130115349, "loss": 0.8892, "step": 32220 }, { "epoch": 0.46, "grad_norm": 0.48046875, "learning_rate": 0.00013015500147102032, "loss": 0.9228, "step": 32225 }, { "epoch": 0.46, "grad_norm": 0.5703125, "learning_rate": 0.00013013112975073733, "loss": 1.0077, "step": 32230 }, { "epoch": 0.46, "grad_norm": 0.5234375, "learning_rate": 0.0001301072561418008, "loss": 1.0031, "step": 32235 }, { "epoch": 0.46, "grad_norm": 0.5390625, "learning_rate": 0.00013008338064570717, "loss": 0.9054, "step": 32240 }, { "epoch": 0.46, "grad_norm": 0.55859375, "learning_rate": 0.000130059503263953, "loss": 1.0037, "step": 32245 }, { "epoch": 0.46, "grad_norm": 0.5703125, "learning_rate": 0.00013003562399803488, "loss": 0.8962, "step": 32250 }, { "epoch": 0.46, "grad_norm": 0.5546875, "learning_rate": 0.00013001174284944968, "loss": 0.8093, "step": 32255 }, { "epoch": 0.46, "grad_norm": 0.56640625, "learning_rate": 0.00012998785981969423, "loss": 0.9555, "step": 32260 }, { "epoch": 0.46, "grad_norm": 0.55859375, "learning_rate": 0.00012996397491026558, "loss": 0.9951, "step": 32265 }, { "epoch": 0.46, "grad_norm": 0.5546875, "learning_rate": 0.0001299400881226609, "loss": 0.764, "step": 32270 }, { "epoch": 0.46, "grad_norm": 0.4765625, "learning_rate": 0.00012991619945837735, "loss": 0.9576, "step": 32275 }, { "epoch": 0.46, "grad_norm": 0.62109375, "learning_rate": 0.00012989230891891236, "loss": 0.8473, "step": 32280 }, { "epoch": 0.46, "grad_norm": 0.5078125, "learning_rate": 0.0001298684165057634, "loss": 0.803, "step": 32285 }, { "epoch": 0.46, "grad_norm": 0.55859375, "learning_rate": 0.0001298445222204281, "loss": 0.8959, "step": 32290 }, { "epoch": 0.46, "grad_norm": 0.6484375, "learning_rate": 0.00012982062606440412, "loss": 1.0361, "step": 32295 }, { "epoch": 0.46, "grad_norm": 0.546875, "learning_rate": 0.00012979672803918938, "loss": 0.9159, "step": 32300 }, { "epoch": 0.46, "grad_norm": 0.56640625, "learning_rate": 0.00012977282814628172, "loss": 1.0206, "step": 32305 }, { "epoch": 0.46, "grad_norm": 0.5703125, "learning_rate": 0.00012974892638717932, "loss": 0.9815, "step": 32310 }, { "epoch": 0.46, "grad_norm": 0.5859375, "learning_rate": 0.00012972502276338034, "loss": 1.0164, "step": 32315 }, { "epoch": 0.46, "grad_norm": 0.53125, "learning_rate": 0.0001297011172763831, "loss": 1.0121, "step": 32320 }, { "epoch": 0.46, "grad_norm": 0.69140625, "learning_rate": 0.00012967720992768596, "loss": 1.0233, "step": 32325 }, { "epoch": 0.46, "grad_norm": 0.59765625, "learning_rate": 0.00012965330071878752, "loss": 1.0093, "step": 32330 }, { "epoch": 0.46, "grad_norm": 0.5390625, "learning_rate": 0.00012962938965118643, "loss": 0.935, "step": 32335 }, { "epoch": 0.46, "grad_norm": 0.5078125, "learning_rate": 0.00012960547672638144, "loss": 0.9044, "step": 32340 }, { "epoch": 0.46, "grad_norm": 0.640625, "learning_rate": 0.00012958156194587146, "loss": 1.0922, "step": 32345 }, { "epoch": 0.46, "grad_norm": 0.52734375, "learning_rate": 0.00012955764531115548, "loss": 0.9507, "step": 32350 }, { "epoch": 0.46, "grad_norm": 0.52734375, "learning_rate": 0.00012953372682373264, "loss": 0.9014, "step": 32355 }, { "epoch": 0.46, "grad_norm": 0.52734375, "learning_rate": 0.00012950980648510213, "loss": 1.078, "step": 32360 }, { "epoch": 0.46, "grad_norm": 0.578125, "learning_rate": 0.00012948588429676335, "loss": 1.0288, "step": 32365 }, { "epoch": 0.46, "grad_norm": 0.6015625, "learning_rate": 0.00012946196026021578, "loss": 1.0478, "step": 32370 }, { "epoch": 0.46, "grad_norm": 0.58203125, "learning_rate": 0.000129438034376959, "loss": 0.9876, "step": 32375 }, { "epoch": 0.46, "grad_norm": 0.515625, "learning_rate": 0.0001294141066484927, "loss": 0.7554, "step": 32380 }, { "epoch": 0.46, "grad_norm": 0.53515625, "learning_rate": 0.00012939017707631664, "loss": 0.9796, "step": 32385 }, { "epoch": 0.46, "grad_norm": 0.57421875, "learning_rate": 0.00012936624566193086, "loss": 1.0194, "step": 32390 }, { "epoch": 0.46, "grad_norm": 0.5, "learning_rate": 0.00012934231240683536, "loss": 0.8431, "step": 32395 }, { "epoch": 0.46, "grad_norm": 0.5390625, "learning_rate": 0.00012931837731253027, "loss": 0.9893, "step": 32400 }, { "epoch": 0.46, "grad_norm": 0.6640625, "learning_rate": 0.0001292944403805159, "loss": 1.1741, "step": 32405 }, { "epoch": 0.46, "grad_norm": 0.5390625, "learning_rate": 0.00012927050161229265, "loss": 0.6753, "step": 32410 }, { "epoch": 0.46, "grad_norm": 0.578125, "learning_rate": 0.00012924656100936103, "loss": 0.9545, "step": 32415 }, { "epoch": 0.47, "grad_norm": 0.51171875, "learning_rate": 0.0001292226185732217, "loss": 1.087, "step": 32420 }, { "epoch": 0.47, "grad_norm": 0.578125, "learning_rate": 0.00012919867430537525, "loss": 1.0627, "step": 32425 }, { "epoch": 0.47, "grad_norm": 0.515625, "learning_rate": 0.00012917472820732272, "loss": 1.1009, "step": 32430 }, { "epoch": 0.47, "grad_norm": 0.5390625, "learning_rate": 0.00012915078028056498, "loss": 0.9163, "step": 32435 }, { "epoch": 0.47, "grad_norm": 0.51953125, "learning_rate": 0.00012912683052660313, "loss": 0.922, "step": 32440 }, { "epoch": 0.47, "grad_norm": 0.6953125, "learning_rate": 0.00012910287894693834, "loss": 0.8632, "step": 32445 }, { "epoch": 0.47, "grad_norm": 0.62890625, "learning_rate": 0.000129078925543072, "loss": 1.0744, "step": 32450 }, { "epoch": 0.47, "grad_norm": 0.546875, "learning_rate": 0.00012905497031650548, "loss": 0.9194, "step": 32455 }, { "epoch": 0.47, "grad_norm": 0.494140625, "learning_rate": 0.00012903101326874032, "loss": 1.0821, "step": 32460 }, { "epoch": 0.47, "grad_norm": 0.5703125, "learning_rate": 0.00012900705440127818, "loss": 0.9133, "step": 32465 }, { "epoch": 0.47, "grad_norm": 0.5078125, "learning_rate": 0.00012898309371562084, "loss": 0.9024, "step": 32470 }, { "epoch": 0.47, "grad_norm": 0.578125, "learning_rate": 0.0001289591312132702, "loss": 1.0314, "step": 32475 }, { "epoch": 0.47, "grad_norm": 0.546875, "learning_rate": 0.0001289351668957282, "loss": 1.0295, "step": 32480 }, { "epoch": 0.47, "grad_norm": 0.52734375, "learning_rate": 0.00012891120076449699, "loss": 0.9198, "step": 32485 }, { "epoch": 0.47, "grad_norm": 0.56640625, "learning_rate": 0.0001288872328210788, "loss": 1.0714, "step": 32490 }, { "epoch": 0.47, "grad_norm": 0.54296875, "learning_rate": 0.00012886326306697595, "loss": 0.9769, "step": 32495 }, { "epoch": 0.47, "grad_norm": 0.50390625, "learning_rate": 0.00012883929150369093, "loss": 1.016, "step": 32500 }, { "epoch": 0.47, "grad_norm": 0.6015625, "learning_rate": 0.0001288153181327262, "loss": 1.0573, "step": 32505 }, { "epoch": 0.47, "grad_norm": 0.5546875, "learning_rate": 0.00012879134295558457, "loss": 1.1267, "step": 32510 }, { "epoch": 0.47, "grad_norm": 0.56640625, "learning_rate": 0.00012876736597376874, "loss": 1.0102, "step": 32515 }, { "epoch": 0.47, "grad_norm": 0.5625, "learning_rate": 0.00012874338718878167, "loss": 0.9099, "step": 32520 }, { "epoch": 0.47, "grad_norm": 0.52734375, "learning_rate": 0.00012871940660212636, "loss": 1.0459, "step": 32525 }, { "epoch": 0.47, "grad_norm": 0.6015625, "learning_rate": 0.00012869542421530594, "loss": 1.0324, "step": 32530 }, { "epoch": 0.47, "grad_norm": 0.5703125, "learning_rate": 0.0001286714400298236, "loss": 0.8854, "step": 32535 }, { "epoch": 0.47, "grad_norm": 0.5234375, "learning_rate": 0.0001286474540471828, "loss": 1.0234, "step": 32540 }, { "epoch": 0.47, "grad_norm": 0.578125, "learning_rate": 0.00012862346626888694, "loss": 1.0255, "step": 32545 }, { "epoch": 0.47, "grad_norm": 0.546875, "learning_rate": 0.00012859947669643958, "loss": 0.9126, "step": 32550 }, { "epoch": 0.47, "grad_norm": 0.46484375, "learning_rate": 0.00012857548533134452, "loss": 1.0146, "step": 32555 }, { "epoch": 0.47, "grad_norm": 0.578125, "learning_rate": 0.00012855149217510544, "loss": 0.852, "step": 32560 }, { "epoch": 0.47, "grad_norm": 0.5390625, "learning_rate": 0.0001285274972292263, "loss": 0.8511, "step": 32565 }, { "epoch": 0.47, "grad_norm": 0.55078125, "learning_rate": 0.0001285035004952112, "loss": 1.1151, "step": 32570 }, { "epoch": 0.47, "grad_norm": 0.546875, "learning_rate": 0.00012847950197456416, "loss": 1.0862, "step": 32575 }, { "epoch": 0.47, "grad_norm": 0.58984375, "learning_rate": 0.00012845550166878957, "loss": 0.9294, "step": 32580 }, { "epoch": 0.47, "grad_norm": 0.546875, "learning_rate": 0.0001284314995793917, "loss": 1.1179, "step": 32585 }, { "epoch": 0.47, "grad_norm": 0.55078125, "learning_rate": 0.000128407495707875, "loss": 0.9428, "step": 32590 }, { "epoch": 0.47, "grad_norm": 0.56640625, "learning_rate": 0.00012838349005574417, "loss": 1.0087, "step": 32595 }, { "epoch": 0.47, "grad_norm": 0.52734375, "learning_rate": 0.00012835948262450385, "loss": 0.8835, "step": 32600 }, { "epoch": 0.47, "grad_norm": 0.53515625, "learning_rate": 0.00012833547341565887, "loss": 1.0477, "step": 32605 }, { "epoch": 0.47, "grad_norm": 0.58203125, "learning_rate": 0.00012831146243071415, "loss": 1.0284, "step": 32610 }, { "epoch": 0.47, "grad_norm": 0.54296875, "learning_rate": 0.00012828744967117469, "loss": 1.0356, "step": 32615 }, { "epoch": 0.47, "grad_norm": 0.5703125, "learning_rate": 0.00012826343513854568, "loss": 1.0029, "step": 32620 }, { "epoch": 0.47, "grad_norm": 0.5546875, "learning_rate": 0.00012823941883433236, "loss": 0.9734, "step": 32625 }, { "epoch": 0.47, "grad_norm": 0.62890625, "learning_rate": 0.00012821540076004016, "loss": 1.0066, "step": 32630 }, { "epoch": 0.47, "grad_norm": 0.578125, "learning_rate": 0.00012819138091717445, "loss": 0.9152, "step": 32635 }, { "epoch": 0.47, "grad_norm": 0.53125, "learning_rate": 0.00012816735930724088, "loss": 0.8258, "step": 32640 }, { "epoch": 0.47, "grad_norm": 0.4921875, "learning_rate": 0.00012814333593174515, "loss": 1.0175, "step": 32645 }, { "epoch": 0.47, "grad_norm": 0.57421875, "learning_rate": 0.00012811931079219309, "loss": 1.1105, "step": 32650 }, { "epoch": 0.47, "grad_norm": 0.58984375, "learning_rate": 0.00012809528389009058, "loss": 0.878, "step": 32655 }, { "epoch": 0.47, "grad_norm": 0.58203125, "learning_rate": 0.0001280712552269437, "loss": 0.9104, "step": 32660 }, { "epoch": 0.47, "grad_norm": 0.546875, "learning_rate": 0.00012804722480425857, "loss": 1.0456, "step": 32665 }, { "epoch": 0.47, "grad_norm": 0.53515625, "learning_rate": 0.00012802319262354142, "loss": 0.8883, "step": 32670 }, { "epoch": 0.47, "grad_norm": 0.609375, "learning_rate": 0.00012799915868629867, "loss": 1.0246, "step": 32675 }, { "epoch": 0.47, "grad_norm": 0.6171875, "learning_rate": 0.00012797512299403673, "loss": 0.9482, "step": 32680 }, { "epoch": 0.47, "grad_norm": 0.5625, "learning_rate": 0.00012795108554826228, "loss": 0.9019, "step": 32685 }, { "epoch": 0.47, "grad_norm": 0.5, "learning_rate": 0.0001279270463504819, "loss": 0.963, "step": 32690 }, { "epoch": 0.47, "grad_norm": 0.65234375, "learning_rate": 0.0001279030054022025, "loss": 0.8933, "step": 32695 }, { "epoch": 0.47, "grad_norm": 0.64453125, "learning_rate": 0.00012787896270493088, "loss": 1.0861, "step": 32700 }, { "epoch": 0.47, "grad_norm": 0.53515625, "learning_rate": 0.00012785491826017414, "loss": 0.9753, "step": 32705 }, { "epoch": 0.47, "grad_norm": 0.5234375, "learning_rate": 0.00012783087206943942, "loss": 0.9748, "step": 32710 }, { "epoch": 0.47, "grad_norm": 0.57421875, "learning_rate": 0.00012780682413423395, "loss": 0.9589, "step": 32715 }, { "epoch": 0.47, "grad_norm": 0.5, "learning_rate": 0.00012778277445606506, "loss": 0.805, "step": 32720 }, { "epoch": 0.47, "grad_norm": 0.6171875, "learning_rate": 0.00012775872303644021, "loss": 1.01, "step": 32725 }, { "epoch": 0.47, "grad_norm": 0.7109375, "learning_rate": 0.000127734669876867, "loss": 1.0963, "step": 32730 }, { "epoch": 0.47, "grad_norm": 0.59375, "learning_rate": 0.00012771061497885312, "loss": 0.9843, "step": 32735 }, { "epoch": 0.47, "grad_norm": 0.55859375, "learning_rate": 0.0001276865583439063, "loss": 0.9517, "step": 32740 }, { "epoch": 0.47, "grad_norm": 0.478515625, "learning_rate": 0.00012766249997353448, "loss": 0.8473, "step": 32745 }, { "epoch": 0.47, "grad_norm": 0.6640625, "learning_rate": 0.00012763843986924564, "loss": 0.919, "step": 32750 }, { "epoch": 0.47, "grad_norm": 0.62109375, "learning_rate": 0.00012761437803254793, "loss": 0.949, "step": 32755 }, { "epoch": 0.47, "grad_norm": 0.5625, "learning_rate": 0.00012759031446494957, "loss": 0.9639, "step": 32760 }, { "epoch": 0.47, "grad_norm": 0.5390625, "learning_rate": 0.00012756624916795885, "loss": 0.9237, "step": 32765 }, { "epoch": 0.47, "grad_norm": 0.55078125, "learning_rate": 0.00012754218214308427, "loss": 0.947, "step": 32770 }, { "epoch": 0.47, "grad_norm": 0.48828125, "learning_rate": 0.0001275181133918343, "loss": 0.9556, "step": 32775 }, { "epoch": 0.47, "grad_norm": 0.51953125, "learning_rate": 0.00012749404291571766, "loss": 1.0168, "step": 32780 }, { "epoch": 0.47, "grad_norm": 0.5546875, "learning_rate": 0.0001274699707162431, "loss": 0.9174, "step": 32785 }, { "epoch": 0.47, "grad_norm": 0.56640625, "learning_rate": 0.0001274458967949195, "loss": 0.8608, "step": 32790 }, { "epoch": 0.47, "grad_norm": 0.6015625, "learning_rate": 0.00012742182115325584, "loss": 1.1732, "step": 32795 }, { "epoch": 0.47, "grad_norm": 0.59765625, "learning_rate": 0.00012739774379276117, "loss": 1.1252, "step": 32800 }, { "epoch": 0.47, "grad_norm": 0.67578125, "learning_rate": 0.00012737366471494472, "loss": 1.0629, "step": 32805 }, { "epoch": 0.47, "grad_norm": 0.6796875, "learning_rate": 0.0001273495839213158, "loss": 1.0418, "step": 32810 }, { "epoch": 0.47, "grad_norm": 0.60546875, "learning_rate": 0.0001273255014133838, "loss": 0.868, "step": 32815 }, { "epoch": 0.47, "grad_norm": 0.546875, "learning_rate": 0.00012730141719265826, "loss": 0.9079, "step": 32820 }, { "epoch": 0.47, "grad_norm": 0.60546875, "learning_rate": 0.0001272773312606488, "loss": 0.9518, "step": 32825 }, { "epoch": 0.47, "grad_norm": 0.65625, "learning_rate": 0.00012725324361886515, "loss": 0.9046, "step": 32830 }, { "epoch": 0.47, "grad_norm": 0.54296875, "learning_rate": 0.00012722915426881715, "loss": 0.9271, "step": 32835 }, { "epoch": 0.47, "grad_norm": 0.6484375, "learning_rate": 0.00012720506321201472, "loss": 0.9904, "step": 32840 }, { "epoch": 0.47, "grad_norm": 0.59375, "learning_rate": 0.00012718097044996798, "loss": 0.9101, "step": 32845 }, { "epoch": 0.47, "grad_norm": 0.53515625, "learning_rate": 0.00012715687598418706, "loss": 0.937, "step": 32850 }, { "epoch": 0.47, "grad_norm": 0.5390625, "learning_rate": 0.00012713277981618218, "loss": 1.0701, "step": 32855 }, { "epoch": 0.47, "grad_norm": 0.5859375, "learning_rate": 0.0001271086819474638, "loss": 0.8446, "step": 32860 }, { "epoch": 0.47, "grad_norm": 0.609375, "learning_rate": 0.00012708458237954234, "loss": 0.926, "step": 32865 }, { "epoch": 0.47, "grad_norm": 0.64453125, "learning_rate": 0.00012706048111392845, "loss": 1.0323, "step": 32870 }, { "epoch": 0.47, "grad_norm": 0.578125, "learning_rate": 0.00012703637815213273, "loss": 0.9371, "step": 32875 }, { "epoch": 0.47, "grad_norm": 0.6015625, "learning_rate": 0.00012701227349566608, "loss": 0.8327, "step": 32880 }, { "epoch": 0.47, "grad_norm": 0.58984375, "learning_rate": 0.00012698816714603933, "loss": 1.0061, "step": 32885 }, { "epoch": 0.47, "grad_norm": 0.60546875, "learning_rate": 0.0001269640591047635, "loss": 0.91, "step": 32890 }, { "epoch": 0.47, "grad_norm": 0.49609375, "learning_rate": 0.00012693994937334983, "loss": 0.823, "step": 32895 }, { "epoch": 0.47, "grad_norm": 0.5703125, "learning_rate": 0.0001269158379533094, "loss": 0.9868, "step": 32900 }, { "epoch": 0.47, "grad_norm": 0.59765625, "learning_rate": 0.00012689172484615357, "loss": 0.9551, "step": 32905 }, { "epoch": 0.47, "grad_norm": 0.64453125, "learning_rate": 0.00012686761005339383, "loss": 0.8869, "step": 32910 }, { "epoch": 0.47, "grad_norm": 0.5625, "learning_rate": 0.00012684349357654166, "loss": 0.941, "step": 32915 }, { "epoch": 0.47, "grad_norm": 0.6171875, "learning_rate": 0.00012681937541710877, "loss": 0.9691, "step": 32920 }, { "epoch": 0.47, "grad_norm": 0.486328125, "learning_rate": 0.0001267952555766069, "loss": 0.8838, "step": 32925 }, { "epoch": 0.47, "grad_norm": 0.60546875, "learning_rate": 0.00012677113405654784, "loss": 0.8744, "step": 32930 }, { "epoch": 0.47, "grad_norm": 0.6171875, "learning_rate": 0.00012674701085844365, "loss": 1.0615, "step": 32935 }, { "epoch": 0.47, "grad_norm": 0.6171875, "learning_rate": 0.00012672288598380632, "loss": 0.9642, "step": 32940 }, { "epoch": 0.47, "grad_norm": 0.486328125, "learning_rate": 0.0001266987594341481, "loss": 0.9284, "step": 32945 }, { "epoch": 0.47, "grad_norm": 0.625, "learning_rate": 0.00012667463121098123, "loss": 1.0717, "step": 32950 }, { "epoch": 0.47, "grad_norm": 0.62109375, "learning_rate": 0.00012665050131581806, "loss": 0.9979, "step": 32955 }, { "epoch": 0.47, "grad_norm": 0.65625, "learning_rate": 0.00012662636975017114, "loss": 1.0399, "step": 32960 }, { "epoch": 0.47, "grad_norm": 0.59375, "learning_rate": 0.00012660223651555304, "loss": 1.1574, "step": 32965 }, { "epoch": 0.47, "grad_norm": 0.6640625, "learning_rate": 0.00012657810161347644, "loss": 0.8294, "step": 32970 }, { "epoch": 0.47, "grad_norm": 0.5390625, "learning_rate": 0.00012655396504545417, "loss": 1.0261, "step": 32975 }, { "epoch": 0.47, "grad_norm": 0.5546875, "learning_rate": 0.00012652982681299915, "loss": 0.9312, "step": 32980 }, { "epoch": 0.47, "grad_norm": 0.59765625, "learning_rate": 0.00012650568691762435, "loss": 0.8175, "step": 32985 }, { "epoch": 0.47, "grad_norm": 0.5703125, "learning_rate": 0.00012648154536084292, "loss": 1.0048, "step": 32990 }, { "epoch": 0.47, "grad_norm": 0.5546875, "learning_rate": 0.00012645740214416805, "loss": 0.8705, "step": 32995 }, { "epoch": 0.47, "grad_norm": 0.51171875, "learning_rate": 0.0001264332572691131, "loss": 0.8976, "step": 33000 }, { "epoch": 0.47, "grad_norm": 0.70703125, "learning_rate": 0.00012640911073719146, "loss": 1.0398, "step": 33005 }, { "epoch": 0.47, "grad_norm": 0.55078125, "learning_rate": 0.00012638496254991667, "loss": 1.1284, "step": 33010 }, { "epoch": 0.47, "grad_norm": 0.5234375, "learning_rate": 0.0001263608127088024, "loss": 0.9641, "step": 33015 }, { "epoch": 0.47, "grad_norm": 0.65234375, "learning_rate": 0.00012633666121536236, "loss": 0.8835, "step": 33020 }, { "epoch": 0.47, "grad_norm": 0.5546875, "learning_rate": 0.00012631250807111042, "loss": 0.8836, "step": 33025 }, { "epoch": 0.47, "grad_norm": 0.5703125, "learning_rate": 0.0001262883532775605, "loss": 0.9024, "step": 33030 }, { "epoch": 0.47, "grad_norm": 0.55078125, "learning_rate": 0.00012626419683622664, "loss": 0.8939, "step": 33035 }, { "epoch": 0.47, "grad_norm": 0.56640625, "learning_rate": 0.000126240038748623, "loss": 0.8697, "step": 33040 }, { "epoch": 0.47, "grad_norm": 0.59375, "learning_rate": 0.00012621587901626385, "loss": 0.9675, "step": 33045 }, { "epoch": 0.47, "grad_norm": 0.66015625, "learning_rate": 0.00012619171764066358, "loss": 1.0066, "step": 33050 }, { "epoch": 0.47, "grad_norm": 0.484375, "learning_rate": 0.0001261675546233366, "loss": 0.9397, "step": 33055 }, { "epoch": 0.47, "grad_norm": 0.51171875, "learning_rate": 0.00012614338996579748, "loss": 0.9274, "step": 33060 }, { "epoch": 0.47, "grad_norm": 0.478515625, "learning_rate": 0.0001261192236695609, "loss": 0.8634, "step": 33065 }, { "epoch": 0.47, "grad_norm": 0.609375, "learning_rate": 0.00012609505573614167, "loss": 0.9893, "step": 33070 }, { "epoch": 0.47, "grad_norm": 0.55859375, "learning_rate": 0.0001260708861670546, "loss": 0.927, "step": 33075 }, { "epoch": 0.47, "grad_norm": 0.61328125, "learning_rate": 0.0001260467149638147, "loss": 0.8995, "step": 33080 }, { "epoch": 0.47, "grad_norm": 0.5546875, "learning_rate": 0.00012602254212793702, "loss": 0.9559, "step": 33085 }, { "epoch": 0.47, "grad_norm": 0.578125, "learning_rate": 0.00012599836766093677, "loss": 0.8693, "step": 33090 }, { "epoch": 0.47, "grad_norm": 0.5390625, "learning_rate": 0.00012597419156432923, "loss": 0.8482, "step": 33095 }, { "epoch": 0.47, "grad_norm": 0.5703125, "learning_rate": 0.0001259500138396298, "loss": 1.0206, "step": 33100 }, { "epoch": 0.47, "grad_norm": 0.546875, "learning_rate": 0.00012592583448835394, "loss": 1.0884, "step": 33105 }, { "epoch": 0.47, "grad_norm": 0.703125, "learning_rate": 0.00012590165351201725, "loss": 1.1486, "step": 33110 }, { "epoch": 0.48, "grad_norm": 0.65234375, "learning_rate": 0.0001258774709121354, "loss": 0.8503, "step": 33115 }, { "epoch": 0.48, "grad_norm": 0.62109375, "learning_rate": 0.0001258532866902242, "loss": 0.9517, "step": 33120 }, { "epoch": 0.48, "grad_norm": 0.58203125, "learning_rate": 0.00012582910084779956, "loss": 1.1299, "step": 33125 }, { "epoch": 0.48, "grad_norm": 0.55078125, "learning_rate": 0.00012580491338637744, "loss": 0.9218, "step": 33130 }, { "epoch": 0.48, "grad_norm": 0.5390625, "learning_rate": 0.000125780724307474, "loss": 0.9923, "step": 33135 }, { "epoch": 0.48, "grad_norm": 0.490234375, "learning_rate": 0.0001257565336126054, "loss": 0.9179, "step": 33140 }, { "epoch": 0.48, "grad_norm": 0.5859375, "learning_rate": 0.00012573234130328789, "loss": 0.8966, "step": 33145 }, { "epoch": 0.48, "grad_norm": 0.5703125, "learning_rate": 0.00012570814738103794, "loss": 0.7933, "step": 33150 }, { "epoch": 0.48, "grad_norm": 0.55078125, "learning_rate": 0.00012568395184737205, "loss": 0.9099, "step": 33155 }, { "epoch": 0.48, "grad_norm": 0.53125, "learning_rate": 0.0001256597547038068, "loss": 1.0154, "step": 33160 }, { "epoch": 0.48, "grad_norm": 0.59765625, "learning_rate": 0.0001256355559518589, "loss": 0.9243, "step": 33165 }, { "epoch": 0.48, "grad_norm": 0.59765625, "learning_rate": 0.00012561135559304516, "loss": 1.0208, "step": 33170 }, { "epoch": 0.48, "grad_norm": 0.53515625, "learning_rate": 0.00012558715362888246, "loss": 1.0687, "step": 33175 }, { "epoch": 0.48, "grad_norm": 0.51953125, "learning_rate": 0.00012556295006088783, "loss": 0.9965, "step": 33180 }, { "epoch": 0.48, "grad_norm": 0.55859375, "learning_rate": 0.0001255387448905784, "loss": 1.0748, "step": 33185 }, { "epoch": 0.48, "grad_norm": 0.53515625, "learning_rate": 0.00012551453811947136, "loss": 0.8903, "step": 33190 }, { "epoch": 0.48, "grad_norm": 0.55859375, "learning_rate": 0.000125490329749084, "loss": 1.049, "step": 33195 }, { "epoch": 0.48, "grad_norm": 0.5078125, "learning_rate": 0.0001254661197809337, "loss": 0.7615, "step": 33200 }, { "epoch": 0.48, "grad_norm": 0.640625, "learning_rate": 0.00012544190821653806, "loss": 0.8462, "step": 33205 }, { "epoch": 0.48, "grad_norm": 0.58203125, "learning_rate": 0.00012541769505741465, "loss": 0.8998, "step": 33210 }, { "epoch": 0.48, "grad_norm": 0.65234375, "learning_rate": 0.00012539348030508115, "loss": 0.8996, "step": 33215 }, { "epoch": 0.48, "grad_norm": 0.59375, "learning_rate": 0.00012536926396105534, "loss": 0.9038, "step": 33220 }, { "epoch": 0.48, "grad_norm": 0.546875, "learning_rate": 0.00012534504602685522, "loss": 0.8514, "step": 33225 }, { "epoch": 0.48, "grad_norm": 0.53515625, "learning_rate": 0.00012532082650399873, "loss": 0.8233, "step": 33230 }, { "epoch": 0.48, "grad_norm": 0.515625, "learning_rate": 0.000125296605394004, "loss": 0.9716, "step": 33235 }, { "epoch": 0.48, "grad_norm": 0.5546875, "learning_rate": 0.0001252723826983892, "loss": 1.0804, "step": 33240 }, { "epoch": 0.48, "grad_norm": 0.75390625, "learning_rate": 0.00012524815841867272, "loss": 1.1893, "step": 33245 }, { "epoch": 0.48, "grad_norm": 0.54296875, "learning_rate": 0.00012522393255637293, "loss": 0.9279, "step": 33250 }, { "epoch": 0.48, "grad_norm": 0.5546875, "learning_rate": 0.00012519970511300826, "loss": 0.8892, "step": 33255 }, { "epoch": 0.48, "grad_norm": 0.71875, "learning_rate": 0.00012517547609009738, "loss": 0.9727, "step": 33260 }, { "epoch": 0.48, "grad_norm": 0.7421875, "learning_rate": 0.00012515124548915905, "loss": 0.9415, "step": 33265 }, { "epoch": 0.48, "grad_norm": 0.6015625, "learning_rate": 0.00012512701331171195, "loss": 0.9745, "step": 33270 }, { "epoch": 0.48, "grad_norm": 0.5078125, "learning_rate": 0.00012510277955927505, "loss": 0.8611, "step": 33275 }, { "epoch": 0.48, "grad_norm": 0.54296875, "learning_rate": 0.00012507854423336737, "loss": 1.0678, "step": 33280 }, { "epoch": 0.48, "grad_norm": 0.59375, "learning_rate": 0.0001250543073355079, "loss": 0.8843, "step": 33285 }, { "epoch": 0.48, "grad_norm": 0.625, "learning_rate": 0.000125030068867216, "loss": 0.8916, "step": 33290 }, { "epoch": 0.48, "grad_norm": 0.5703125, "learning_rate": 0.0001250058288300108, "loss": 0.9249, "step": 33295 }, { "epoch": 0.48, "grad_norm": 0.625, "learning_rate": 0.00012498158722541183, "loss": 0.9577, "step": 33300 }, { "epoch": 0.48, "grad_norm": 0.52734375, "learning_rate": 0.0001249573440549385, "loss": 1.1123, "step": 33305 }, { "epoch": 0.48, "grad_norm": 0.5390625, "learning_rate": 0.00012493309932011038, "loss": 0.8586, "step": 33310 }, { "epoch": 0.48, "grad_norm": 0.609375, "learning_rate": 0.0001249088530224473, "loss": 1.082, "step": 33315 }, { "epoch": 0.48, "grad_norm": 0.53515625, "learning_rate": 0.00012488460516346886, "loss": 0.9669, "step": 33320 }, { "epoch": 0.48, "grad_norm": 0.5703125, "learning_rate": 0.000124860355744695, "loss": 0.9882, "step": 33325 }, { "epoch": 0.48, "grad_norm": 0.54296875, "learning_rate": 0.0001248361047676458, "loss": 0.9785, "step": 33330 }, { "epoch": 0.48, "grad_norm": 0.50390625, "learning_rate": 0.00012481185223384123, "loss": 0.8829, "step": 33335 }, { "epoch": 0.48, "grad_norm": 0.59375, "learning_rate": 0.00012478759814480155, "loss": 1.1137, "step": 33340 }, { "epoch": 0.48, "grad_norm": 0.6484375, "learning_rate": 0.00012476334250204694, "loss": 1.1887, "step": 33345 }, { "epoch": 0.48, "grad_norm": 0.58203125, "learning_rate": 0.00012473908530709782, "loss": 0.847, "step": 33350 }, { "epoch": 0.48, "grad_norm": 0.53125, "learning_rate": 0.00012471482656147467, "loss": 0.9713, "step": 33355 }, { "epoch": 0.48, "grad_norm": 0.5625, "learning_rate": 0.00012469056626669803, "loss": 0.8042, "step": 33360 }, { "epoch": 0.48, "grad_norm": 0.6171875, "learning_rate": 0.0001246663044242886, "loss": 0.937, "step": 33365 }, { "epoch": 0.48, "grad_norm": 0.625, "learning_rate": 0.0001246420410357671, "loss": 0.9339, "step": 33370 }, { "epoch": 0.48, "grad_norm": 0.59375, "learning_rate": 0.00012461777610265444, "loss": 0.8133, "step": 33375 }, { "epoch": 0.48, "grad_norm": 0.55859375, "learning_rate": 0.00012459350962647147, "loss": 0.9549, "step": 33380 }, { "epoch": 0.48, "grad_norm": 0.57421875, "learning_rate": 0.00012456924160873936, "loss": 0.9742, "step": 33385 }, { "epoch": 0.48, "grad_norm": 0.55859375, "learning_rate": 0.00012454497205097916, "loss": 1.0086, "step": 33390 }, { "epoch": 0.48, "grad_norm": 0.55078125, "learning_rate": 0.0001245207009547122, "loss": 0.9169, "step": 33395 }, { "epoch": 0.48, "grad_norm": 0.62890625, "learning_rate": 0.00012449642832145977, "loss": 1.0439, "step": 33400 }, { "epoch": 0.48, "grad_norm": 0.5546875, "learning_rate": 0.0001244721541527433, "loss": 0.9336, "step": 33405 }, { "epoch": 0.48, "grad_norm": 0.609375, "learning_rate": 0.00012444787845008432, "loss": 0.972, "step": 33410 }, { "epoch": 0.48, "grad_norm": 0.6953125, "learning_rate": 0.00012442360121500448, "loss": 1.0084, "step": 33415 }, { "epoch": 0.48, "grad_norm": 0.5546875, "learning_rate": 0.00012439932244902554, "loss": 0.9818, "step": 33420 }, { "epoch": 0.48, "grad_norm": 0.625, "learning_rate": 0.00012437504215366926, "loss": 0.9483, "step": 33425 }, { "epoch": 0.48, "grad_norm": 0.71875, "learning_rate": 0.00012435076033045757, "loss": 0.9617, "step": 33430 }, { "epoch": 0.48, "grad_norm": 0.56640625, "learning_rate": 0.0001243264769809125, "loss": 0.9819, "step": 33435 }, { "epoch": 0.48, "grad_norm": 0.5390625, "learning_rate": 0.0001243021921065561, "loss": 1.0103, "step": 33440 }, { "epoch": 0.48, "grad_norm": 0.59765625, "learning_rate": 0.00012427790570891068, "loss": 0.8925, "step": 33445 }, { "epoch": 0.48, "grad_norm": 0.52734375, "learning_rate": 0.00012425361778949846, "loss": 0.8525, "step": 33450 }, { "epoch": 0.48, "grad_norm": 0.60546875, "learning_rate": 0.00012422932834984187, "loss": 1.038, "step": 33455 }, { "epoch": 0.48, "grad_norm": 0.53125, "learning_rate": 0.00012420503739146333, "loss": 0.9946, "step": 33460 }, { "epoch": 0.48, "grad_norm": 0.58203125, "learning_rate": 0.00012418074491588553, "loss": 1.1663, "step": 33465 }, { "epoch": 0.48, "grad_norm": 0.55859375, "learning_rate": 0.0001241564509246311, "loss": 0.9745, "step": 33470 }, { "epoch": 0.48, "grad_norm": 0.423828125, "learning_rate": 0.00012413215541922282, "loss": 0.8693, "step": 33475 }, { "epoch": 0.48, "grad_norm": 0.55078125, "learning_rate": 0.00012410785840118353, "loss": 1.0118, "step": 33480 }, { "epoch": 0.48, "grad_norm": 0.66015625, "learning_rate": 0.0001240835598720362, "loss": 0.9253, "step": 33485 }, { "epoch": 0.48, "grad_norm": 0.515625, "learning_rate": 0.00012405925983330392, "loss": 0.829, "step": 33490 }, { "epoch": 0.48, "grad_norm": 0.5625, "learning_rate": 0.00012403495828650985, "loss": 0.8592, "step": 33495 }, { "epoch": 0.48, "grad_norm": 0.578125, "learning_rate": 0.00012401065523317723, "loss": 0.9773, "step": 33500 }, { "epoch": 0.48, "grad_norm": 0.50390625, "learning_rate": 0.00012398635067482937, "loss": 1.0826, "step": 33505 }, { "epoch": 0.48, "grad_norm": 0.53125, "learning_rate": 0.00012396204461298974, "loss": 0.9736, "step": 33510 }, { "epoch": 0.48, "grad_norm": 0.640625, "learning_rate": 0.00012393773704918185, "loss": 0.9207, "step": 33515 }, { "epoch": 0.48, "grad_norm": 0.55078125, "learning_rate": 0.0001239134279849294, "loss": 1.0038, "step": 33520 }, { "epoch": 0.48, "grad_norm": 0.5859375, "learning_rate": 0.00012388911742175599, "loss": 1.0029, "step": 33525 }, { "epoch": 0.48, "grad_norm": 0.6015625, "learning_rate": 0.0001238648053611855, "loss": 1.0463, "step": 33530 }, { "epoch": 0.48, "grad_norm": 0.6015625, "learning_rate": 0.00012384049180474182, "loss": 1.1262, "step": 33535 }, { "epoch": 0.48, "grad_norm": 0.55859375, "learning_rate": 0.00012381617675394897, "loss": 0.8751, "step": 33540 }, { "epoch": 0.48, "grad_norm": 0.6484375, "learning_rate": 0.00012379186021033105, "loss": 1.022, "step": 33545 }, { "epoch": 0.48, "grad_norm": 0.58203125, "learning_rate": 0.00012376754217541225, "loss": 1.0283, "step": 33550 }, { "epoch": 0.48, "grad_norm": 0.55078125, "learning_rate": 0.00012374322265071682, "loss": 1.0613, "step": 33555 }, { "epoch": 0.48, "grad_norm": 0.578125, "learning_rate": 0.00012371890163776912, "loss": 0.9887, "step": 33560 }, { "epoch": 0.48, "grad_norm": 0.6171875, "learning_rate": 0.0001236945791380937, "loss": 1.0686, "step": 33565 }, { "epoch": 0.48, "grad_norm": 0.578125, "learning_rate": 0.00012367025515321503, "loss": 1.0544, "step": 33570 }, { "epoch": 0.48, "grad_norm": 0.73046875, "learning_rate": 0.00012364592968465784, "loss": 1.1148, "step": 33575 }, { "epoch": 0.48, "grad_norm": 0.5625, "learning_rate": 0.00012362160273394685, "loss": 1.0274, "step": 33580 }, { "epoch": 0.48, "grad_norm": 0.6875, "learning_rate": 0.0001235972743026069, "loss": 1.0751, "step": 33585 }, { "epoch": 0.48, "grad_norm": 0.53125, "learning_rate": 0.0001235729443921629, "loss": 1.0264, "step": 33590 }, { "epoch": 0.48, "grad_norm": 0.55859375, "learning_rate": 0.0001235486130041399, "loss": 0.924, "step": 33595 }, { "epoch": 0.48, "grad_norm": 0.5859375, "learning_rate": 0.00012352428014006302, "loss": 1.1058, "step": 33600 }, { "epoch": 0.48, "grad_norm": 0.5625, "learning_rate": 0.0001234999458014575, "loss": 0.944, "step": 33605 }, { "epoch": 0.48, "grad_norm": 0.54296875, "learning_rate": 0.00012347560998984857, "loss": 1.0496, "step": 33610 }, { "epoch": 0.48, "grad_norm": 0.5703125, "learning_rate": 0.0001234512727067617, "loss": 1.0869, "step": 33615 }, { "epoch": 0.48, "grad_norm": 0.53125, "learning_rate": 0.00012342693395372232, "loss": 0.9073, "step": 33620 }, { "epoch": 0.48, "grad_norm": 0.61328125, "learning_rate": 0.00012340259373225604, "loss": 0.9367, "step": 33625 }, { "epoch": 0.48, "grad_norm": 0.6328125, "learning_rate": 0.00012337825204388858, "loss": 0.9548, "step": 33630 }, { "epoch": 0.48, "grad_norm": 0.5703125, "learning_rate": 0.0001233539088901456, "loss": 0.902, "step": 33635 }, { "epoch": 0.48, "grad_norm": 0.609375, "learning_rate": 0.000123329564272553, "loss": 1.0901, "step": 33640 }, { "epoch": 0.48, "grad_norm": 0.58984375, "learning_rate": 0.0001233052181926368, "loss": 0.9495, "step": 33645 }, { "epoch": 0.48, "grad_norm": 0.515625, "learning_rate": 0.00012328087065192295, "loss": 1.1589, "step": 33650 }, { "epoch": 0.48, "grad_norm": 0.58203125, "learning_rate": 0.00012325652165193763, "loss": 0.8439, "step": 33655 }, { "epoch": 0.48, "grad_norm": 0.64453125, "learning_rate": 0.00012323217119420706, "loss": 0.9891, "step": 33660 }, { "epoch": 0.48, "grad_norm": 0.5546875, "learning_rate": 0.00012320781928025747, "loss": 0.8962, "step": 33665 }, { "epoch": 0.48, "grad_norm": 0.66015625, "learning_rate": 0.0001231834659116154, "loss": 0.9551, "step": 33670 }, { "epoch": 0.48, "grad_norm": 0.56640625, "learning_rate": 0.00012315911108980727, "loss": 0.9165, "step": 33675 }, { "epoch": 0.48, "grad_norm": 0.59765625, "learning_rate": 0.00012313475481635965, "loss": 0.8704, "step": 33680 }, { "epoch": 0.48, "grad_norm": 0.5546875, "learning_rate": 0.0001231103970927993, "loss": 1.014, "step": 33685 }, { "epoch": 0.48, "grad_norm": 0.515625, "learning_rate": 0.0001230860379206529, "loss": 0.9021, "step": 33690 }, { "epoch": 0.48, "grad_norm": 0.51953125, "learning_rate": 0.00012306167730144737, "loss": 0.9223, "step": 33695 }, { "epoch": 0.48, "grad_norm": 0.57421875, "learning_rate": 0.00012303731523670964, "loss": 0.8964, "step": 33700 }, { "epoch": 0.48, "grad_norm": 0.5546875, "learning_rate": 0.00012301295172796673, "loss": 1.0116, "step": 33705 }, { "epoch": 0.48, "grad_norm": 0.5390625, "learning_rate": 0.00012298858677674585, "loss": 0.9617, "step": 33710 }, { "epoch": 0.48, "grad_norm": 0.84765625, "learning_rate": 0.00012296422038457413, "loss": 0.9698, "step": 33715 }, { "epoch": 0.48, "grad_norm": 0.6171875, "learning_rate": 0.0001229398525529789, "loss": 0.9268, "step": 33720 }, { "epoch": 0.48, "grad_norm": 0.515625, "learning_rate": 0.00012291548328348764, "loss": 1.0052, "step": 33725 }, { "epoch": 0.48, "grad_norm": 0.546875, "learning_rate": 0.00012289111257762775, "loss": 1.0781, "step": 33730 }, { "epoch": 0.48, "grad_norm": 0.62109375, "learning_rate": 0.00012286674043692687, "loss": 1.0105, "step": 33735 }, { "epoch": 0.48, "grad_norm": 0.515625, "learning_rate": 0.00012284236686291265, "loss": 0.9892, "step": 33740 }, { "epoch": 0.48, "grad_norm": 0.546875, "learning_rate": 0.00012281799185711285, "loss": 0.9825, "step": 33745 }, { "epoch": 0.48, "grad_norm": 0.51171875, "learning_rate": 0.00012279361542105537, "loss": 0.9173, "step": 33750 }, { "epoch": 0.48, "grad_norm": 0.58203125, "learning_rate": 0.0001227692375562681, "loss": 0.956, "step": 33755 }, { "epoch": 0.48, "grad_norm": 0.609375, "learning_rate": 0.00012274485826427905, "loss": 1.0413, "step": 33760 }, { "epoch": 0.48, "grad_norm": 0.59375, "learning_rate": 0.00012272047754661642, "loss": 0.9672, "step": 33765 }, { "epoch": 0.48, "grad_norm": 0.5234375, "learning_rate": 0.00012269609540480834, "loss": 0.9924, "step": 33770 }, { "epoch": 0.48, "grad_norm": 0.62109375, "learning_rate": 0.0001226717118403832, "loss": 0.915, "step": 33775 }, { "epoch": 0.48, "grad_norm": 0.74609375, "learning_rate": 0.00012264732685486932, "loss": 0.9189, "step": 33780 }, { "epoch": 0.48, "grad_norm": 0.5, "learning_rate": 0.0001226229404497952, "loss": 0.9149, "step": 33785 }, { "epoch": 0.48, "grad_norm": 0.54296875, "learning_rate": 0.0001225985526266894, "loss": 0.9591, "step": 33790 }, { "epoch": 0.48, "grad_norm": 0.58984375, "learning_rate": 0.0001225741633870806, "loss": 0.9421, "step": 33795 }, { "epoch": 0.48, "grad_norm": 0.546875, "learning_rate": 0.00012254977273249752, "loss": 0.8335, "step": 33800 }, { "epoch": 0.48, "grad_norm": 0.490234375, "learning_rate": 0.000122525380664469, "loss": 0.8758, "step": 33805 }, { "epoch": 0.48, "grad_norm": 0.6328125, "learning_rate": 0.00012250098718452398, "loss": 0.9467, "step": 33810 }, { "epoch": 0.49, "grad_norm": 0.57421875, "learning_rate": 0.00012247659229419147, "loss": 0.9301, "step": 33815 }, { "epoch": 0.49, "grad_norm": 0.5390625, "learning_rate": 0.0001224521959950005, "loss": 0.9686, "step": 33820 }, { "epoch": 0.49, "grad_norm": 0.5390625, "learning_rate": 0.00012242779828848033, "loss": 0.8002, "step": 33825 }, { "epoch": 0.49, "grad_norm": 0.58984375, "learning_rate": 0.00012240339917616027, "loss": 0.9443, "step": 33830 }, { "epoch": 0.49, "grad_norm": 0.5546875, "learning_rate": 0.0001223789986595696, "loss": 1.0118, "step": 33835 }, { "epoch": 0.49, "grad_norm": 0.55078125, "learning_rate": 0.0001223545967402378, "loss": 0.9694, "step": 33840 }, { "epoch": 0.49, "grad_norm": 0.5390625, "learning_rate": 0.00012233019341969443, "loss": 0.9285, "step": 33845 }, { "epoch": 0.49, "grad_norm": 0.53125, "learning_rate": 0.00012230578869946909, "loss": 1.1095, "step": 33850 }, { "epoch": 0.49, "grad_norm": 0.73046875, "learning_rate": 0.00012228138258109153, "loss": 1.1028, "step": 33855 }, { "epoch": 0.49, "grad_norm": 0.56640625, "learning_rate": 0.0001222569750660915, "loss": 0.9773, "step": 33860 }, { "epoch": 0.49, "grad_norm": 0.5703125, "learning_rate": 0.00012223256615599896, "loss": 0.9878, "step": 33865 }, { "epoch": 0.49, "grad_norm": 0.59375, "learning_rate": 0.00012220815585234384, "loss": 1.0119, "step": 33870 }, { "epoch": 0.49, "grad_norm": 0.59375, "learning_rate": 0.00012218374415665624, "loss": 1.0379, "step": 33875 }, { "epoch": 0.49, "grad_norm": 0.51171875, "learning_rate": 0.00012215933107046626, "loss": 1.0315, "step": 33880 }, { "epoch": 0.49, "grad_norm": 0.578125, "learning_rate": 0.00012213491659530417, "loss": 0.8895, "step": 33885 }, { "epoch": 0.49, "grad_norm": 0.5390625, "learning_rate": 0.0001221105007327003, "loss": 0.8463, "step": 33890 }, { "epoch": 0.49, "grad_norm": 0.50390625, "learning_rate": 0.0001220860834841851, "loss": 0.8936, "step": 33895 }, { "epoch": 0.49, "grad_norm": 0.5546875, "learning_rate": 0.00012206166485128898, "loss": 0.9165, "step": 33900 }, { "epoch": 0.49, "grad_norm": 0.68359375, "learning_rate": 0.00012203724483554262, "loss": 1.2033, "step": 33905 }, { "epoch": 0.49, "grad_norm": 0.51171875, "learning_rate": 0.00012201282343847662, "loss": 1.0308, "step": 33910 }, { "epoch": 0.49, "grad_norm": 0.55078125, "learning_rate": 0.00012198840066162178, "loss": 0.9276, "step": 33915 }, { "epoch": 0.49, "grad_norm": 0.546875, "learning_rate": 0.00012196397650650897, "loss": 0.8148, "step": 33920 }, { "epoch": 0.49, "grad_norm": 0.625, "learning_rate": 0.00012193955097466909, "loss": 1.0166, "step": 33925 }, { "epoch": 0.49, "grad_norm": 0.54296875, "learning_rate": 0.00012191512406763319, "loss": 0.8964, "step": 33930 }, { "epoch": 0.49, "grad_norm": 0.5234375, "learning_rate": 0.0001218906957869323, "loss": 0.7973, "step": 33935 }, { "epoch": 0.49, "grad_norm": 0.6171875, "learning_rate": 0.00012186626613409771, "loss": 0.8976, "step": 33940 }, { "epoch": 0.49, "grad_norm": 0.5390625, "learning_rate": 0.00012184183511066065, "loss": 0.8737, "step": 33945 }, { "epoch": 0.49, "grad_norm": 0.53125, "learning_rate": 0.00012181740271815248, "loss": 0.8809, "step": 33950 }, { "epoch": 0.49, "grad_norm": 0.57421875, "learning_rate": 0.00012179296895810466, "loss": 0.875, "step": 33955 }, { "epoch": 0.49, "grad_norm": 0.53125, "learning_rate": 0.00012176853383204873, "loss": 0.8637, "step": 33960 }, { "epoch": 0.49, "grad_norm": 0.60546875, "learning_rate": 0.00012174409734151628, "loss": 0.9441, "step": 33965 }, { "epoch": 0.49, "grad_norm": 0.53125, "learning_rate": 0.0001217196594880391, "loss": 0.9632, "step": 33970 }, { "epoch": 0.49, "grad_norm": 0.55859375, "learning_rate": 0.00012169522027314888, "loss": 1.0033, "step": 33975 }, { "epoch": 0.49, "grad_norm": 0.57421875, "learning_rate": 0.00012167077969837755, "loss": 0.9105, "step": 33980 }, { "epoch": 0.49, "grad_norm": 0.62109375, "learning_rate": 0.0001216463377652571, "loss": 1.0741, "step": 33985 }, { "epoch": 0.49, "grad_norm": 0.58203125, "learning_rate": 0.00012162189447531949, "loss": 1.0443, "step": 33990 }, { "epoch": 0.49, "grad_norm": 0.5546875, "learning_rate": 0.00012159744983009695, "loss": 1.0444, "step": 33995 }, { "epoch": 0.49, "grad_norm": 0.54296875, "learning_rate": 0.00012157300383112167, "loss": 1.0718, "step": 34000 }, { "epoch": 0.49, "grad_norm": 0.6171875, "learning_rate": 0.00012154855647992591, "loss": 0.9506, "step": 34005 }, { "epoch": 0.49, "grad_norm": 0.51953125, "learning_rate": 0.00012152410777804209, "loss": 0.9411, "step": 34010 }, { "epoch": 0.49, "grad_norm": 0.5234375, "learning_rate": 0.00012149965772700269, "loss": 0.9822, "step": 34015 }, { "epoch": 0.49, "grad_norm": 0.56640625, "learning_rate": 0.00012147520632834023, "loss": 0.9988, "step": 34020 }, { "epoch": 0.49, "grad_norm": 0.50390625, "learning_rate": 0.00012145075358358744, "loss": 0.9612, "step": 34025 }, { "epoch": 0.49, "grad_norm": 0.6171875, "learning_rate": 0.00012142629949427693, "loss": 0.9243, "step": 34030 }, { "epoch": 0.49, "grad_norm": 0.58203125, "learning_rate": 0.0001214018440619416, "loss": 1.0716, "step": 34035 }, { "epoch": 0.49, "grad_norm": 0.55078125, "learning_rate": 0.0001213773872881143, "loss": 0.996, "step": 34040 }, { "epoch": 0.49, "grad_norm": 0.765625, "learning_rate": 0.00012135292917432799, "loss": 0.842, "step": 34045 }, { "epoch": 0.49, "grad_norm": 0.55078125, "learning_rate": 0.0001213284697221158, "loss": 0.915, "step": 34050 }, { "epoch": 0.49, "grad_norm": 0.609375, "learning_rate": 0.00012130400893301081, "loss": 0.9155, "step": 34055 }, { "epoch": 0.49, "grad_norm": 0.5625, "learning_rate": 0.00012127954680854628, "loss": 0.8287, "step": 34060 }, { "epoch": 0.49, "grad_norm": 0.55859375, "learning_rate": 0.00012125508335025552, "loss": 0.829, "step": 34065 }, { "epoch": 0.49, "grad_norm": 0.546875, "learning_rate": 0.00012123061855967195, "loss": 0.9948, "step": 34070 }, { "epoch": 0.49, "grad_norm": 0.72265625, "learning_rate": 0.00012120615243832903, "loss": 0.9894, "step": 34075 }, { "epoch": 0.49, "grad_norm": 0.5859375, "learning_rate": 0.0001211816849877603, "loss": 1.0433, "step": 34080 }, { "epoch": 0.49, "grad_norm": 0.55078125, "learning_rate": 0.00012115721620949942, "loss": 0.9365, "step": 34085 }, { "epoch": 0.49, "grad_norm": 0.458984375, "learning_rate": 0.00012113274610508013, "loss": 1.0574, "step": 34090 }, { "epoch": 0.49, "grad_norm": 0.5, "learning_rate": 0.00012110827467603629, "loss": 0.8886, "step": 34095 }, { "epoch": 0.49, "grad_norm": 0.4921875, "learning_rate": 0.00012108380192390172, "loss": 0.8493, "step": 34100 }, { "epoch": 0.49, "grad_norm": 0.6484375, "learning_rate": 0.00012105932785021046, "loss": 0.9578, "step": 34105 }, { "epoch": 0.49, "grad_norm": 0.65234375, "learning_rate": 0.00012103485245649651, "loss": 1.0486, "step": 34110 }, { "epoch": 0.49, "grad_norm": 0.59765625, "learning_rate": 0.00012101037574429409, "loss": 0.9805, "step": 34115 }, { "epoch": 0.49, "grad_norm": 0.546875, "learning_rate": 0.00012098589771513736, "loss": 0.9243, "step": 34120 }, { "epoch": 0.49, "grad_norm": 0.53515625, "learning_rate": 0.00012096141837056067, "loss": 0.8967, "step": 34125 }, { "epoch": 0.49, "grad_norm": 0.56640625, "learning_rate": 0.0001209369377120984, "loss": 0.902, "step": 34130 }, { "epoch": 0.49, "grad_norm": 0.60546875, "learning_rate": 0.00012091245574128505, "loss": 1.0998, "step": 34135 }, { "epoch": 0.49, "grad_norm": 0.6640625, "learning_rate": 0.0001208879724596551, "loss": 0.9651, "step": 34140 }, { "epoch": 0.49, "grad_norm": 0.65625, "learning_rate": 0.00012086348786874331, "loss": 0.9981, "step": 34145 }, { "epoch": 0.49, "grad_norm": 0.6328125, "learning_rate": 0.0001208390019700843, "loss": 1.0658, "step": 34150 }, { "epoch": 0.49, "grad_norm": 0.51953125, "learning_rate": 0.00012081451476521293, "loss": 0.9321, "step": 34155 }, { "epoch": 0.49, "grad_norm": 0.53125, "learning_rate": 0.00012079002625566409, "loss": 0.9644, "step": 34160 }, { "epoch": 0.49, "grad_norm": 0.62890625, "learning_rate": 0.00012076553644297268, "loss": 1.0264, "step": 34165 }, { "epoch": 0.49, "grad_norm": 0.5703125, "learning_rate": 0.00012074104532867381, "loss": 0.9996, "step": 34170 }, { "epoch": 0.49, "grad_norm": 0.6484375, "learning_rate": 0.00012071655291430261, "loss": 0.9504, "step": 34175 }, { "epoch": 0.49, "grad_norm": 0.5625, "learning_rate": 0.00012069205920139428, "loss": 0.9919, "step": 34180 }, { "epoch": 0.49, "grad_norm": 0.53515625, "learning_rate": 0.0001206675641914841, "loss": 1.0253, "step": 34185 }, { "epoch": 0.49, "grad_norm": 0.5859375, "learning_rate": 0.00012064306788610749, "loss": 1.0116, "step": 34190 }, { "epoch": 0.49, "grad_norm": 0.546875, "learning_rate": 0.00012061857028679982, "loss": 0.9081, "step": 34195 }, { "epoch": 0.49, "grad_norm": 0.61328125, "learning_rate": 0.00012059407139509671, "loss": 0.9639, "step": 34200 }, { "epoch": 0.49, "grad_norm": 0.494140625, "learning_rate": 0.00012056957121253377, "loss": 0.9585, "step": 34205 }, { "epoch": 0.49, "grad_norm": 0.62109375, "learning_rate": 0.00012054506974064665, "loss": 0.8973, "step": 34210 }, { "epoch": 0.49, "grad_norm": 0.5625, "learning_rate": 0.00012052056698097118, "loss": 0.9475, "step": 34215 }, { "epoch": 0.49, "grad_norm": 0.53125, "learning_rate": 0.00012049606293504317, "loss": 0.9987, "step": 34220 }, { "epoch": 0.49, "grad_norm": 0.8671875, "learning_rate": 0.00012047155760439861, "loss": 0.9729, "step": 34225 }, { "epoch": 0.49, "grad_norm": 0.5859375, "learning_rate": 0.00012044705099057352, "loss": 0.8508, "step": 34230 }, { "epoch": 0.49, "grad_norm": 0.53515625, "learning_rate": 0.00012042254309510398, "loss": 1.0183, "step": 34235 }, { "epoch": 0.49, "grad_norm": 0.52734375, "learning_rate": 0.00012039803391952617, "loss": 0.856, "step": 34240 }, { "epoch": 0.49, "grad_norm": 0.91796875, "learning_rate": 0.00012037352346537639, "loss": 0.7685, "step": 34245 }, { "epoch": 0.49, "grad_norm": 0.640625, "learning_rate": 0.00012034901173419091, "loss": 0.9855, "step": 34250 }, { "epoch": 0.49, "grad_norm": 0.5625, "learning_rate": 0.00012032449872750621, "loss": 0.9725, "step": 34255 }, { "epoch": 0.49, "grad_norm": 0.52734375, "learning_rate": 0.00012029998444685881, "loss": 0.9326, "step": 34260 }, { "epoch": 0.49, "grad_norm": 0.68359375, "learning_rate": 0.00012027546889378525, "loss": 0.9455, "step": 34265 }, { "epoch": 0.49, "grad_norm": 0.50390625, "learning_rate": 0.0001202509520698222, "loss": 0.9497, "step": 34270 }, { "epoch": 0.49, "grad_norm": 0.73828125, "learning_rate": 0.00012022643397650642, "loss": 1.266, "step": 34275 }, { "epoch": 0.49, "grad_norm": 0.49609375, "learning_rate": 0.00012020191461537471, "loss": 0.9617, "step": 34280 }, { "epoch": 0.49, "grad_norm": 0.76171875, "learning_rate": 0.00012017739398796401, "loss": 0.9301, "step": 34285 }, { "epoch": 0.49, "grad_norm": 0.58203125, "learning_rate": 0.00012015287209581125, "loss": 0.9156, "step": 34290 }, { "epoch": 0.49, "grad_norm": 0.5625, "learning_rate": 0.00012012834894045353, "loss": 0.9515, "step": 34295 }, { "epoch": 0.49, "grad_norm": 0.5390625, "learning_rate": 0.00012010382452342797, "loss": 0.8186, "step": 34300 }, { "epoch": 0.49, "grad_norm": 0.64453125, "learning_rate": 0.00012007929884627176, "loss": 0.9397, "step": 34305 }, { "epoch": 0.49, "grad_norm": 0.6015625, "learning_rate": 0.00012005477191052228, "loss": 0.9877, "step": 34310 }, { "epoch": 0.49, "grad_norm": 0.5859375, "learning_rate": 0.00012003024371771683, "loss": 1.0068, "step": 34315 }, { "epoch": 0.49, "grad_norm": 0.5703125, "learning_rate": 0.00012000571426939289, "loss": 0.913, "step": 34320 }, { "epoch": 0.49, "grad_norm": 0.56640625, "learning_rate": 0.000119981183567088, "loss": 0.9533, "step": 34325 }, { "epoch": 0.49, "grad_norm": 0.53515625, "learning_rate": 0.00011995665161233977, "loss": 0.9048, "step": 34330 }, { "epoch": 0.49, "grad_norm": 0.53125, "learning_rate": 0.00011993211840668588, "loss": 0.9978, "step": 34335 }, { "epoch": 0.49, "grad_norm": 0.5625, "learning_rate": 0.00011990758395166415, "loss": 0.9513, "step": 34340 }, { "epoch": 0.49, "grad_norm": 0.71875, "learning_rate": 0.00011988304824881234, "loss": 0.9389, "step": 34345 }, { "epoch": 0.49, "grad_norm": 0.54296875, "learning_rate": 0.00011985851129966843, "loss": 0.9301, "step": 34350 }, { "epoch": 0.49, "grad_norm": 0.5703125, "learning_rate": 0.0001198339731057704, "loss": 1.1964, "step": 34355 }, { "epoch": 0.49, "grad_norm": 0.5703125, "learning_rate": 0.00011980943366865636, "loss": 1.0127, "step": 34360 }, { "epoch": 0.49, "grad_norm": 0.625, "learning_rate": 0.00011978489298986448, "loss": 0.8845, "step": 34365 }, { "epoch": 0.49, "grad_norm": 0.55078125, "learning_rate": 0.00011976035107093294, "loss": 0.9555, "step": 34370 }, { "epoch": 0.49, "grad_norm": 0.46875, "learning_rate": 0.00011973580791340011, "loss": 0.903, "step": 34375 }, { "epoch": 0.49, "grad_norm": 0.5, "learning_rate": 0.00011971126351880435, "loss": 0.9846, "step": 34380 }, { "epoch": 0.49, "grad_norm": 0.57421875, "learning_rate": 0.00011968671788868413, "loss": 0.9165, "step": 34385 }, { "epoch": 0.49, "grad_norm": 0.5390625, "learning_rate": 0.00011966217102457807, "loss": 1.0022, "step": 34390 }, { "epoch": 0.49, "grad_norm": 0.625, "learning_rate": 0.0001196376229280247, "loss": 1.0029, "step": 34395 }, { "epoch": 0.49, "grad_norm": 0.5703125, "learning_rate": 0.00011961307360056273, "loss": 0.9662, "step": 34400 }, { "epoch": 0.49, "grad_norm": 0.5625, "learning_rate": 0.00011958852304373099, "loss": 1.0455, "step": 34405 }, { "epoch": 0.49, "grad_norm": 0.625, "learning_rate": 0.00011956397125906834, "loss": 0.8823, "step": 34410 }, { "epoch": 0.49, "grad_norm": 0.5234375, "learning_rate": 0.00011953941824811363, "loss": 0.8869, "step": 34415 }, { "epoch": 0.49, "grad_norm": 0.5390625, "learning_rate": 0.00011951486401240601, "loss": 0.8601, "step": 34420 }, { "epoch": 0.49, "grad_norm": 0.609375, "learning_rate": 0.00011949030855348445, "loss": 0.9119, "step": 34425 }, { "epoch": 0.49, "grad_norm": 0.53515625, "learning_rate": 0.00011946575187288815, "loss": 0.937, "step": 34430 }, { "epoch": 0.49, "grad_norm": 0.51953125, "learning_rate": 0.00011944119397215634, "loss": 0.9595, "step": 34435 }, { "epoch": 0.49, "grad_norm": 0.6328125, "learning_rate": 0.00011941663485282837, "loss": 0.9202, "step": 34440 }, { "epoch": 0.49, "grad_norm": 0.5546875, "learning_rate": 0.00011939207451644363, "loss": 0.9054, "step": 34445 }, { "epoch": 0.49, "grad_norm": 0.55859375, "learning_rate": 0.00011936751296454155, "loss": 1.0402, "step": 34450 }, { "epoch": 0.49, "grad_norm": 0.6171875, "learning_rate": 0.00011934295019866168, "loss": 1.0998, "step": 34455 }, { "epoch": 0.49, "grad_norm": 0.484375, "learning_rate": 0.00011931838622034371, "loss": 0.9521, "step": 34460 }, { "epoch": 0.49, "grad_norm": 0.60546875, "learning_rate": 0.00011929382103112725, "loss": 0.9183, "step": 34465 }, { "epoch": 0.49, "grad_norm": 0.57421875, "learning_rate": 0.00011926925463255214, "loss": 1.0728, "step": 34470 }, { "epoch": 0.49, "grad_norm": 0.490234375, "learning_rate": 0.00011924468702615818, "loss": 0.8879, "step": 34475 }, { "epoch": 0.49, "grad_norm": 0.74609375, "learning_rate": 0.00011922011821348533, "loss": 0.9262, "step": 34480 }, { "epoch": 0.49, "grad_norm": 0.55078125, "learning_rate": 0.00011919554819607359, "loss": 0.9469, "step": 34485 }, { "epoch": 0.49, "grad_norm": 0.609375, "learning_rate": 0.00011917097697546303, "loss": 1.0009, "step": 34490 }, { "epoch": 0.49, "grad_norm": 0.65625, "learning_rate": 0.00011914640455319377, "loss": 0.8685, "step": 34495 }, { "epoch": 0.49, "grad_norm": 0.71875, "learning_rate": 0.00011912183093080611, "loss": 0.942, "step": 34500 }, { "epoch": 0.49, "grad_norm": 0.578125, "learning_rate": 0.00011909725610984026, "loss": 0.9429, "step": 34505 }, { "epoch": 0.5, "grad_norm": 0.546875, "learning_rate": 0.00011907268009183668, "loss": 0.9046, "step": 34510 }, { "epoch": 0.5, "grad_norm": 0.486328125, "learning_rate": 0.00011904810287833579, "loss": 0.9142, "step": 34515 }, { "epoch": 0.5, "grad_norm": 0.58984375, "learning_rate": 0.0001190235244708781, "loss": 1.062, "step": 34520 }, { "epoch": 0.5, "grad_norm": 0.5390625, "learning_rate": 0.00011899894487100425, "loss": 0.8536, "step": 34525 }, { "epoch": 0.5, "grad_norm": 0.57421875, "learning_rate": 0.00011897436408025488, "loss": 1.0806, "step": 34530 }, { "epoch": 0.5, "grad_norm": 0.546875, "learning_rate": 0.00011894978210017076, "loss": 1.1766, "step": 34535 }, { "epoch": 0.5, "grad_norm": 0.52734375, "learning_rate": 0.00011892519893229272, "loss": 0.9887, "step": 34540 }, { "epoch": 0.5, "grad_norm": 0.5, "learning_rate": 0.00011890061457816167, "loss": 0.9299, "step": 34545 }, { "epoch": 0.5, "grad_norm": 0.5546875, "learning_rate": 0.00011887602903931856, "loss": 0.9877, "step": 34550 }, { "epoch": 0.5, "grad_norm": 0.61328125, "learning_rate": 0.00011885144231730445, "loss": 0.8862, "step": 34555 }, { "epoch": 0.5, "grad_norm": 0.53125, "learning_rate": 0.00011882685441366046, "loss": 0.9471, "step": 34560 }, { "epoch": 0.5, "grad_norm": 0.55078125, "learning_rate": 0.0001188022653299278, "loss": 0.9956, "step": 34565 }, { "epoch": 0.5, "grad_norm": 0.55859375, "learning_rate": 0.00011877767506764774, "loss": 1.0488, "step": 34570 }, { "epoch": 0.5, "grad_norm": 0.66796875, "learning_rate": 0.00011875308362836163, "loss": 0.964, "step": 34575 }, { "epoch": 0.5, "grad_norm": 0.482421875, "learning_rate": 0.00011872849101361088, "loss": 0.8103, "step": 34580 }, { "epoch": 0.5, "grad_norm": 0.5078125, "learning_rate": 0.00011870389722493698, "loss": 0.9446, "step": 34585 }, { "epoch": 0.5, "grad_norm": 0.439453125, "learning_rate": 0.00011867930226388147, "loss": 0.8547, "step": 34590 }, { "epoch": 0.5, "grad_norm": 0.53125, "learning_rate": 0.00011865470613198603, "loss": 0.8854, "step": 34595 }, { "epoch": 0.5, "grad_norm": 0.5234375, "learning_rate": 0.0001186301088307924, "loss": 1.0066, "step": 34600 }, { "epoch": 0.5, "grad_norm": 0.58984375, "learning_rate": 0.00011860551036184229, "loss": 0.8916, "step": 34605 }, { "epoch": 0.5, "grad_norm": 0.546875, "learning_rate": 0.00011858091072667763, "loss": 1.0072, "step": 34610 }, { "epoch": 0.5, "grad_norm": 0.56640625, "learning_rate": 0.00011855630992684028, "loss": 0.8621, "step": 34615 }, { "epoch": 0.5, "grad_norm": 1.8828125, "learning_rate": 0.00011853170796387233, "loss": 0.9904, "step": 34620 }, { "epoch": 0.5, "grad_norm": 0.51953125, "learning_rate": 0.00011850710483931581, "loss": 1.0208, "step": 34625 }, { "epoch": 0.5, "grad_norm": 0.66796875, "learning_rate": 0.00011848250055471288, "loss": 0.8806, "step": 34630 }, { "epoch": 0.5, "grad_norm": 0.52734375, "learning_rate": 0.00011845789511160579, "loss": 0.9947, "step": 34635 }, { "epoch": 0.5, "grad_norm": 0.58984375, "learning_rate": 0.00011843328851153679, "loss": 0.974, "step": 34640 }, { "epoch": 0.5, "grad_norm": 0.6015625, "learning_rate": 0.00011840868075604825, "loss": 0.9877, "step": 34645 }, { "epoch": 0.5, "grad_norm": 0.55859375, "learning_rate": 0.00011838407184668265, "loss": 0.9903, "step": 34650 }, { "epoch": 0.5, "grad_norm": 0.640625, "learning_rate": 0.0001183594617849825, "loss": 0.9617, "step": 34655 }, { "epoch": 0.5, "grad_norm": 0.5546875, "learning_rate": 0.00011833485057249038, "loss": 0.9091, "step": 34660 }, { "epoch": 0.5, "grad_norm": 0.58203125, "learning_rate": 0.00011831023821074893, "loss": 0.9464, "step": 34665 }, { "epoch": 0.5, "grad_norm": 0.52734375, "learning_rate": 0.00011828562470130088, "loss": 0.8344, "step": 34670 }, { "epoch": 0.5, "grad_norm": 0.6640625, "learning_rate": 0.00011826101004568908, "loss": 0.9523, "step": 34675 }, { "epoch": 0.5, "grad_norm": 0.55859375, "learning_rate": 0.00011823639424545639, "loss": 0.9397, "step": 34680 }, { "epoch": 0.5, "grad_norm": 0.53125, "learning_rate": 0.0001182117773021457, "loss": 0.9879, "step": 34685 }, { "epoch": 0.5, "grad_norm": 0.5078125, "learning_rate": 0.00011818715921730006, "loss": 0.9295, "step": 34690 }, { "epoch": 0.5, "grad_norm": 0.625, "learning_rate": 0.00011816253999246258, "loss": 1.0233, "step": 34695 }, { "epoch": 0.5, "grad_norm": 0.58984375, "learning_rate": 0.00011813791962917642, "loss": 1.0116, "step": 34700 }, { "epoch": 0.5, "grad_norm": 0.62890625, "learning_rate": 0.00011811329812898482, "loss": 0.9946, "step": 34705 }, { "epoch": 0.5, "grad_norm": 0.609375, "learning_rate": 0.000118088675493431, "loss": 1.0791, "step": 34710 }, { "epoch": 0.5, "grad_norm": 0.5859375, "learning_rate": 0.00011806405172405845, "loss": 1.0352, "step": 34715 }, { "epoch": 0.5, "grad_norm": 0.55859375, "learning_rate": 0.00011803942682241053, "loss": 0.8494, "step": 34720 }, { "epoch": 0.5, "grad_norm": 0.515625, "learning_rate": 0.0001180148007900308, "loss": 0.955, "step": 34725 }, { "epoch": 0.5, "grad_norm": 0.55078125, "learning_rate": 0.00011799017362846287, "loss": 1.0439, "step": 34730 }, { "epoch": 0.5, "grad_norm": 0.58203125, "learning_rate": 0.00011796554533925037, "loss": 0.924, "step": 34735 }, { "epoch": 0.5, "grad_norm": 0.5546875, "learning_rate": 0.000117940915923937, "loss": 0.8782, "step": 34740 }, { "epoch": 0.5, "grad_norm": 0.5390625, "learning_rate": 0.00011791628538406659, "loss": 0.923, "step": 34745 }, { "epoch": 0.5, "grad_norm": 0.482421875, "learning_rate": 0.00011789165372118301, "loss": 1.0874, "step": 34750 }, { "epoch": 0.5, "grad_norm": 0.478515625, "learning_rate": 0.00011786702093683018, "loss": 0.7821, "step": 34755 }, { "epoch": 0.5, "grad_norm": 0.53515625, "learning_rate": 0.00011784238703255217, "loss": 0.8877, "step": 34760 }, { "epoch": 0.5, "grad_norm": 0.53515625, "learning_rate": 0.00011781775200989298, "loss": 0.8183, "step": 34765 }, { "epoch": 0.5, "grad_norm": 0.53125, "learning_rate": 0.0001177931158703968, "loss": 0.933, "step": 34770 }, { "epoch": 0.5, "grad_norm": 0.59765625, "learning_rate": 0.0001177684786156079, "loss": 1.0486, "step": 34775 }, { "epoch": 0.5, "grad_norm": 0.53515625, "learning_rate": 0.00011774384024707046, "loss": 1.1156, "step": 34780 }, { "epoch": 0.5, "grad_norm": 0.5546875, "learning_rate": 0.00011771920076632897, "loss": 1.1137, "step": 34785 }, { "epoch": 0.5, "grad_norm": 0.5859375, "learning_rate": 0.00011769456017492777, "loss": 0.8434, "step": 34790 }, { "epoch": 0.5, "grad_norm": 0.5859375, "learning_rate": 0.00011766991847441136, "loss": 1.0149, "step": 34795 }, { "epoch": 0.5, "grad_norm": 0.62109375, "learning_rate": 0.00011764527566632435, "loss": 0.9178, "step": 34800 }, { "epoch": 0.5, "grad_norm": 0.60546875, "learning_rate": 0.00011762063175221139, "loss": 0.9654, "step": 34805 }, { "epoch": 0.5, "grad_norm": 0.458984375, "learning_rate": 0.00011759598673361714, "loss": 0.9076, "step": 34810 }, { "epoch": 0.5, "grad_norm": 0.515625, "learning_rate": 0.00011757134061208642, "loss": 1.0185, "step": 34815 }, { "epoch": 0.5, "grad_norm": 0.5390625, "learning_rate": 0.00011754669338916401, "loss": 0.9442, "step": 34820 }, { "epoch": 0.5, "grad_norm": 0.578125, "learning_rate": 0.00011752204506639493, "loss": 0.9538, "step": 34825 }, { "epoch": 0.5, "grad_norm": 0.546875, "learning_rate": 0.00011749739564532407, "loss": 1.0216, "step": 34830 }, { "epoch": 0.5, "grad_norm": 0.5859375, "learning_rate": 0.00011747274512749653, "loss": 0.9645, "step": 34835 }, { "epoch": 0.5, "grad_norm": 0.443359375, "learning_rate": 0.00011744809351445747, "loss": 0.9073, "step": 34840 }, { "epoch": 0.5, "grad_norm": 0.54296875, "learning_rate": 0.00011742344080775198, "loss": 0.9201, "step": 34845 }, { "epoch": 0.5, "grad_norm": 0.55859375, "learning_rate": 0.00011739878700892541, "loss": 1.1989, "step": 34850 }, { "epoch": 0.5, "grad_norm": 0.52734375, "learning_rate": 0.00011737413211952304, "loss": 0.861, "step": 34855 }, { "epoch": 0.5, "grad_norm": 0.59375, "learning_rate": 0.00011734947614109029, "loss": 1.0942, "step": 34860 }, { "epoch": 0.5, "grad_norm": 0.5625, "learning_rate": 0.00011732481907517261, "loss": 1.0245, "step": 34865 }, { "epoch": 0.5, "grad_norm": 0.83203125, "learning_rate": 0.00011730016092331554, "loss": 0.9959, "step": 34870 }, { "epoch": 0.5, "grad_norm": 0.5546875, "learning_rate": 0.00011727550168706467, "loss": 0.9585, "step": 34875 }, { "epoch": 0.5, "grad_norm": 0.62109375, "learning_rate": 0.00011725084136796569, "loss": 1.0257, "step": 34880 }, { "epoch": 0.5, "grad_norm": 0.63671875, "learning_rate": 0.00011722617996756433, "loss": 0.9892, "step": 34885 }, { "epoch": 0.5, "grad_norm": 0.51953125, "learning_rate": 0.00011720151748740639, "loss": 0.8525, "step": 34890 }, { "epoch": 0.5, "grad_norm": 0.65234375, "learning_rate": 0.00011717685392903774, "loss": 1.065, "step": 34895 }, { "epoch": 0.5, "grad_norm": 0.5546875, "learning_rate": 0.0001171521892940043, "loss": 0.9966, "step": 34900 }, { "epoch": 0.5, "grad_norm": 0.65625, "learning_rate": 0.00011712752358385216, "loss": 0.9208, "step": 34905 }, { "epoch": 0.5, "grad_norm": 0.55078125, "learning_rate": 0.0001171028568001273, "loss": 0.8902, "step": 34910 }, { "epoch": 0.5, "grad_norm": 0.578125, "learning_rate": 0.00011707818894437587, "loss": 0.8528, "step": 34915 }, { "epoch": 0.5, "grad_norm": 0.57421875, "learning_rate": 0.00011705352001814415, "loss": 1.0698, "step": 34920 }, { "epoch": 0.5, "grad_norm": 0.5234375, "learning_rate": 0.00011702885002297836, "loss": 0.8962, "step": 34925 }, { "epoch": 0.5, "grad_norm": 0.5078125, "learning_rate": 0.00011700417896042484, "loss": 1.0051, "step": 34930 }, { "epoch": 0.5, "grad_norm": 0.55859375, "learning_rate": 0.00011697950683203003, "loss": 0.8504, "step": 34935 }, { "epoch": 0.5, "grad_norm": 0.57421875, "learning_rate": 0.00011695483363934038, "loss": 0.8656, "step": 34940 }, { "epoch": 0.5, "grad_norm": 0.5234375, "learning_rate": 0.00011693015938390249, "loss": 0.8445, "step": 34945 }, { "epoch": 0.5, "grad_norm": 0.6015625, "learning_rate": 0.0001169054840672629, "loss": 1.068, "step": 34950 }, { "epoch": 0.5, "grad_norm": 0.6328125, "learning_rate": 0.00011688080769096827, "loss": 1.0567, "step": 34955 }, { "epoch": 0.5, "grad_norm": 0.609375, "learning_rate": 0.00011685613025656543, "loss": 1.0828, "step": 34960 }, { "epoch": 0.5, "grad_norm": 0.5390625, "learning_rate": 0.00011683145176560117, "loss": 0.9981, "step": 34965 }, { "epoch": 0.5, "grad_norm": 0.6796875, "learning_rate": 0.00011680677221962233, "loss": 0.963, "step": 34970 }, { "epoch": 0.5, "grad_norm": 0.5703125, "learning_rate": 0.00011678209162017586, "loss": 0.9921, "step": 34975 }, { "epoch": 0.5, "grad_norm": 0.578125, "learning_rate": 0.00011675740996880877, "loss": 0.9473, "step": 34980 }, { "epoch": 0.5, "grad_norm": 0.61328125, "learning_rate": 0.00011673272726706812, "loss": 0.9033, "step": 34985 }, { "epoch": 0.5, "grad_norm": 0.494140625, "learning_rate": 0.00011670804351650109, "loss": 0.9988, "step": 34990 }, { "epoch": 0.5, "grad_norm": 0.5234375, "learning_rate": 0.00011668335871865487, "loss": 0.7865, "step": 34995 }, { "epoch": 0.5, "grad_norm": 0.6171875, "learning_rate": 0.00011665867287507672, "loss": 0.9627, "step": 35000 }, { "epoch": 0.5, "grad_norm": 0.59765625, "learning_rate": 0.00011663398598731399, "loss": 0.9706, "step": 35005 }, { "epoch": 0.5, "grad_norm": 0.53515625, "learning_rate": 0.00011660929805691402, "loss": 0.8479, "step": 35010 }, { "epoch": 0.5, "grad_norm": 0.52734375, "learning_rate": 0.00011658460908542438, "loss": 0.9575, "step": 35015 }, { "epoch": 0.5, "grad_norm": 0.58984375, "learning_rate": 0.00011655991907439257, "loss": 1.0464, "step": 35020 }, { "epoch": 0.5, "grad_norm": 0.6015625, "learning_rate": 0.00011653522802536613, "loss": 0.9209, "step": 35025 }, { "epoch": 0.5, "grad_norm": 0.498046875, "learning_rate": 0.0001165105359398928, "loss": 0.855, "step": 35030 }, { "epoch": 0.5, "grad_norm": 0.5625, "learning_rate": 0.00011648584281952026, "loss": 0.9487, "step": 35035 }, { "epoch": 0.5, "grad_norm": 0.57421875, "learning_rate": 0.00011646114866579632, "loss": 0.9703, "step": 35040 }, { "epoch": 0.5, "grad_norm": 0.51171875, "learning_rate": 0.00011643645348026882, "loss": 0.929, "step": 35045 }, { "epoch": 0.5, "grad_norm": 0.609375, "learning_rate": 0.0001164117572644857, "loss": 0.9992, "step": 35050 }, { "epoch": 0.5, "grad_norm": 0.53125, "learning_rate": 0.00011638706001999495, "loss": 0.8706, "step": 35055 }, { "epoch": 0.5, "grad_norm": 0.55078125, "learning_rate": 0.00011636236174834463, "loss": 0.8983, "step": 35060 }, { "epoch": 0.5, "grad_norm": 0.58203125, "learning_rate": 0.0001163376624510828, "loss": 1.0189, "step": 35065 }, { "epoch": 0.5, "grad_norm": 0.609375, "learning_rate": 0.00011631296212975771, "loss": 0.9888, "step": 35070 }, { "epoch": 0.5, "grad_norm": 0.62109375, "learning_rate": 0.0001162882607859176, "loss": 0.9474, "step": 35075 }, { "epoch": 0.5, "grad_norm": 0.54296875, "learning_rate": 0.0001162635584211107, "loss": 0.9624, "step": 35080 }, { "epoch": 0.5, "grad_norm": 0.5, "learning_rate": 0.00011623885503688546, "loss": 0.8918, "step": 35085 }, { "epoch": 0.5, "grad_norm": 0.50390625, "learning_rate": 0.00011621415063479028, "loss": 0.8406, "step": 35090 }, { "epoch": 0.5, "grad_norm": 0.58203125, "learning_rate": 0.00011618944521637368, "loss": 0.9026, "step": 35095 }, { "epoch": 0.5, "grad_norm": 0.64453125, "learning_rate": 0.00011616473878318424, "loss": 1.0991, "step": 35100 }, { "epoch": 0.5, "grad_norm": 0.53515625, "learning_rate": 0.00011614003133677052, "loss": 0.9176, "step": 35105 }, { "epoch": 0.5, "grad_norm": 0.53515625, "learning_rate": 0.00011611532287868128, "loss": 0.9866, "step": 35110 }, { "epoch": 0.5, "grad_norm": 0.640625, "learning_rate": 0.00011609061341046523, "loss": 0.9598, "step": 35115 }, { "epoch": 0.5, "grad_norm": 0.5390625, "learning_rate": 0.00011606590293367121, "loss": 0.94, "step": 35120 }, { "epoch": 0.5, "grad_norm": 0.546875, "learning_rate": 0.0001160411914498481, "loss": 0.9376, "step": 35125 }, { "epoch": 0.5, "grad_norm": 0.64453125, "learning_rate": 0.00011601647896054486, "loss": 1.0, "step": 35130 }, { "epoch": 0.5, "grad_norm": 0.5546875, "learning_rate": 0.00011599176546731045, "loss": 0.9325, "step": 35135 }, { "epoch": 0.5, "grad_norm": 0.6015625, "learning_rate": 0.000115967050971694, "loss": 1.0462, "step": 35140 }, { "epoch": 0.5, "grad_norm": 0.5234375, "learning_rate": 0.00011594233547524458, "loss": 1.0102, "step": 35145 }, { "epoch": 0.5, "grad_norm": 0.60546875, "learning_rate": 0.00011591761897951141, "loss": 0.9795, "step": 35150 }, { "epoch": 0.5, "grad_norm": 0.67578125, "learning_rate": 0.0001158929014860438, "loss": 0.9715, "step": 35155 }, { "epoch": 0.5, "grad_norm": 0.609375, "learning_rate": 0.00011586818299639097, "loss": 0.9373, "step": 35160 }, { "epoch": 0.5, "grad_norm": 0.55859375, "learning_rate": 0.00011584346351210238, "loss": 0.8551, "step": 35165 }, { "epoch": 0.5, "grad_norm": 0.5703125, "learning_rate": 0.00011581874303472746, "loss": 1.0983, "step": 35170 }, { "epoch": 0.5, "grad_norm": 0.57421875, "learning_rate": 0.00011579402156581568, "loss": 0.9248, "step": 35175 }, { "epoch": 0.5, "grad_norm": 0.51171875, "learning_rate": 0.0001157692991069167, "loss": 0.7816, "step": 35180 }, { "epoch": 0.5, "grad_norm": 0.625, "learning_rate": 0.00011574457565958003, "loss": 0.9678, "step": 35185 }, { "epoch": 0.5, "grad_norm": 0.55859375, "learning_rate": 0.00011571985122535547, "loss": 1.0563, "step": 35190 }, { "epoch": 0.5, "grad_norm": 0.58984375, "learning_rate": 0.00011569512580579271, "loss": 0.9766, "step": 35195 }, { "epoch": 0.5, "grad_norm": 0.54296875, "learning_rate": 0.0001156703994024416, "loss": 1.0294, "step": 35200 }, { "epoch": 0.5, "grad_norm": 0.47265625, "learning_rate": 0.00011564567201685202, "loss": 0.908, "step": 35205 }, { "epoch": 0.51, "grad_norm": 0.6171875, "learning_rate": 0.00011562094365057388, "loss": 0.9647, "step": 35210 }, { "epoch": 0.51, "grad_norm": 0.54296875, "learning_rate": 0.00011559621430515717, "loss": 0.7352, "step": 35215 }, { "epoch": 0.51, "grad_norm": 0.59375, "learning_rate": 0.00011557148398215203, "loss": 0.8482, "step": 35220 }, { "epoch": 0.51, "grad_norm": 0.5234375, "learning_rate": 0.00011554675268310853, "loss": 0.8756, "step": 35225 }, { "epoch": 0.51, "grad_norm": 0.59375, "learning_rate": 0.00011552202040957684, "loss": 0.926, "step": 35230 }, { "epoch": 0.51, "grad_norm": 0.5625, "learning_rate": 0.00011549728716310728, "loss": 0.7878, "step": 35235 }, { "epoch": 0.51, "grad_norm": 0.57421875, "learning_rate": 0.00011547255294525006, "loss": 1.1045, "step": 35240 }, { "epoch": 0.51, "grad_norm": 0.5390625, "learning_rate": 0.00011544781775755565, "loss": 0.9171, "step": 35245 }, { "epoch": 0.51, "grad_norm": 0.58984375, "learning_rate": 0.0001154230816015744, "loss": 0.8814, "step": 35250 }, { "epoch": 0.51, "grad_norm": 0.5625, "learning_rate": 0.0001153983444788568, "loss": 0.7096, "step": 35255 }, { "epoch": 0.51, "grad_norm": 0.9296875, "learning_rate": 0.00011537360639095349, "loss": 1.052, "step": 35260 }, { "epoch": 0.51, "grad_norm": 0.5859375, "learning_rate": 0.00011534886733941502, "loss": 0.8847, "step": 35265 }, { "epoch": 0.51, "grad_norm": 0.5859375, "learning_rate": 0.00011532412732579201, "loss": 0.9098, "step": 35270 }, { "epoch": 0.51, "grad_norm": 0.53515625, "learning_rate": 0.00011529938635163529, "loss": 0.9207, "step": 35275 }, { "epoch": 0.51, "grad_norm": 0.56640625, "learning_rate": 0.0001152746444184956, "loss": 0.8311, "step": 35280 }, { "epoch": 0.51, "grad_norm": 0.56640625, "learning_rate": 0.00011524990152792381, "loss": 0.9728, "step": 35285 }, { "epoch": 0.51, "grad_norm": 0.5390625, "learning_rate": 0.00011522515768147082, "loss": 0.846, "step": 35290 }, { "epoch": 0.51, "grad_norm": 0.640625, "learning_rate": 0.00011520041288068757, "loss": 0.919, "step": 35295 }, { "epoch": 0.51, "grad_norm": 0.55078125, "learning_rate": 0.00011517566712712516, "loss": 1.0759, "step": 35300 }, { "epoch": 0.51, "grad_norm": 0.60546875, "learning_rate": 0.00011515092042233466, "loss": 0.9652, "step": 35305 }, { "epoch": 0.51, "grad_norm": 0.625, "learning_rate": 0.00011512617276786719, "loss": 1.0368, "step": 35310 }, { "epoch": 0.51, "grad_norm": 0.59765625, "learning_rate": 0.00011510142416527401, "loss": 1.1348, "step": 35315 }, { "epoch": 0.51, "grad_norm": 0.64453125, "learning_rate": 0.00011507667461610637, "loss": 0.8869, "step": 35320 }, { "epoch": 0.51, "grad_norm": 0.5703125, "learning_rate": 0.00011505192412191554, "loss": 0.9564, "step": 35325 }, { "epoch": 0.51, "grad_norm": 0.62109375, "learning_rate": 0.000115027172684253, "loss": 1.0288, "step": 35330 }, { "epoch": 0.51, "grad_norm": 0.56640625, "learning_rate": 0.00011500242030467017, "loss": 0.8114, "step": 35335 }, { "epoch": 0.51, "grad_norm": 0.578125, "learning_rate": 0.00011497766698471852, "loss": 1.0091, "step": 35340 }, { "epoch": 0.51, "grad_norm": 0.5390625, "learning_rate": 0.00011495291272594968, "loss": 0.9346, "step": 35345 }, { "epoch": 0.51, "grad_norm": 0.578125, "learning_rate": 0.00011492815752991521, "loss": 1.0135, "step": 35350 }, { "epoch": 0.51, "grad_norm": 0.5703125, "learning_rate": 0.00011490340139816684, "loss": 0.8858, "step": 35355 }, { "epoch": 0.51, "grad_norm": 0.6875, "learning_rate": 0.0001148786443322563, "loss": 0.9391, "step": 35360 }, { "epoch": 0.51, "grad_norm": 0.6015625, "learning_rate": 0.00011485388633373538, "loss": 0.9083, "step": 35365 }, { "epoch": 0.51, "grad_norm": 0.5, "learning_rate": 0.00011482912740415595, "loss": 0.86, "step": 35370 }, { "epoch": 0.51, "grad_norm": 0.58984375, "learning_rate": 0.00011480436754506993, "loss": 0.9986, "step": 35375 }, { "epoch": 0.51, "grad_norm": 0.53125, "learning_rate": 0.00011477960675802926, "loss": 0.8895, "step": 35380 }, { "epoch": 0.51, "grad_norm": 0.5703125, "learning_rate": 0.00011475484504458604, "loss": 1.0366, "step": 35385 }, { "epoch": 0.51, "grad_norm": 0.498046875, "learning_rate": 0.00011473008240629233, "loss": 0.8077, "step": 35390 }, { "epoch": 0.51, "grad_norm": 0.51171875, "learning_rate": 0.00011470531884470026, "loss": 0.9371, "step": 35395 }, { "epoch": 0.51, "grad_norm": 0.474609375, "learning_rate": 0.00011468055436136209, "loss": 0.892, "step": 35400 }, { "epoch": 0.51, "grad_norm": 0.71484375, "learning_rate": 0.00011465578895783001, "loss": 0.9811, "step": 35405 }, { "epoch": 0.51, "grad_norm": 0.57421875, "learning_rate": 0.00011463102263565639, "loss": 0.9964, "step": 35410 }, { "epoch": 0.51, "grad_norm": 0.60546875, "learning_rate": 0.00011460625539639367, "loss": 0.9848, "step": 35415 }, { "epoch": 0.51, "grad_norm": 0.5703125, "learning_rate": 0.00011458148724159414, "loss": 0.9357, "step": 35420 }, { "epoch": 0.51, "grad_norm": 0.50390625, "learning_rate": 0.00011455671817281044, "loss": 1.0116, "step": 35425 }, { "epoch": 0.51, "grad_norm": 0.640625, "learning_rate": 0.00011453194819159506, "loss": 0.9715, "step": 35430 }, { "epoch": 0.51, "grad_norm": 0.59375, "learning_rate": 0.00011450717729950058, "loss": 0.901, "step": 35435 }, { "epoch": 0.51, "grad_norm": 0.5703125, "learning_rate": 0.00011448240549807974, "loss": 0.9623, "step": 35440 }, { "epoch": 0.51, "grad_norm": 0.5078125, "learning_rate": 0.00011445763278888522, "loss": 0.9551, "step": 35445 }, { "epoch": 0.51, "grad_norm": 0.6328125, "learning_rate": 0.00011443285917346981, "loss": 1.0564, "step": 35450 }, { "epoch": 0.51, "grad_norm": 0.490234375, "learning_rate": 0.00011440808465338634, "loss": 0.9243, "step": 35455 }, { "epoch": 0.51, "grad_norm": 0.5234375, "learning_rate": 0.00011438330923018771, "loss": 0.8432, "step": 35460 }, { "epoch": 0.51, "grad_norm": 0.53515625, "learning_rate": 0.0001143585329054269, "loss": 0.925, "step": 35465 }, { "epoch": 0.51, "grad_norm": 0.5625, "learning_rate": 0.00011433375568065692, "loss": 0.9936, "step": 35470 }, { "epoch": 0.51, "grad_norm": 0.5390625, "learning_rate": 0.00011430897755743075, "loss": 0.8261, "step": 35475 }, { "epoch": 0.51, "grad_norm": 0.6640625, "learning_rate": 0.0001142841985373016, "loss": 1.0874, "step": 35480 }, { "epoch": 0.51, "grad_norm": 0.53125, "learning_rate": 0.00011425941862182261, "loss": 0.8031, "step": 35485 }, { "epoch": 0.51, "grad_norm": 0.53125, "learning_rate": 0.00011423463781254702, "loss": 0.9591, "step": 35490 }, { "epoch": 0.51, "grad_norm": 0.61328125, "learning_rate": 0.00011420985611102814, "loss": 0.8532, "step": 35495 }, { "epoch": 0.51, "grad_norm": 0.5546875, "learning_rate": 0.00011418507351881927, "loss": 0.8923, "step": 35500 }, { "epoch": 0.51, "grad_norm": 0.7109375, "learning_rate": 0.00011416029003747383, "loss": 1.0846, "step": 35505 }, { "epoch": 0.51, "grad_norm": 0.625, "learning_rate": 0.00011413550566854532, "loss": 0.9509, "step": 35510 }, { "epoch": 0.51, "grad_norm": 0.66015625, "learning_rate": 0.00011411072041358717, "loss": 0.8973, "step": 35515 }, { "epoch": 0.51, "grad_norm": 0.5078125, "learning_rate": 0.00011408593427415304, "loss": 1.0902, "step": 35520 }, { "epoch": 0.51, "grad_norm": 0.53515625, "learning_rate": 0.00011406114725179647, "loss": 0.8178, "step": 35525 }, { "epoch": 0.51, "grad_norm": 0.52734375, "learning_rate": 0.0001140363593480712, "loss": 1.0575, "step": 35530 }, { "epoch": 0.51, "grad_norm": 0.53515625, "learning_rate": 0.00011401157056453091, "loss": 0.9071, "step": 35535 }, { "epoch": 0.51, "grad_norm": 0.66015625, "learning_rate": 0.00011398678090272945, "loss": 1.016, "step": 35540 }, { "epoch": 0.51, "grad_norm": 0.5546875, "learning_rate": 0.00011396199036422062, "loss": 0.9777, "step": 35545 }, { "epoch": 0.51, "grad_norm": 0.609375, "learning_rate": 0.00011393719895055834, "loss": 0.8702, "step": 35550 }, { "epoch": 0.51, "grad_norm": 0.60546875, "learning_rate": 0.00011391240666329655, "loss": 0.83, "step": 35555 }, { "epoch": 0.51, "grad_norm": 0.51953125, "learning_rate": 0.00011388761350398927, "loss": 1.0824, "step": 35560 }, { "epoch": 0.51, "grad_norm": 0.5703125, "learning_rate": 0.00011386281947419055, "loss": 0.9833, "step": 35565 }, { "epoch": 0.51, "grad_norm": 0.53125, "learning_rate": 0.00011383802457545452, "loss": 1.0281, "step": 35570 }, { "epoch": 0.51, "grad_norm": 0.828125, "learning_rate": 0.00011381322880933536, "loss": 0.8782, "step": 35575 }, { "epoch": 0.51, "grad_norm": 0.466796875, "learning_rate": 0.00011378843217738726, "loss": 1.0211, "step": 35580 }, { "epoch": 0.51, "grad_norm": 0.53515625, "learning_rate": 0.00011376363468116456, "loss": 0.956, "step": 35585 }, { "epoch": 0.51, "grad_norm": 0.546875, "learning_rate": 0.00011373883632222156, "loss": 0.8512, "step": 35590 }, { "epoch": 0.51, "grad_norm": 0.54296875, "learning_rate": 0.00011371403710211262, "loss": 0.9951, "step": 35595 }, { "epoch": 0.51, "grad_norm": 0.50390625, "learning_rate": 0.00011368923702239225, "loss": 0.9283, "step": 35600 }, { "epoch": 0.51, "grad_norm": 0.5078125, "learning_rate": 0.0001136644360846149, "loss": 0.8853, "step": 35605 }, { "epoch": 0.51, "grad_norm": 0.5703125, "learning_rate": 0.00011363963429033513, "loss": 0.865, "step": 35610 }, { "epoch": 0.51, "grad_norm": 0.5546875, "learning_rate": 0.00011361483164110756, "loss": 0.962, "step": 35615 }, { "epoch": 0.51, "grad_norm": 0.515625, "learning_rate": 0.00011359002813848682, "loss": 0.9415, "step": 35620 }, { "epoch": 0.51, "grad_norm": 0.6171875, "learning_rate": 0.00011356522378402765, "loss": 1.0479, "step": 35625 }, { "epoch": 0.51, "grad_norm": 0.51953125, "learning_rate": 0.00011354041857928481, "loss": 0.9286, "step": 35630 }, { "epoch": 0.51, "grad_norm": 0.5546875, "learning_rate": 0.00011351561252581308, "loss": 0.978, "step": 35635 }, { "epoch": 0.51, "grad_norm": 0.50390625, "learning_rate": 0.00011349080562516738, "loss": 0.9409, "step": 35640 }, { "epoch": 0.51, "grad_norm": 0.5625, "learning_rate": 0.00011346599787890264, "loss": 0.8311, "step": 35645 }, { "epoch": 0.51, "grad_norm": 0.58203125, "learning_rate": 0.00011344118928857379, "loss": 0.9399, "step": 35650 }, { "epoch": 0.51, "grad_norm": 0.5859375, "learning_rate": 0.00011341637985573592, "loss": 0.987, "step": 35655 }, { "epoch": 0.51, "grad_norm": 0.484375, "learning_rate": 0.00011339156958194405, "loss": 1.0669, "step": 35660 }, { "epoch": 0.51, "grad_norm": 0.65234375, "learning_rate": 0.00011336675846875335, "loss": 1.0025, "step": 35665 }, { "epoch": 0.51, "grad_norm": 0.57421875, "learning_rate": 0.000113341946517719, "loss": 0.9605, "step": 35670 }, { "epoch": 0.51, "grad_norm": 0.494140625, "learning_rate": 0.00011331713373039628, "loss": 0.9151, "step": 35675 }, { "epoch": 0.51, "grad_norm": 0.5859375, "learning_rate": 0.00011329232010834043, "loss": 0.8997, "step": 35680 }, { "epoch": 0.51, "grad_norm": 0.5703125, "learning_rate": 0.00011326750565310681, "loss": 0.9927, "step": 35685 }, { "epoch": 0.51, "grad_norm": 0.56640625, "learning_rate": 0.00011324269036625082, "loss": 1.0114, "step": 35690 }, { "epoch": 0.51, "grad_norm": 0.52734375, "learning_rate": 0.00011321787424932793, "loss": 0.9672, "step": 35695 }, { "epoch": 0.51, "grad_norm": 0.55078125, "learning_rate": 0.00011319305730389363, "loss": 0.9736, "step": 35700 }, { "epoch": 0.51, "grad_norm": 0.5546875, "learning_rate": 0.00011316823953150349, "loss": 0.8902, "step": 35705 }, { "epoch": 0.51, "grad_norm": 0.546875, "learning_rate": 0.00011314342093371307, "loss": 0.8646, "step": 35710 }, { "epoch": 0.51, "grad_norm": 0.51171875, "learning_rate": 0.00011311860151207807, "loss": 0.9136, "step": 35715 }, { "epoch": 0.51, "grad_norm": 0.54296875, "learning_rate": 0.00011309378126815416, "loss": 1.0051, "step": 35720 }, { "epoch": 0.51, "grad_norm": 0.53515625, "learning_rate": 0.00011306896020349715, "loss": 0.8496, "step": 35725 }, { "epoch": 0.51, "grad_norm": 0.64453125, "learning_rate": 0.00011304413831966282, "loss": 0.9563, "step": 35730 }, { "epoch": 0.51, "grad_norm": 0.5546875, "learning_rate": 0.00011301931561820706, "loss": 1.019, "step": 35735 }, { "epoch": 0.51, "grad_norm": 0.52734375, "learning_rate": 0.00011299449210068577, "loss": 1.0119, "step": 35740 }, { "epoch": 0.51, "grad_norm": 0.5234375, "learning_rate": 0.0001129696677686549, "loss": 0.8935, "step": 35745 }, { "epoch": 0.51, "grad_norm": 0.49609375, "learning_rate": 0.00011294484262367049, "loss": 1.0451, "step": 35750 }, { "epoch": 0.51, "grad_norm": 0.55078125, "learning_rate": 0.0001129200166672886, "loss": 0.9686, "step": 35755 }, { "epoch": 0.51, "grad_norm": 0.515625, "learning_rate": 0.00011289518990106535, "loss": 0.7722, "step": 35760 }, { "epoch": 0.51, "grad_norm": 0.5546875, "learning_rate": 0.00011287036232655692, "loss": 0.9714, "step": 35765 }, { "epoch": 0.51, "grad_norm": 0.546875, "learning_rate": 0.00011284553394531951, "loss": 1.0167, "step": 35770 }, { "epoch": 0.51, "grad_norm": 0.53515625, "learning_rate": 0.0001128207047589094, "loss": 0.8774, "step": 35775 }, { "epoch": 0.51, "grad_norm": 0.63671875, "learning_rate": 0.00011279587476888292, "loss": 1.0096, "step": 35780 }, { "epoch": 0.51, "grad_norm": 0.609375, "learning_rate": 0.00011277104397679646, "loss": 1.0358, "step": 35785 }, { "epoch": 0.51, "grad_norm": 0.49609375, "learning_rate": 0.00011274621238420637, "loss": 0.9213, "step": 35790 }, { "epoch": 0.51, "grad_norm": 0.5703125, "learning_rate": 0.0001127213799926692, "loss": 0.9913, "step": 35795 }, { "epoch": 0.51, "grad_norm": 0.625, "learning_rate": 0.00011269654680374144, "loss": 0.8914, "step": 35800 }, { "epoch": 0.51, "grad_norm": 0.5390625, "learning_rate": 0.00011267171281897966, "loss": 0.8998, "step": 35805 }, { "epoch": 0.51, "grad_norm": 0.5625, "learning_rate": 0.00011264687803994052, "loss": 0.8822, "step": 35810 }, { "epoch": 0.51, "grad_norm": 0.5546875, "learning_rate": 0.00011262204246818062, "loss": 0.8956, "step": 35815 }, { "epoch": 0.51, "grad_norm": 0.5234375, "learning_rate": 0.00011259720610525674, "loss": 0.895, "step": 35820 }, { "epoch": 0.51, "grad_norm": 0.5234375, "learning_rate": 0.00011257236895272565, "loss": 0.9545, "step": 35825 }, { "epoch": 0.51, "grad_norm": 0.58203125, "learning_rate": 0.00011254753101214412, "loss": 1.0074, "step": 35830 }, { "epoch": 0.51, "grad_norm": 0.53515625, "learning_rate": 0.00011252269228506909, "loss": 0.8765, "step": 35835 }, { "epoch": 0.51, "grad_norm": 0.7890625, "learning_rate": 0.00011249785277305743, "loss": 0.9155, "step": 35840 }, { "epoch": 0.51, "grad_norm": 0.5546875, "learning_rate": 0.00011247301247766611, "loss": 0.9397, "step": 35845 }, { "epoch": 0.51, "grad_norm": 0.55859375, "learning_rate": 0.00011244817140045219, "loss": 1.038, "step": 35850 }, { "epoch": 0.51, "grad_norm": 0.55859375, "learning_rate": 0.00011242332954297268, "loss": 0.9239, "step": 35855 }, { "epoch": 0.51, "grad_norm": 0.546875, "learning_rate": 0.00011239848690678474, "loss": 0.9346, "step": 35860 }, { "epoch": 0.51, "grad_norm": 0.51171875, "learning_rate": 0.00011237364349344553, "loss": 1.0483, "step": 35865 }, { "epoch": 0.51, "grad_norm": 0.625, "learning_rate": 0.00011234879930451223, "loss": 0.8625, "step": 35870 }, { "epoch": 0.51, "grad_norm": 0.484375, "learning_rate": 0.00011232395434154215, "loss": 0.8454, "step": 35875 }, { "epoch": 0.51, "grad_norm": 0.5546875, "learning_rate": 0.00011229910860609256, "loss": 0.85, "step": 35880 }, { "epoch": 0.51, "grad_norm": 0.515625, "learning_rate": 0.00011227426209972082, "loss": 0.9926, "step": 35885 }, { "epoch": 0.51, "grad_norm": 0.671875, "learning_rate": 0.00011224941482398441, "loss": 1.1048, "step": 35890 }, { "epoch": 0.51, "grad_norm": 0.55078125, "learning_rate": 0.00011222456678044067, "loss": 0.8748, "step": 35895 }, { "epoch": 0.51, "grad_norm": 0.57421875, "learning_rate": 0.00011219971797064718, "loss": 1.0172, "step": 35900 }, { "epoch": 0.52, "grad_norm": 0.578125, "learning_rate": 0.0001121748683961615, "loss": 1.0032, "step": 35905 }, { "epoch": 0.52, "grad_norm": 0.478515625, "learning_rate": 0.00011215001805854116, "loss": 0.9065, "step": 35910 }, { "epoch": 0.52, "grad_norm": 0.55078125, "learning_rate": 0.00011212516695934388, "loss": 0.9394, "step": 35915 }, { "epoch": 0.52, "grad_norm": 0.515625, "learning_rate": 0.0001121003151001273, "loss": 0.9367, "step": 35920 }, { "epoch": 0.52, "grad_norm": 0.59765625, "learning_rate": 0.0001120754624824492, "loss": 0.9802, "step": 35925 }, { "epoch": 0.52, "grad_norm": 0.6640625, "learning_rate": 0.00011205060910786737, "loss": 0.8837, "step": 35930 }, { "epoch": 0.52, "grad_norm": 0.5625, "learning_rate": 0.00011202575497793961, "loss": 1.0204, "step": 35935 }, { "epoch": 0.52, "grad_norm": 0.546875, "learning_rate": 0.00011200090009422388, "loss": 0.9781, "step": 35940 }, { "epoch": 0.52, "grad_norm": 0.55859375, "learning_rate": 0.00011197604445827803, "loss": 0.9268, "step": 35945 }, { "epoch": 0.52, "grad_norm": 0.6171875, "learning_rate": 0.00011195118807166008, "loss": 0.7944, "step": 35950 }, { "epoch": 0.52, "grad_norm": 0.58984375, "learning_rate": 0.00011192633093592803, "loss": 0.8921, "step": 35955 }, { "epoch": 0.52, "grad_norm": 0.50390625, "learning_rate": 0.00011190147305264, "loss": 1.0664, "step": 35960 }, { "epoch": 0.52, "grad_norm": 0.59765625, "learning_rate": 0.00011187661442335407, "loss": 0.9301, "step": 35965 }, { "epoch": 0.52, "grad_norm": 0.53125, "learning_rate": 0.00011185175504962846, "loss": 0.833, "step": 35970 }, { "epoch": 0.52, "grad_norm": 0.53125, "learning_rate": 0.00011182689493302128, "loss": 0.8505, "step": 35975 }, { "epoch": 0.52, "grad_norm": 0.55859375, "learning_rate": 0.0001118020340750909, "loss": 0.9573, "step": 35980 }, { "epoch": 0.52, "grad_norm": 0.60546875, "learning_rate": 0.00011177717247739559, "loss": 0.9987, "step": 35985 }, { "epoch": 0.52, "grad_norm": 0.5078125, "learning_rate": 0.00011175231014149366, "loss": 0.8967, "step": 35990 }, { "epoch": 0.52, "grad_norm": 0.609375, "learning_rate": 0.00011172744706894357, "loss": 0.9154, "step": 35995 }, { "epoch": 0.52, "grad_norm": 0.515625, "learning_rate": 0.00011170258326130374, "loss": 0.8346, "step": 36000 }, { "epoch": 0.52, "grad_norm": 0.6328125, "learning_rate": 0.00011167771872013264, "loss": 1.0448, "step": 36005 }, { "epoch": 0.52, "grad_norm": 0.62109375, "learning_rate": 0.00011165285344698886, "loss": 0.912, "step": 36010 }, { "epoch": 0.52, "grad_norm": 0.56640625, "learning_rate": 0.00011162798744343094, "loss": 0.8778, "step": 36015 }, { "epoch": 0.52, "grad_norm": 0.54296875, "learning_rate": 0.00011160312071101755, "loss": 0.9186, "step": 36020 }, { "epoch": 0.52, "grad_norm": 0.5234375, "learning_rate": 0.0001115782532513073, "loss": 0.8453, "step": 36025 }, { "epoch": 0.52, "grad_norm": 0.58203125, "learning_rate": 0.00011155338506585895, "loss": 1.0438, "step": 36030 }, { "epoch": 0.52, "grad_norm": 0.578125, "learning_rate": 0.0001115285161562313, "loss": 1.0465, "step": 36035 }, { "epoch": 0.52, "grad_norm": 0.60546875, "learning_rate": 0.0001115036465239831, "loss": 0.6984, "step": 36040 }, { "epoch": 0.52, "grad_norm": 0.486328125, "learning_rate": 0.00011147877617067326, "loss": 0.9824, "step": 36045 }, { "epoch": 0.52, "grad_norm": 0.55859375, "learning_rate": 0.00011145390509786065, "loss": 1.0159, "step": 36050 }, { "epoch": 0.52, "grad_norm": 0.546875, "learning_rate": 0.00011142903330710422, "loss": 0.8533, "step": 36055 }, { "epoch": 0.52, "grad_norm": 0.470703125, "learning_rate": 0.00011140416079996294, "loss": 0.8495, "step": 36060 }, { "epoch": 0.52, "grad_norm": 0.6171875, "learning_rate": 0.0001113792875779959, "loss": 0.9059, "step": 36065 }, { "epoch": 0.52, "grad_norm": 0.95703125, "learning_rate": 0.00011135441364276214, "loss": 0.9416, "step": 36070 }, { "epoch": 0.52, "grad_norm": 0.5390625, "learning_rate": 0.00011132953899582081, "loss": 1.1856, "step": 36075 }, { "epoch": 0.52, "grad_norm": 0.5703125, "learning_rate": 0.00011130466363873109, "loss": 0.9599, "step": 36080 }, { "epoch": 0.52, "grad_norm": 0.58203125, "learning_rate": 0.00011127978757305213, "loss": 0.8942, "step": 36085 }, { "epoch": 0.52, "grad_norm": 0.5078125, "learning_rate": 0.00011125491080034327, "loss": 0.9854, "step": 36090 }, { "epoch": 0.52, "grad_norm": 0.6328125, "learning_rate": 0.0001112300333221638, "loss": 0.9776, "step": 36095 }, { "epoch": 0.52, "grad_norm": 0.5390625, "learning_rate": 0.00011120515514007302, "loss": 0.9301, "step": 36100 }, { "epoch": 0.52, "grad_norm": 0.53515625, "learning_rate": 0.00011118027625563037, "loss": 1.0208, "step": 36105 }, { "epoch": 0.52, "grad_norm": 0.58203125, "learning_rate": 0.00011115539667039528, "loss": 0.8565, "step": 36110 }, { "epoch": 0.52, "grad_norm": 0.578125, "learning_rate": 0.00011113051638592717, "loss": 0.9049, "step": 36115 }, { "epoch": 0.52, "grad_norm": 0.5546875, "learning_rate": 0.00011110563540378567, "loss": 0.9627, "step": 36120 }, { "epoch": 0.52, "grad_norm": 0.57421875, "learning_rate": 0.00011108075372553029, "loss": 0.9632, "step": 36125 }, { "epoch": 0.52, "grad_norm": 0.5625, "learning_rate": 0.00011105587135272064, "loss": 0.9762, "step": 36130 }, { "epoch": 0.52, "grad_norm": 0.494140625, "learning_rate": 0.00011103098828691639, "loss": 0.9035, "step": 36135 }, { "epoch": 0.52, "grad_norm": 0.59375, "learning_rate": 0.00011100610452967719, "loss": 0.9729, "step": 36140 }, { "epoch": 0.52, "grad_norm": 0.66015625, "learning_rate": 0.00011098122008256286, "loss": 0.9325, "step": 36145 }, { "epoch": 0.52, "grad_norm": 0.52734375, "learning_rate": 0.00011095633494713315, "loss": 0.9717, "step": 36150 }, { "epoch": 0.52, "grad_norm": 0.65625, "learning_rate": 0.00011093144912494788, "loss": 1.0258, "step": 36155 }, { "epoch": 0.52, "grad_norm": 0.546875, "learning_rate": 0.00011090656261756694, "loss": 0.946, "step": 36160 }, { "epoch": 0.52, "grad_norm": 0.56640625, "learning_rate": 0.00011088167542655022, "loss": 0.8508, "step": 36165 }, { "epoch": 0.52, "grad_norm": 0.609375, "learning_rate": 0.00011085678755345768, "loss": 0.9069, "step": 36170 }, { "epoch": 0.52, "grad_norm": 0.55859375, "learning_rate": 0.00011083189899984933, "loss": 0.8887, "step": 36175 }, { "epoch": 0.52, "grad_norm": 0.50390625, "learning_rate": 0.00011080700976728525, "loss": 1.1359, "step": 36180 }, { "epoch": 0.52, "grad_norm": 0.58203125, "learning_rate": 0.00011078211985732545, "loss": 0.9425, "step": 36185 }, { "epoch": 0.52, "grad_norm": 0.6015625, "learning_rate": 0.00011075722927153012, "loss": 0.8805, "step": 36190 }, { "epoch": 0.52, "grad_norm": 0.61328125, "learning_rate": 0.0001107323380114594, "loss": 1.0638, "step": 36195 }, { "epoch": 0.52, "grad_norm": 0.55078125, "learning_rate": 0.00011070744607867352, "loss": 0.8841, "step": 36200 }, { "epoch": 0.52, "grad_norm": 0.482421875, "learning_rate": 0.00011068255347473271, "loss": 0.8163, "step": 36205 }, { "epoch": 0.52, "grad_norm": 0.486328125, "learning_rate": 0.0001106576602011973, "loss": 0.9477, "step": 36210 }, { "epoch": 0.52, "grad_norm": 0.6796875, "learning_rate": 0.0001106327662596276, "loss": 0.9684, "step": 36215 }, { "epoch": 0.52, "grad_norm": 0.55859375, "learning_rate": 0.000110607871651584, "loss": 0.9523, "step": 36220 }, { "epoch": 0.52, "grad_norm": 0.5078125, "learning_rate": 0.00011058297637862691, "loss": 0.8194, "step": 36225 }, { "epoch": 0.52, "grad_norm": 0.6015625, "learning_rate": 0.00011055808044231686, "loss": 1.1832, "step": 36230 }, { "epoch": 0.52, "grad_norm": 0.51171875, "learning_rate": 0.00011053318384421423, "loss": 1.05, "step": 36235 }, { "epoch": 0.52, "grad_norm": 0.57421875, "learning_rate": 0.00011050828658587968, "loss": 0.959, "step": 36240 }, { "epoch": 0.52, "grad_norm": 0.6328125, "learning_rate": 0.00011048338866887376, "loss": 0.8375, "step": 36245 }, { "epoch": 0.52, "grad_norm": 0.6953125, "learning_rate": 0.00011045849009475709, "loss": 1.179, "step": 36250 }, { "epoch": 0.52, "grad_norm": 0.5625, "learning_rate": 0.00011043359086509038, "loss": 0.9546, "step": 36255 }, { "epoch": 0.52, "grad_norm": 0.55859375, "learning_rate": 0.00011040869098143425, "loss": 0.9842, "step": 36260 }, { "epoch": 0.52, "grad_norm": 0.6015625, "learning_rate": 0.00011038379044534957, "loss": 0.865, "step": 36265 }, { "epoch": 0.52, "grad_norm": 0.5390625, "learning_rate": 0.00011035888925839705, "loss": 0.958, "step": 36270 }, { "epoch": 0.52, "grad_norm": 0.5703125, "learning_rate": 0.00011033398742213755, "loss": 0.9291, "step": 36275 }, { "epoch": 0.52, "grad_norm": 0.53515625, "learning_rate": 0.00011030908493813197, "loss": 0.9853, "step": 36280 }, { "epoch": 0.52, "grad_norm": 0.6015625, "learning_rate": 0.00011028418180794122, "loss": 0.982, "step": 36285 }, { "epoch": 0.52, "grad_norm": 0.5546875, "learning_rate": 0.00011025927803312619, "loss": 0.9879, "step": 36290 }, { "epoch": 0.52, "grad_norm": 0.6796875, "learning_rate": 0.00011023437361524795, "loss": 0.9501, "step": 36295 }, { "epoch": 0.52, "grad_norm": 0.53515625, "learning_rate": 0.00011020946855586753, "loss": 1.0892, "step": 36300 }, { "epoch": 0.52, "grad_norm": 0.5078125, "learning_rate": 0.00011018456285654597, "loss": 0.9984, "step": 36305 }, { "epoch": 0.52, "grad_norm": 0.61328125, "learning_rate": 0.00011015965651884446, "loss": 1.0251, "step": 36310 }, { "epoch": 0.52, "grad_norm": 0.58984375, "learning_rate": 0.00011013474954432405, "loss": 0.9086, "step": 36315 }, { "epoch": 0.52, "grad_norm": 0.546875, "learning_rate": 0.00011010984193454603, "loss": 0.9331, "step": 36320 }, { "epoch": 0.52, "grad_norm": 0.515625, "learning_rate": 0.0001100849336910716, "loss": 0.7747, "step": 36325 }, { "epoch": 0.52, "grad_norm": 0.6328125, "learning_rate": 0.00011006002481546201, "loss": 1.2171, "step": 36330 }, { "epoch": 0.52, "grad_norm": 0.48046875, "learning_rate": 0.00011003511530927866, "loss": 1.0336, "step": 36335 }, { "epoch": 0.52, "grad_norm": 0.515625, "learning_rate": 0.00011001020517408283, "loss": 0.9015, "step": 36340 }, { "epoch": 0.52, "grad_norm": 0.5625, "learning_rate": 0.00010998529441143593, "loss": 0.9482, "step": 36345 }, { "epoch": 0.52, "grad_norm": 0.56640625, "learning_rate": 0.00010996038302289939, "loss": 0.9363, "step": 36350 }, { "epoch": 0.52, "grad_norm": 0.64453125, "learning_rate": 0.00010993547101003474, "loss": 1.0001, "step": 36355 }, { "epoch": 0.52, "grad_norm": 0.53515625, "learning_rate": 0.00010991055837440343, "loss": 1.002, "step": 36360 }, { "epoch": 0.52, "grad_norm": 0.515625, "learning_rate": 0.00010988564511756703, "loss": 0.8675, "step": 36365 }, { "epoch": 0.52, "grad_norm": 0.57421875, "learning_rate": 0.00010986073124108711, "loss": 1.0402, "step": 36370 }, { "epoch": 0.52, "grad_norm": 0.5234375, "learning_rate": 0.00010983581674652536, "loss": 0.952, "step": 36375 }, { "epoch": 0.52, "grad_norm": 0.58203125, "learning_rate": 0.00010981090163544341, "loss": 0.9668, "step": 36380 }, { "epoch": 0.52, "grad_norm": 0.53515625, "learning_rate": 0.00010978598590940294, "loss": 0.9341, "step": 36385 }, { "epoch": 0.52, "grad_norm": 0.640625, "learning_rate": 0.00010976106956996579, "loss": 0.7929, "step": 36390 }, { "epoch": 0.52, "grad_norm": 0.578125, "learning_rate": 0.00010973615261869365, "loss": 0.9433, "step": 36395 }, { "epoch": 0.52, "grad_norm": 0.578125, "learning_rate": 0.00010971123505714835, "loss": 0.843, "step": 36400 }, { "epoch": 0.52, "grad_norm": 0.6484375, "learning_rate": 0.00010968631688689181, "loss": 1.108, "step": 36405 }, { "epoch": 0.52, "grad_norm": 0.52734375, "learning_rate": 0.00010966139810948591, "loss": 0.8542, "step": 36410 }, { "epoch": 0.52, "grad_norm": 0.54296875, "learning_rate": 0.00010963647872649255, "loss": 1.1927, "step": 36415 }, { "epoch": 0.52, "grad_norm": 0.56640625, "learning_rate": 0.00010961155873947373, "loss": 0.9521, "step": 36420 }, { "epoch": 0.52, "grad_norm": 0.5078125, "learning_rate": 0.00010958663814999145, "loss": 0.8526, "step": 36425 }, { "epoch": 0.52, "grad_norm": 0.60546875, "learning_rate": 0.00010956171695960778, "loss": 0.9811, "step": 36430 }, { "epoch": 0.52, "grad_norm": 0.50390625, "learning_rate": 0.0001095367951698848, "loss": 0.8424, "step": 36435 }, { "epoch": 0.52, "grad_norm": 0.53515625, "learning_rate": 0.00010951187278238464, "loss": 0.8957, "step": 36440 }, { "epoch": 0.52, "grad_norm": 0.62109375, "learning_rate": 0.00010948694979866947, "loss": 0.9119, "step": 36445 }, { "epoch": 0.52, "grad_norm": 0.5625, "learning_rate": 0.00010946202622030149, "loss": 0.9794, "step": 36450 }, { "epoch": 0.52, "grad_norm": 0.52734375, "learning_rate": 0.00010943710204884288, "loss": 0.8748, "step": 36455 }, { "epoch": 0.52, "grad_norm": 0.61328125, "learning_rate": 0.000109412177285856, "loss": 0.9861, "step": 36460 }, { "epoch": 0.52, "grad_norm": 0.75390625, "learning_rate": 0.00010938725193290313, "loss": 1.0779, "step": 36465 }, { "epoch": 0.52, "grad_norm": 0.462890625, "learning_rate": 0.00010936232599154664, "loss": 0.8525, "step": 36470 }, { "epoch": 0.52, "grad_norm": 0.58203125, "learning_rate": 0.00010933739946334886, "loss": 0.953, "step": 36475 }, { "epoch": 0.52, "grad_norm": 0.470703125, "learning_rate": 0.00010931247234987223, "loss": 1.0332, "step": 36480 }, { "epoch": 0.52, "grad_norm": 0.5390625, "learning_rate": 0.00010928754465267925, "loss": 0.9057, "step": 36485 }, { "epoch": 0.52, "grad_norm": 0.5390625, "learning_rate": 0.00010926261637333242, "loss": 1.0217, "step": 36490 }, { "epoch": 0.52, "grad_norm": 0.5703125, "learning_rate": 0.00010923768751339422, "loss": 0.9859, "step": 36495 }, { "epoch": 0.52, "grad_norm": 0.5546875, "learning_rate": 0.00010921275807442726, "loss": 0.9719, "step": 36500 }, { "epoch": 0.52, "grad_norm": 0.6015625, "learning_rate": 0.00010918782805799412, "loss": 0.9264, "step": 36505 }, { "epoch": 0.52, "grad_norm": 0.5703125, "learning_rate": 0.00010916289746565742, "loss": 0.9057, "step": 36510 }, { "epoch": 0.52, "grad_norm": 0.49609375, "learning_rate": 0.00010913796629897993, "loss": 0.9249, "step": 36515 }, { "epoch": 0.52, "grad_norm": 0.625, "learning_rate": 0.00010911303455952427, "loss": 0.9619, "step": 36520 }, { "epoch": 0.52, "grad_norm": 0.484375, "learning_rate": 0.00010908810224885325, "loss": 0.9483, "step": 36525 }, { "epoch": 0.52, "grad_norm": 0.5625, "learning_rate": 0.0001090631693685296, "loss": 1.0105, "step": 36530 }, { "epoch": 0.52, "grad_norm": 0.5546875, "learning_rate": 0.00010903823592011617, "loss": 1.0082, "step": 36535 }, { "epoch": 0.52, "grad_norm": 0.59765625, "learning_rate": 0.00010901330190517585, "loss": 0.9012, "step": 36540 }, { "epoch": 0.52, "grad_norm": 0.54296875, "learning_rate": 0.00010898836732527148, "loss": 0.8343, "step": 36545 }, { "epoch": 0.52, "grad_norm": 0.55859375, "learning_rate": 0.00010896343218196603, "loss": 0.8575, "step": 36550 }, { "epoch": 0.52, "grad_norm": 0.52734375, "learning_rate": 0.00010893849647682244, "loss": 0.9455, "step": 36555 }, { "epoch": 0.52, "grad_norm": 0.63671875, "learning_rate": 0.0001089135602114037, "loss": 0.9434, "step": 36560 }, { "epoch": 0.52, "grad_norm": 0.5390625, "learning_rate": 0.00010888862338727284, "loss": 0.8242, "step": 36565 }, { "epoch": 0.52, "grad_norm": 0.515625, "learning_rate": 0.00010886368600599297, "loss": 0.8579, "step": 36570 }, { "epoch": 0.52, "grad_norm": 0.5546875, "learning_rate": 0.00010883874806912715, "loss": 0.9151, "step": 36575 }, { "epoch": 0.52, "grad_norm": 0.5859375, "learning_rate": 0.00010881380957823853, "loss": 0.8738, "step": 36580 }, { "epoch": 0.52, "grad_norm": 0.59375, "learning_rate": 0.00010878887053489031, "loss": 0.9275, "step": 36585 }, { "epoch": 0.52, "grad_norm": 0.58984375, "learning_rate": 0.00010876393094064563, "loss": 0.957, "step": 36590 }, { "epoch": 0.52, "grad_norm": 0.51953125, "learning_rate": 0.00010873899079706782, "loss": 0.9025, "step": 36595 }, { "epoch": 0.53, "grad_norm": 0.56640625, "learning_rate": 0.00010871405010572009, "loss": 1.0292, "step": 36600 }, { "epoch": 0.53, "grad_norm": 0.6015625, "learning_rate": 0.00010868910886816579, "loss": 1.007, "step": 36605 }, { "epoch": 0.53, "grad_norm": 0.5234375, "learning_rate": 0.00010866416708596827, "loss": 0.8085, "step": 36610 }, { "epoch": 0.53, "grad_norm": 0.5390625, "learning_rate": 0.00010863922476069084, "loss": 0.9712, "step": 36615 }, { "epoch": 0.53, "grad_norm": 0.55859375, "learning_rate": 0.00010861428189389699, "loss": 0.8855, "step": 36620 }, { "epoch": 0.53, "grad_norm": 0.5078125, "learning_rate": 0.00010858933848715016, "loss": 1.0071, "step": 36625 }, { "epoch": 0.53, "grad_norm": 0.5703125, "learning_rate": 0.00010856439454201376, "loss": 1.0599, "step": 36630 }, { "epoch": 0.53, "grad_norm": 0.61328125, "learning_rate": 0.0001085394500600514, "loss": 1.0069, "step": 36635 }, { "epoch": 0.53, "grad_norm": 0.6640625, "learning_rate": 0.00010851450504282658, "loss": 1.013, "step": 36640 }, { "epoch": 0.53, "grad_norm": 0.5703125, "learning_rate": 0.00010848955949190286, "loss": 0.9143, "step": 36645 }, { "epoch": 0.53, "grad_norm": 0.6015625, "learning_rate": 0.00010846461340884393, "loss": 0.9171, "step": 36650 }, { "epoch": 0.53, "grad_norm": 0.515625, "learning_rate": 0.00010843966679521333, "loss": 1.0172, "step": 36655 }, { "epoch": 0.53, "grad_norm": 0.58984375, "learning_rate": 0.00010841471965257483, "loss": 0.8871, "step": 36660 }, { "epoch": 0.53, "grad_norm": 0.5625, "learning_rate": 0.00010838977198249214, "loss": 0.9314, "step": 36665 }, { "epoch": 0.53, "grad_norm": 0.578125, "learning_rate": 0.00010836482378652895, "loss": 0.9426, "step": 36670 }, { "epoch": 0.53, "grad_norm": 0.59765625, "learning_rate": 0.00010833987506624912, "loss": 0.9492, "step": 36675 }, { "epoch": 0.53, "grad_norm": 0.92578125, "learning_rate": 0.00010831492582321641, "loss": 1.0501, "step": 36680 }, { "epoch": 0.53, "grad_norm": 0.58203125, "learning_rate": 0.00010828997605899465, "loss": 0.898, "step": 36685 }, { "epoch": 0.53, "grad_norm": 0.49609375, "learning_rate": 0.00010826502577514777, "loss": 0.8869, "step": 36690 }, { "epoch": 0.53, "grad_norm": 0.58203125, "learning_rate": 0.00010824007497323967, "loss": 1.022, "step": 36695 }, { "epoch": 0.53, "grad_norm": 0.5625, "learning_rate": 0.00010821512365483426, "loss": 0.9217, "step": 36700 }, { "epoch": 0.53, "grad_norm": 0.59765625, "learning_rate": 0.00010819017182149558, "loss": 0.9814, "step": 36705 }, { "epoch": 0.53, "grad_norm": 0.56640625, "learning_rate": 0.00010816521947478757, "loss": 1.0245, "step": 36710 }, { "epoch": 0.53, "grad_norm": 0.61328125, "learning_rate": 0.00010814026661627432, "loss": 0.909, "step": 36715 }, { "epoch": 0.53, "grad_norm": 0.51171875, "learning_rate": 0.00010811531324751989, "loss": 1.0503, "step": 36720 }, { "epoch": 0.53, "grad_norm": 0.5078125, "learning_rate": 0.00010809035937008835, "loss": 1.059, "step": 36725 }, { "epoch": 0.53, "grad_norm": 0.55078125, "learning_rate": 0.00010806540498554392, "loss": 0.9477, "step": 36730 }, { "epoch": 0.53, "grad_norm": 0.5703125, "learning_rate": 0.0001080404500954507, "loss": 0.8944, "step": 36735 }, { "epoch": 0.53, "grad_norm": 0.60546875, "learning_rate": 0.00010801549470137289, "loss": 1.0347, "step": 36740 }, { "epoch": 0.53, "grad_norm": 0.52734375, "learning_rate": 0.00010799053880487477, "loss": 0.997, "step": 36745 }, { "epoch": 0.53, "grad_norm": 0.59765625, "learning_rate": 0.00010796558240752057, "loss": 1.064, "step": 36750 }, { "epoch": 0.53, "grad_norm": 0.76953125, "learning_rate": 0.00010794062551087461, "loss": 1.0009, "step": 36755 }, { "epoch": 0.53, "grad_norm": 0.7421875, "learning_rate": 0.00010791566811650118, "loss": 1.0184, "step": 36760 }, { "epoch": 0.53, "grad_norm": 0.4921875, "learning_rate": 0.00010789071022596461, "loss": 0.9217, "step": 36765 }, { "epoch": 0.53, "grad_norm": 0.56640625, "learning_rate": 0.0001078657518408294, "loss": 0.9953, "step": 36770 }, { "epoch": 0.53, "grad_norm": 0.5546875, "learning_rate": 0.0001078407929626599, "loss": 0.9915, "step": 36775 }, { "epoch": 0.53, "grad_norm": 0.59765625, "learning_rate": 0.00010781583359302056, "loss": 0.8871, "step": 36780 }, { "epoch": 0.53, "grad_norm": 0.57421875, "learning_rate": 0.00010779087373347587, "loss": 1.0015, "step": 36785 }, { "epoch": 0.53, "grad_norm": 0.70703125, "learning_rate": 0.00010776591338559034, "loss": 1.1607, "step": 36790 }, { "epoch": 0.53, "grad_norm": 0.640625, "learning_rate": 0.00010774095255092848, "loss": 0.9473, "step": 36795 }, { "epoch": 0.53, "grad_norm": 0.515625, "learning_rate": 0.00010771599123105495, "loss": 1.0485, "step": 36800 }, { "epoch": 0.53, "grad_norm": 0.52734375, "learning_rate": 0.00010769102942753429, "loss": 1.1098, "step": 36805 }, { "epoch": 0.53, "grad_norm": 0.53515625, "learning_rate": 0.00010766606714193113, "loss": 0.9416, "step": 36810 }, { "epoch": 0.53, "grad_norm": 0.56640625, "learning_rate": 0.00010764110437581017, "loss": 0.9518, "step": 36815 }, { "epoch": 0.53, "grad_norm": 0.51171875, "learning_rate": 0.00010761614113073605, "loss": 0.9433, "step": 36820 }, { "epoch": 0.53, "grad_norm": 0.69140625, "learning_rate": 0.00010759117740827356, "loss": 0.9649, "step": 36825 }, { "epoch": 0.53, "grad_norm": 0.58203125, "learning_rate": 0.00010756621320998743, "loss": 0.9854, "step": 36830 }, { "epoch": 0.53, "grad_norm": 0.62109375, "learning_rate": 0.00010754124853744242, "loss": 1.0369, "step": 36835 }, { "epoch": 0.53, "grad_norm": 0.6484375, "learning_rate": 0.00010751628339220336, "loss": 0.9787, "step": 36840 }, { "epoch": 0.53, "grad_norm": 0.4921875, "learning_rate": 0.00010749131777583512, "loss": 0.8924, "step": 36845 }, { "epoch": 0.53, "grad_norm": 0.5625, "learning_rate": 0.0001074663516899025, "loss": 0.8231, "step": 36850 }, { "epoch": 0.53, "grad_norm": 0.51953125, "learning_rate": 0.00010744138513597051, "loss": 1.0391, "step": 36855 }, { "epoch": 0.53, "grad_norm": 0.498046875, "learning_rate": 0.000107416418115604, "loss": 0.983, "step": 36860 }, { "epoch": 0.53, "grad_norm": 0.55859375, "learning_rate": 0.00010739145063036797, "loss": 0.9169, "step": 36865 }, { "epoch": 0.53, "grad_norm": 0.51953125, "learning_rate": 0.00010736648268182738, "loss": 0.9787, "step": 36870 }, { "epoch": 0.53, "grad_norm": 0.51953125, "learning_rate": 0.00010734151427154726, "loss": 0.7626, "step": 36875 }, { "epoch": 0.53, "grad_norm": 0.62890625, "learning_rate": 0.00010731654540109268, "loss": 0.9184, "step": 36880 }, { "epoch": 0.53, "grad_norm": 0.65625, "learning_rate": 0.00010729157607202873, "loss": 1.0985, "step": 36885 }, { "epoch": 0.53, "grad_norm": 0.6640625, "learning_rate": 0.00010726660628592045, "loss": 0.9564, "step": 36890 }, { "epoch": 0.53, "grad_norm": 0.578125, "learning_rate": 0.00010724163604433302, "loss": 0.9134, "step": 36895 }, { "epoch": 0.53, "grad_norm": 0.54296875, "learning_rate": 0.0001072166653488316, "loss": 0.8505, "step": 36900 }, { "epoch": 0.53, "grad_norm": 0.60546875, "learning_rate": 0.00010719169420098134, "loss": 0.9296, "step": 36905 }, { "epoch": 0.53, "grad_norm": 0.55078125, "learning_rate": 0.00010716672260234753, "loss": 0.9877, "step": 36910 }, { "epoch": 0.53, "grad_norm": 0.57421875, "learning_rate": 0.00010714175055449538, "loss": 0.8699, "step": 36915 }, { "epoch": 0.53, "grad_norm": 0.609375, "learning_rate": 0.00010711677805899017, "loss": 0.9097, "step": 36920 }, { "epoch": 0.53, "grad_norm": 0.478515625, "learning_rate": 0.0001070918051173972, "loss": 0.9882, "step": 36925 }, { "epoch": 0.53, "grad_norm": 0.55859375, "learning_rate": 0.0001070668317312818, "loss": 0.9172, "step": 36930 }, { "epoch": 0.53, "grad_norm": 0.5, "learning_rate": 0.00010704185790220938, "loss": 0.9566, "step": 36935 }, { "epoch": 0.53, "grad_norm": 0.57421875, "learning_rate": 0.00010701688363174524, "loss": 1.0493, "step": 36940 }, { "epoch": 0.53, "grad_norm": 0.4765625, "learning_rate": 0.00010699190892145487, "loss": 0.897, "step": 36945 }, { "epoch": 0.53, "grad_norm": 0.578125, "learning_rate": 0.0001069669337729037, "loss": 0.9825, "step": 36950 }, { "epoch": 0.53, "grad_norm": 0.59765625, "learning_rate": 0.00010694195818765713, "loss": 0.893, "step": 36955 }, { "epoch": 0.53, "grad_norm": 0.55078125, "learning_rate": 0.00010691698216728076, "loss": 0.9239, "step": 36960 }, { "epoch": 0.53, "grad_norm": 0.54296875, "learning_rate": 0.00010689200571334009, "loss": 0.8994, "step": 36965 }, { "epoch": 0.53, "grad_norm": 0.5234375, "learning_rate": 0.00010686702882740061, "loss": 0.8999, "step": 36970 }, { "epoch": 0.53, "grad_norm": 0.51171875, "learning_rate": 0.00010684205151102795, "loss": 0.9204, "step": 36975 }, { "epoch": 0.53, "grad_norm": 0.56640625, "learning_rate": 0.00010681707376578771, "loss": 0.9035, "step": 36980 }, { "epoch": 0.53, "grad_norm": 0.498046875, "learning_rate": 0.00010679209559324552, "loss": 1.1007, "step": 36985 }, { "epoch": 0.53, "grad_norm": 0.60546875, "learning_rate": 0.00010676711699496706, "loss": 1.1832, "step": 36990 }, { "epoch": 0.53, "grad_norm": 0.5234375, "learning_rate": 0.00010674213797251798, "loss": 0.9332, "step": 36995 }, { "epoch": 0.53, "grad_norm": 0.55078125, "learning_rate": 0.00010671715852746403, "loss": 0.9942, "step": 37000 }, { "epoch": 0.53, "grad_norm": 0.58203125, "learning_rate": 0.00010669217866137094, "loss": 0.9642, "step": 37005 }, { "epoch": 0.53, "grad_norm": 0.53515625, "learning_rate": 0.00010666719837580443, "loss": 0.8863, "step": 37010 }, { "epoch": 0.53, "grad_norm": 0.52734375, "learning_rate": 0.00010664221767233036, "loss": 0.8984, "step": 37015 }, { "epoch": 0.53, "grad_norm": 0.53125, "learning_rate": 0.00010661723655251454, "loss": 0.9529, "step": 37020 }, { "epoch": 0.53, "grad_norm": 0.55859375, "learning_rate": 0.00010659225501792277, "loss": 0.8908, "step": 37025 }, { "epoch": 0.53, "grad_norm": 0.55078125, "learning_rate": 0.00010656727307012096, "loss": 0.9531, "step": 37030 }, { "epoch": 0.53, "grad_norm": 0.55078125, "learning_rate": 0.00010654229071067499, "loss": 0.8834, "step": 37035 }, { "epoch": 0.53, "grad_norm": 0.546875, "learning_rate": 0.00010651730794115075, "loss": 0.9505, "step": 37040 }, { "epoch": 0.53, "grad_norm": 0.484375, "learning_rate": 0.00010649232476311428, "loss": 0.9008, "step": 37045 }, { "epoch": 0.53, "grad_norm": 0.5859375, "learning_rate": 0.00010646734117813146, "loss": 0.8971, "step": 37050 }, { "epoch": 0.53, "grad_norm": 0.5234375, "learning_rate": 0.00010644235718776836, "loss": 1.0166, "step": 37055 }, { "epoch": 0.53, "grad_norm": 0.5, "learning_rate": 0.00010641737279359097, "loss": 1.0054, "step": 37060 }, { "epoch": 0.53, "grad_norm": 0.5390625, "learning_rate": 0.00010639238799716533, "loss": 0.9391, "step": 37065 }, { "epoch": 0.53, "grad_norm": 0.6328125, "learning_rate": 0.00010636740280005754, "loss": 0.9439, "step": 37070 }, { "epoch": 0.53, "grad_norm": 0.55859375, "learning_rate": 0.00010634241720383372, "loss": 1.0102, "step": 37075 }, { "epoch": 0.53, "grad_norm": 0.6171875, "learning_rate": 0.0001063174312100599, "loss": 0.9331, "step": 37080 }, { "epoch": 0.53, "grad_norm": 0.6328125, "learning_rate": 0.00010629244482030234, "loss": 1.0283, "step": 37085 }, { "epoch": 0.53, "grad_norm": 0.53515625, "learning_rate": 0.00010626745803612717, "loss": 0.8927, "step": 37090 }, { "epoch": 0.53, "grad_norm": 0.58203125, "learning_rate": 0.0001062424708591006, "loss": 0.9256, "step": 37095 }, { "epoch": 0.53, "grad_norm": 0.5546875, "learning_rate": 0.00010621748329078882, "loss": 0.9316, "step": 37100 }, { "epoch": 0.53, "grad_norm": 0.578125, "learning_rate": 0.0001061924953327581, "loss": 0.7805, "step": 37105 }, { "epoch": 0.53, "grad_norm": 0.55078125, "learning_rate": 0.00010616750698657474, "loss": 0.9389, "step": 37110 }, { "epoch": 0.53, "grad_norm": 0.5859375, "learning_rate": 0.00010614251825380504, "loss": 0.9466, "step": 37115 }, { "epoch": 0.53, "grad_norm": 0.60546875, "learning_rate": 0.00010611752913601525, "loss": 1.0314, "step": 37120 }, { "epoch": 0.53, "grad_norm": 0.50390625, "learning_rate": 0.00010609253963477182, "loss": 0.903, "step": 37125 }, { "epoch": 0.53, "grad_norm": 0.5546875, "learning_rate": 0.00010606754975164103, "loss": 0.8865, "step": 37130 }, { "epoch": 0.53, "grad_norm": 0.5859375, "learning_rate": 0.00010604255948818931, "loss": 0.8133, "step": 37135 }, { "epoch": 0.53, "grad_norm": 0.60546875, "learning_rate": 0.0001060175688459831, "loss": 0.925, "step": 37140 }, { "epoch": 0.53, "grad_norm": 0.5625, "learning_rate": 0.00010599257782658882, "loss": 1.0755, "step": 37145 }, { "epoch": 0.53, "grad_norm": 0.609375, "learning_rate": 0.00010596758643157294, "loss": 1.0568, "step": 37150 }, { "epoch": 0.53, "grad_norm": 0.59765625, "learning_rate": 0.00010594259466250194, "loss": 1.0214, "step": 37155 }, { "epoch": 0.53, "grad_norm": 0.5390625, "learning_rate": 0.0001059176025209423, "loss": 0.9462, "step": 37160 }, { "epoch": 0.53, "grad_norm": 0.65234375, "learning_rate": 0.00010589261000846065, "loss": 0.9693, "step": 37165 }, { "epoch": 0.53, "grad_norm": 0.61328125, "learning_rate": 0.00010586761712662345, "loss": 1.0498, "step": 37170 }, { "epoch": 0.53, "grad_norm": 0.65234375, "learning_rate": 0.00010584262387699737, "loss": 1.0542, "step": 37175 }, { "epoch": 0.53, "grad_norm": 0.5, "learning_rate": 0.00010581763026114894, "loss": 0.9571, "step": 37180 }, { "epoch": 0.53, "grad_norm": 0.5546875, "learning_rate": 0.00010579263628064484, "loss": 1.0827, "step": 37185 }, { "epoch": 0.53, "grad_norm": 0.6171875, "learning_rate": 0.00010576764193705167, "loss": 0.9859, "step": 37190 }, { "epoch": 0.53, "grad_norm": 0.60546875, "learning_rate": 0.00010574264723193617, "loss": 1.0388, "step": 37195 }, { "epoch": 0.53, "grad_norm": 0.52734375, "learning_rate": 0.00010571765216686498, "loss": 0.9622, "step": 37200 }, { "epoch": 0.53, "grad_norm": 0.5078125, "learning_rate": 0.00010569265674340485, "loss": 0.8824, "step": 37205 }, { "epoch": 0.53, "grad_norm": 0.53515625, "learning_rate": 0.00010566766096312251, "loss": 0.8486, "step": 37210 }, { "epoch": 0.53, "grad_norm": 0.640625, "learning_rate": 0.0001056426648275847, "loss": 1.0735, "step": 37215 }, { "epoch": 0.53, "grad_norm": 0.5234375, "learning_rate": 0.00010561766833835826, "loss": 0.9757, "step": 37220 }, { "epoch": 0.53, "grad_norm": 0.53515625, "learning_rate": 0.00010559267149700998, "loss": 0.9932, "step": 37225 }, { "epoch": 0.53, "grad_norm": 0.5703125, "learning_rate": 0.00010556767430510665, "loss": 0.9202, "step": 37230 }, { "epoch": 0.53, "grad_norm": 0.6015625, "learning_rate": 0.00010554267676421518, "loss": 0.8795, "step": 37235 }, { "epoch": 0.53, "grad_norm": 0.5, "learning_rate": 0.0001055176788759024, "loss": 0.8951, "step": 37240 }, { "epoch": 0.53, "grad_norm": 0.625, "learning_rate": 0.00010549268064173523, "loss": 0.9054, "step": 37245 }, { "epoch": 0.53, "grad_norm": 0.515625, "learning_rate": 0.0001054676820632806, "loss": 0.8105, "step": 37250 }, { "epoch": 0.53, "grad_norm": 0.5546875, "learning_rate": 0.00010544268314210541, "loss": 1.041, "step": 37255 }, { "epoch": 0.53, "grad_norm": 0.5703125, "learning_rate": 0.00010541768387977664, "loss": 1.0327, "step": 37260 }, { "epoch": 0.53, "grad_norm": 0.546875, "learning_rate": 0.00010539268427786129, "loss": 0.8874, "step": 37265 }, { "epoch": 0.53, "grad_norm": 0.5859375, "learning_rate": 0.00010536768433792632, "loss": 0.9032, "step": 37270 }, { "epoch": 0.53, "grad_norm": 0.5078125, "learning_rate": 0.0001053426840615388, "loss": 0.9239, "step": 37275 }, { "epoch": 0.53, "grad_norm": 0.61328125, "learning_rate": 0.00010531768345026576, "loss": 0.9904, "step": 37280 }, { "epoch": 0.53, "grad_norm": 0.515625, "learning_rate": 0.00010529268250567427, "loss": 0.8399, "step": 37285 }, { "epoch": 0.53, "grad_norm": 0.51171875, "learning_rate": 0.00010526768122933142, "loss": 0.9121, "step": 37290 }, { "epoch": 0.53, "grad_norm": 0.53515625, "learning_rate": 0.0001052426796228043, "loss": 0.9694, "step": 37295 }, { "epoch": 0.54, "grad_norm": 0.69140625, "learning_rate": 0.00010521767768766002, "loss": 0.8676, "step": 37300 }, { "epoch": 0.54, "grad_norm": 0.6328125, "learning_rate": 0.0001051926754254658, "loss": 0.9611, "step": 37305 }, { "epoch": 0.54, "grad_norm": 0.5546875, "learning_rate": 0.00010516767283778877, "loss": 1.1587, "step": 37310 }, { "epoch": 0.54, "grad_norm": 0.59375, "learning_rate": 0.0001051426699261961, "loss": 0.9311, "step": 37315 }, { "epoch": 0.54, "grad_norm": 0.55078125, "learning_rate": 0.00010511766669225505, "loss": 0.9896, "step": 37320 }, { "epoch": 0.54, "grad_norm": 0.53125, "learning_rate": 0.00010509266313753279, "loss": 0.9138, "step": 37325 }, { "epoch": 0.54, "grad_norm": 0.52734375, "learning_rate": 0.00010506765926359661, "loss": 0.8587, "step": 37330 }, { "epoch": 0.54, "grad_norm": 0.56640625, "learning_rate": 0.00010504265507201378, "loss": 0.999, "step": 37335 }, { "epoch": 0.54, "grad_norm": 0.55078125, "learning_rate": 0.0001050176505643516, "loss": 0.8369, "step": 37340 }, { "epoch": 0.54, "grad_norm": 0.58984375, "learning_rate": 0.00010499264574217738, "loss": 0.9698, "step": 37345 }, { "epoch": 0.54, "grad_norm": 0.61328125, "learning_rate": 0.0001049676406070584, "loss": 1.0164, "step": 37350 }, { "epoch": 0.54, "grad_norm": 0.5859375, "learning_rate": 0.00010494263516056206, "loss": 1.0847, "step": 37355 }, { "epoch": 0.54, "grad_norm": 0.54296875, "learning_rate": 0.00010491762940425576, "loss": 0.9465, "step": 37360 }, { "epoch": 0.54, "grad_norm": 0.5390625, "learning_rate": 0.0001048926233397068, "loss": 0.9101, "step": 37365 }, { "epoch": 0.54, "grad_norm": 0.59375, "learning_rate": 0.00010486761696848263, "loss": 0.9026, "step": 37370 }, { "epoch": 0.54, "grad_norm": 0.62890625, "learning_rate": 0.00010484261029215073, "loss": 1.0973, "step": 37375 }, { "epoch": 0.54, "grad_norm": 0.54296875, "learning_rate": 0.00010481760331227845, "loss": 1.0309, "step": 37380 }, { "epoch": 0.54, "grad_norm": 0.6171875, "learning_rate": 0.00010479259603043336, "loss": 0.9052, "step": 37385 }, { "epoch": 0.54, "grad_norm": 0.5546875, "learning_rate": 0.00010476758844818286, "loss": 0.9943, "step": 37390 }, { "epoch": 0.54, "grad_norm": 0.5390625, "learning_rate": 0.00010474258056709449, "loss": 0.9287, "step": 37395 }, { "epoch": 0.54, "grad_norm": 0.466796875, "learning_rate": 0.00010471757238873578, "loss": 1.0231, "step": 37400 }, { "epoch": 0.54, "grad_norm": 0.54296875, "learning_rate": 0.00010469256391467424, "loss": 1.0086, "step": 37405 }, { "epoch": 0.54, "grad_norm": 0.55859375, "learning_rate": 0.00010466755514647749, "loss": 0.9275, "step": 37410 }, { "epoch": 0.54, "grad_norm": 0.6171875, "learning_rate": 0.00010464254608571304, "loss": 1.0473, "step": 37415 }, { "epoch": 0.54, "grad_norm": 0.54296875, "learning_rate": 0.0001046175367339485, "loss": 0.9569, "step": 37420 }, { "epoch": 0.54, "grad_norm": 0.57421875, "learning_rate": 0.00010459252709275152, "loss": 1.0617, "step": 37425 }, { "epoch": 0.54, "grad_norm": 0.49609375, "learning_rate": 0.00010456751716368971, "loss": 0.9215, "step": 37430 }, { "epoch": 0.54, "grad_norm": 0.68359375, "learning_rate": 0.0001045425069483307, "loss": 0.9922, "step": 37435 }, { "epoch": 0.54, "grad_norm": 0.59765625, "learning_rate": 0.00010451749644824222, "loss": 0.9558, "step": 37440 }, { "epoch": 0.54, "grad_norm": 0.546875, "learning_rate": 0.00010449248566499188, "loss": 0.8627, "step": 37445 }, { "epoch": 0.54, "grad_norm": 0.59375, "learning_rate": 0.00010446747460014743, "loss": 1.0908, "step": 37450 }, { "epoch": 0.54, "grad_norm": 0.58984375, "learning_rate": 0.0001044424632552766, "loss": 0.9257, "step": 37455 }, { "epoch": 0.54, "grad_norm": 0.6328125, "learning_rate": 0.00010441745163194709, "loss": 0.8122, "step": 37460 }, { "epoch": 0.54, "grad_norm": 0.5625, "learning_rate": 0.00010439243973172673, "loss": 1.0586, "step": 37465 }, { "epoch": 0.54, "grad_norm": 0.6015625, "learning_rate": 0.0001043674275561832, "loss": 0.9671, "step": 37470 }, { "epoch": 0.54, "grad_norm": 0.65234375, "learning_rate": 0.00010434241510688434, "loss": 0.9491, "step": 37475 }, { "epoch": 0.54, "grad_norm": 0.56640625, "learning_rate": 0.00010431740238539796, "loss": 0.9244, "step": 37480 }, { "epoch": 0.54, "grad_norm": 0.515625, "learning_rate": 0.00010429238939329189, "loss": 0.8859, "step": 37485 }, { "epoch": 0.54, "grad_norm": 0.53515625, "learning_rate": 0.00010426737613213395, "loss": 0.9897, "step": 37490 }, { "epoch": 0.54, "grad_norm": 0.578125, "learning_rate": 0.00010424236260349203, "loss": 0.9511, "step": 37495 }, { "epoch": 0.54, "grad_norm": 0.63671875, "learning_rate": 0.00010421734880893396, "loss": 0.8938, "step": 37500 }, { "epoch": 0.54, "grad_norm": 0.5390625, "learning_rate": 0.0001041923347500277, "loss": 0.9602, "step": 37505 }, { "epoch": 0.54, "grad_norm": 0.62890625, "learning_rate": 0.00010416732042834112, "loss": 0.9406, "step": 37510 }, { "epoch": 0.54, "grad_norm": 0.58203125, "learning_rate": 0.0001041423058454421, "loss": 0.9569, "step": 37515 }, { "epoch": 0.54, "grad_norm": 0.462890625, "learning_rate": 0.0001041172910028987, "loss": 0.8252, "step": 37520 }, { "epoch": 0.54, "grad_norm": 0.56640625, "learning_rate": 0.00010409227590227879, "loss": 1.0166, "step": 37525 }, { "epoch": 0.54, "grad_norm": 0.490234375, "learning_rate": 0.00010406726054515035, "loss": 0.9833, "step": 37530 }, { "epoch": 0.54, "grad_norm": 0.515625, "learning_rate": 0.00010404224493308139, "loss": 1.1919, "step": 37535 }, { "epoch": 0.54, "grad_norm": 0.49609375, "learning_rate": 0.00010401722906763993, "loss": 0.8367, "step": 37540 }, { "epoch": 0.54, "grad_norm": 0.48828125, "learning_rate": 0.00010399221295039396, "loss": 1.0228, "step": 37545 }, { "epoch": 0.54, "grad_norm": 0.8125, "learning_rate": 0.00010396719658291155, "loss": 1.0634, "step": 37550 }, { "epoch": 0.54, "grad_norm": 0.515625, "learning_rate": 0.0001039421799667607, "loss": 0.9437, "step": 37555 }, { "epoch": 0.54, "grad_norm": 0.59765625, "learning_rate": 0.00010391716310350957, "loss": 0.9024, "step": 37560 }, { "epoch": 0.54, "grad_norm": 0.578125, "learning_rate": 0.00010389214599472617, "loss": 0.9552, "step": 37565 }, { "epoch": 0.54, "grad_norm": 0.62109375, "learning_rate": 0.00010386712864197863, "loss": 1.1085, "step": 37570 }, { "epoch": 0.54, "grad_norm": 0.55859375, "learning_rate": 0.00010384211104683508, "loss": 0.8691, "step": 37575 }, { "epoch": 0.54, "grad_norm": 0.63671875, "learning_rate": 0.00010381709321086361, "loss": 1.0081, "step": 37580 }, { "epoch": 0.54, "grad_norm": 0.60546875, "learning_rate": 0.00010379207513563239, "loss": 0.951, "step": 37585 }, { "epoch": 0.54, "grad_norm": 0.63671875, "learning_rate": 0.00010376705682270958, "loss": 1.0732, "step": 37590 }, { "epoch": 0.54, "grad_norm": 0.58203125, "learning_rate": 0.00010374203827366338, "loss": 1.0202, "step": 37595 }, { "epoch": 0.54, "grad_norm": 0.5703125, "learning_rate": 0.00010371701949006195, "loss": 0.9479, "step": 37600 }, { "epoch": 0.54, "grad_norm": 0.5078125, "learning_rate": 0.0001036920004734735, "loss": 0.8367, "step": 37605 }, { "epoch": 0.54, "grad_norm": 0.58203125, "learning_rate": 0.00010366698122546623, "loss": 0.891, "step": 37610 }, { "epoch": 0.54, "grad_norm": 0.54296875, "learning_rate": 0.00010364196174760845, "loss": 0.8879, "step": 37615 }, { "epoch": 0.54, "grad_norm": 0.6015625, "learning_rate": 0.00010361694204146833, "loss": 1.0175, "step": 37620 }, { "epoch": 0.54, "grad_norm": 0.56640625, "learning_rate": 0.00010359192210861417, "loss": 0.9633, "step": 37625 }, { "epoch": 0.54, "grad_norm": 0.62890625, "learning_rate": 0.00010356690195061424, "loss": 1.1454, "step": 37630 }, { "epoch": 0.54, "grad_norm": 0.63671875, "learning_rate": 0.00010354188156903686, "loss": 0.8291, "step": 37635 }, { "epoch": 0.54, "grad_norm": 0.64453125, "learning_rate": 0.00010351686096545026, "loss": 0.9924, "step": 37640 }, { "epoch": 0.54, "grad_norm": 0.57421875, "learning_rate": 0.00010349184014142284, "loss": 0.8741, "step": 37645 }, { "epoch": 0.54, "grad_norm": 0.474609375, "learning_rate": 0.0001034668190985229, "loss": 0.8813, "step": 37650 }, { "epoch": 0.54, "grad_norm": 0.486328125, "learning_rate": 0.0001034417978383188, "loss": 0.822, "step": 37655 }, { "epoch": 0.54, "grad_norm": 0.5703125, "learning_rate": 0.00010341677636237887, "loss": 0.9509, "step": 37660 }, { "epoch": 0.54, "grad_norm": 0.546875, "learning_rate": 0.0001033917546722715, "loss": 0.81, "step": 37665 }, { "epoch": 0.54, "grad_norm": 0.53515625, "learning_rate": 0.0001033667327695651, "loss": 0.9395, "step": 37670 }, { "epoch": 0.54, "grad_norm": 0.53125, "learning_rate": 0.00010334171065582807, "loss": 0.8975, "step": 37675 }, { "epoch": 0.54, "grad_norm": 0.55078125, "learning_rate": 0.0001033166883326288, "loss": 0.9838, "step": 37680 }, { "epoch": 0.54, "grad_norm": 0.58984375, "learning_rate": 0.00010329166580153573, "loss": 0.7565, "step": 37685 }, { "epoch": 0.54, "grad_norm": 0.56640625, "learning_rate": 0.00010326664306411727, "loss": 0.9554, "step": 37690 }, { "epoch": 0.54, "grad_norm": 0.5234375, "learning_rate": 0.00010324162012194194, "loss": 1.0028, "step": 37695 }, { "epoch": 0.54, "grad_norm": 0.5390625, "learning_rate": 0.00010321659697657818, "loss": 0.9018, "step": 37700 }, { "epoch": 0.54, "grad_norm": 0.5234375, "learning_rate": 0.0001031915736295944, "loss": 0.943, "step": 37705 }, { "epoch": 0.54, "grad_norm": 0.490234375, "learning_rate": 0.0001031665500825592, "loss": 0.8955, "step": 37710 }, { "epoch": 0.54, "grad_norm": 0.58203125, "learning_rate": 0.00010314152633704102, "loss": 1.1076, "step": 37715 }, { "epoch": 0.54, "grad_norm": 0.53515625, "learning_rate": 0.00010311650239460834, "loss": 1.0408, "step": 37720 }, { "epoch": 0.54, "grad_norm": 0.5859375, "learning_rate": 0.00010309147825682982, "loss": 0.8527, "step": 37725 }, { "epoch": 0.54, "grad_norm": 0.59765625, "learning_rate": 0.00010306645392527388, "loss": 0.9127, "step": 37730 }, { "epoch": 0.54, "grad_norm": 0.498046875, "learning_rate": 0.00010304142940150913, "loss": 0.8388, "step": 37735 }, { "epoch": 0.54, "grad_norm": 0.498046875, "learning_rate": 0.00010301640468710412, "loss": 1.0603, "step": 37740 }, { "epoch": 0.54, "grad_norm": 0.56640625, "learning_rate": 0.0001029913797836274, "loss": 1.1031, "step": 37745 }, { "epoch": 0.54, "grad_norm": 0.58984375, "learning_rate": 0.00010296635469264764, "loss": 0.9181, "step": 37750 }, { "epoch": 0.54, "grad_norm": 0.56640625, "learning_rate": 0.00010294132941573338, "loss": 0.8225, "step": 37755 }, { "epoch": 0.54, "grad_norm": 0.5859375, "learning_rate": 0.0001029163039544532, "loss": 1.0889, "step": 37760 }, { "epoch": 0.54, "grad_norm": 0.494140625, "learning_rate": 0.00010289127831037579, "loss": 0.8887, "step": 37765 }, { "epoch": 0.54, "grad_norm": 0.57421875, "learning_rate": 0.00010286625248506979, "loss": 1.0263, "step": 37770 }, { "epoch": 0.54, "grad_norm": 0.53125, "learning_rate": 0.00010284122648010377, "loss": 0.9872, "step": 37775 }, { "epoch": 0.54, "grad_norm": 0.5546875, "learning_rate": 0.00010281620029704649, "loss": 0.977, "step": 37780 }, { "epoch": 0.54, "grad_norm": 0.5546875, "learning_rate": 0.0001027911739374665, "loss": 1.1302, "step": 37785 }, { "epoch": 0.54, "grad_norm": 0.56640625, "learning_rate": 0.00010276614740293262, "loss": 1.0363, "step": 37790 }, { "epoch": 0.54, "grad_norm": 0.49609375, "learning_rate": 0.00010274112069501344, "loss": 0.9864, "step": 37795 }, { "epoch": 0.54, "grad_norm": 0.51953125, "learning_rate": 0.00010271609381527767, "loss": 0.8938, "step": 37800 }, { "epoch": 0.54, "grad_norm": 0.50390625, "learning_rate": 0.0001026910667652941, "loss": 0.9837, "step": 37805 }, { "epoch": 0.54, "grad_norm": 0.66796875, "learning_rate": 0.00010266603954663136, "loss": 1.0605, "step": 37810 }, { "epoch": 0.54, "grad_norm": 0.6015625, "learning_rate": 0.00010264101216085821, "loss": 0.9841, "step": 37815 }, { "epoch": 0.54, "grad_norm": 0.59375, "learning_rate": 0.00010261598460954345, "loss": 0.8508, "step": 37820 }, { "epoch": 0.54, "grad_norm": 0.7578125, "learning_rate": 0.0001025909568942558, "loss": 0.9525, "step": 37825 }, { "epoch": 0.54, "grad_norm": 0.490234375, "learning_rate": 0.00010256592901656397, "loss": 0.9262, "step": 37830 }, { "epoch": 0.54, "grad_norm": 0.5625, "learning_rate": 0.00010254090097803685, "loss": 0.94, "step": 37835 }, { "epoch": 0.54, "grad_norm": 0.5390625, "learning_rate": 0.0001025158727802431, "loss": 0.8998, "step": 37840 }, { "epoch": 0.54, "grad_norm": 0.52734375, "learning_rate": 0.00010249084442475163, "loss": 0.9269, "step": 37845 }, { "epoch": 0.54, "grad_norm": 0.5234375, "learning_rate": 0.00010246581591313118, "loss": 0.8688, "step": 37850 }, { "epoch": 0.54, "grad_norm": 0.52734375, "learning_rate": 0.00010244078724695055, "loss": 1.0001, "step": 37855 }, { "epoch": 0.54, "grad_norm": 0.60546875, "learning_rate": 0.00010241575842777864, "loss": 0.8315, "step": 37860 }, { "epoch": 0.54, "grad_norm": 0.5703125, "learning_rate": 0.00010239072945718422, "loss": 0.882, "step": 37865 }, { "epoch": 0.54, "grad_norm": 0.5, "learning_rate": 0.00010236570033673614, "loss": 1.1545, "step": 37870 }, { "epoch": 0.54, "grad_norm": 0.50390625, "learning_rate": 0.00010234067106800329, "loss": 0.9951, "step": 37875 }, { "epoch": 0.54, "grad_norm": 0.5546875, "learning_rate": 0.00010231564165255452, "loss": 0.9681, "step": 37880 }, { "epoch": 0.54, "grad_norm": 0.56640625, "learning_rate": 0.00010229061209195867, "loss": 0.9191, "step": 37885 }, { "epoch": 0.54, "grad_norm": 0.57421875, "learning_rate": 0.00010226558238778466, "loss": 0.9133, "step": 37890 }, { "epoch": 0.54, "grad_norm": 0.57421875, "learning_rate": 0.00010224055254160134, "loss": 0.9152, "step": 37895 }, { "epoch": 0.54, "grad_norm": 0.50390625, "learning_rate": 0.00010221552255497767, "loss": 1.0462, "step": 37900 }, { "epoch": 0.54, "grad_norm": 0.5546875, "learning_rate": 0.00010219049242948251, "loss": 1.047, "step": 37905 }, { "epoch": 0.54, "grad_norm": 0.6640625, "learning_rate": 0.00010216546216668479, "loss": 1.0363, "step": 37910 }, { "epoch": 0.54, "grad_norm": 0.58203125, "learning_rate": 0.00010214043176815344, "loss": 1.0124, "step": 37915 }, { "epoch": 0.54, "grad_norm": 0.52734375, "learning_rate": 0.00010211540123545737, "loss": 1.0344, "step": 37920 }, { "epoch": 0.54, "grad_norm": 0.6171875, "learning_rate": 0.00010209037057016556, "loss": 0.9466, "step": 37925 }, { "epoch": 0.54, "grad_norm": 0.53125, "learning_rate": 0.00010206533977384694, "loss": 1.0074, "step": 37930 }, { "epoch": 0.54, "grad_norm": 0.50390625, "learning_rate": 0.00010204030884807046, "loss": 0.982, "step": 37935 }, { "epoch": 0.54, "grad_norm": 0.625, "learning_rate": 0.00010201527779440509, "loss": 0.8657, "step": 37940 }, { "epoch": 0.54, "grad_norm": 0.5, "learning_rate": 0.00010199024661441985, "loss": 1.0115, "step": 37945 }, { "epoch": 0.54, "grad_norm": 0.52734375, "learning_rate": 0.00010196521530968363, "loss": 0.7979, "step": 37950 }, { "epoch": 0.54, "grad_norm": 0.57421875, "learning_rate": 0.00010194018388176552, "loss": 0.8499, "step": 37955 }, { "epoch": 0.54, "grad_norm": 0.515625, "learning_rate": 0.00010191515233223448, "loss": 1.0006, "step": 37960 }, { "epoch": 0.54, "grad_norm": 0.51953125, "learning_rate": 0.00010189012066265949, "loss": 0.8995, "step": 37965 }, { "epoch": 0.54, "grad_norm": 0.49609375, "learning_rate": 0.00010186508887460959, "loss": 1.0087, "step": 37970 }, { "epoch": 0.54, "grad_norm": 0.52734375, "learning_rate": 0.0001018400569696538, "loss": 0.8561, "step": 37975 }, { "epoch": 0.54, "grad_norm": 0.58203125, "learning_rate": 0.0001018150249493611, "loss": 1.0744, "step": 37980 }, { "epoch": 0.54, "grad_norm": 0.55078125, "learning_rate": 0.00010178999281530062, "loss": 0.9463, "step": 37985 }, { "epoch": 0.54, "grad_norm": 0.494140625, "learning_rate": 0.00010176496056904135, "loss": 0.8571, "step": 37990 }, { "epoch": 0.55, "grad_norm": 0.5703125, "learning_rate": 0.00010173992821215232, "loss": 0.7871, "step": 37995 }, { "epoch": 0.55, "grad_norm": 0.64453125, "learning_rate": 0.00010171489574620263, "loss": 1.1391, "step": 38000 }, { "epoch": 0.55, "grad_norm": 0.5546875, "learning_rate": 0.00010168986317276128, "loss": 1.0766, "step": 38005 }, { "epoch": 0.55, "grad_norm": 0.5078125, "learning_rate": 0.00010166483049339741, "loss": 0.9119, "step": 38010 }, { "epoch": 0.55, "grad_norm": 0.546875, "learning_rate": 0.00010163979770968008, "loss": 0.8999, "step": 38015 }, { "epoch": 0.55, "grad_norm": 0.5546875, "learning_rate": 0.00010161476482317835, "loss": 1.039, "step": 38020 }, { "epoch": 0.55, "grad_norm": 0.578125, "learning_rate": 0.00010158973183546132, "loss": 1.0054, "step": 38025 }, { "epoch": 0.55, "grad_norm": 0.52734375, "learning_rate": 0.00010156469874809808, "loss": 0.891, "step": 38030 }, { "epoch": 0.55, "grad_norm": 0.55859375, "learning_rate": 0.00010153966556265775, "loss": 1.0695, "step": 38035 }, { "epoch": 0.55, "grad_norm": 0.5625, "learning_rate": 0.00010151463228070943, "loss": 0.8359, "step": 38040 }, { "epoch": 0.55, "grad_norm": 0.578125, "learning_rate": 0.00010148959890382224, "loss": 0.9787, "step": 38045 }, { "epoch": 0.55, "grad_norm": 0.515625, "learning_rate": 0.0001014645654335653, "loss": 0.9187, "step": 38050 }, { "epoch": 0.55, "grad_norm": 0.5078125, "learning_rate": 0.00010143953187150772, "loss": 0.9577, "step": 38055 }, { "epoch": 0.55, "grad_norm": 0.58203125, "learning_rate": 0.00010141449821921862, "loss": 1.0231, "step": 38060 }, { "epoch": 0.55, "grad_norm": 0.5, "learning_rate": 0.00010138946447826718, "loss": 0.936, "step": 38065 }, { "epoch": 0.55, "grad_norm": 0.51953125, "learning_rate": 0.00010136443065022254, "loss": 0.8882, "step": 38070 }, { "epoch": 0.55, "grad_norm": 0.6640625, "learning_rate": 0.00010133939673665382, "loss": 0.8248, "step": 38075 }, { "epoch": 0.55, "grad_norm": 0.56640625, "learning_rate": 0.0001013143627391302, "loss": 0.9949, "step": 38080 }, { "epoch": 0.55, "grad_norm": 0.5546875, "learning_rate": 0.00010128932865922078, "loss": 1.0271, "step": 38085 }, { "epoch": 0.55, "grad_norm": 0.66796875, "learning_rate": 0.00010126429449849482, "loss": 0.9705, "step": 38090 }, { "epoch": 0.55, "grad_norm": 0.53515625, "learning_rate": 0.00010123926025852144, "loss": 0.8858, "step": 38095 }, { "epoch": 0.55, "grad_norm": 0.6015625, "learning_rate": 0.00010121422594086978, "loss": 0.9708, "step": 38100 }, { "epoch": 0.55, "grad_norm": 0.6171875, "learning_rate": 0.00010118919154710909, "loss": 0.9252, "step": 38105 }, { "epoch": 0.55, "grad_norm": 0.578125, "learning_rate": 0.00010116415707880848, "loss": 0.9082, "step": 38110 }, { "epoch": 0.55, "grad_norm": 0.60546875, "learning_rate": 0.00010113912253753719, "loss": 0.9346, "step": 38115 }, { "epoch": 0.55, "grad_norm": 0.63671875, "learning_rate": 0.00010111408792486446, "loss": 0.8487, "step": 38120 }, { "epoch": 0.55, "grad_norm": 0.58203125, "learning_rate": 0.00010108905324235935, "loss": 0.8382, "step": 38125 }, { "epoch": 0.55, "grad_norm": 0.546875, "learning_rate": 0.0001010640184915912, "loss": 0.9948, "step": 38130 }, { "epoch": 0.55, "grad_norm": 0.6953125, "learning_rate": 0.00010103898367412913, "loss": 1.1, "step": 38135 }, { "epoch": 0.55, "grad_norm": 0.55859375, "learning_rate": 0.00010101394879154238, "loss": 1.0851, "step": 38140 }, { "epoch": 0.55, "grad_norm": 0.5078125, "learning_rate": 0.00010098891384540017, "loss": 1.0185, "step": 38145 }, { "epoch": 0.55, "grad_norm": 0.5703125, "learning_rate": 0.00010096387883727174, "loss": 1.0252, "step": 38150 }, { "epoch": 0.55, "grad_norm": 0.55859375, "learning_rate": 0.00010093884376872625, "loss": 1.0436, "step": 38155 }, { "epoch": 0.55, "grad_norm": 0.486328125, "learning_rate": 0.00010091380864133297, "loss": 0.9197, "step": 38160 }, { "epoch": 0.55, "grad_norm": 0.53125, "learning_rate": 0.00010088877345666112, "loss": 0.9213, "step": 38165 }, { "epoch": 0.55, "grad_norm": 0.5625, "learning_rate": 0.00010086373821627995, "loss": 1.0176, "step": 38170 }, { "epoch": 0.55, "grad_norm": 0.59375, "learning_rate": 0.0001008387029217587, "loss": 0.9032, "step": 38175 }, { "epoch": 0.55, "grad_norm": 0.53515625, "learning_rate": 0.00010081366757466655, "loss": 0.9536, "step": 38180 }, { "epoch": 0.55, "grad_norm": 0.59765625, "learning_rate": 0.00010078863217657282, "loss": 0.8605, "step": 38185 }, { "epoch": 0.55, "grad_norm": 0.52734375, "learning_rate": 0.00010076359672904673, "loss": 1.0488, "step": 38190 }, { "epoch": 0.55, "grad_norm": 0.609375, "learning_rate": 0.0001007385612336575, "loss": 1.0442, "step": 38195 }, { "epoch": 0.55, "grad_norm": 0.62890625, "learning_rate": 0.00010071352569197446, "loss": 1.0319, "step": 38200 }, { "epoch": 0.55, "grad_norm": 0.5859375, "learning_rate": 0.0001006884901055668, "loss": 0.9562, "step": 38205 }, { "epoch": 0.55, "grad_norm": 0.55859375, "learning_rate": 0.00010066345447600375, "loss": 0.9968, "step": 38210 }, { "epoch": 0.55, "grad_norm": 0.578125, "learning_rate": 0.00010063841880485467, "loss": 0.9077, "step": 38215 }, { "epoch": 0.55, "grad_norm": 0.625, "learning_rate": 0.00010061338309368875, "loss": 1.001, "step": 38220 }, { "epoch": 0.55, "grad_norm": 0.58203125, "learning_rate": 0.00010058834734407529, "loss": 0.966, "step": 38225 }, { "epoch": 0.55, "grad_norm": 0.52734375, "learning_rate": 0.00010056331155758356, "loss": 0.8654, "step": 38230 }, { "epoch": 0.55, "grad_norm": 0.54296875, "learning_rate": 0.00010053827573578278, "loss": 1.0531, "step": 38235 }, { "epoch": 0.55, "grad_norm": 0.59765625, "learning_rate": 0.00010051323988024229, "loss": 0.9787, "step": 38240 }, { "epoch": 0.55, "grad_norm": 0.61328125, "learning_rate": 0.00010048820399253134, "loss": 0.9843, "step": 38245 }, { "epoch": 0.55, "grad_norm": 0.62109375, "learning_rate": 0.00010046316807421918, "loss": 0.9878, "step": 38250 }, { "epoch": 0.55, "grad_norm": 0.55078125, "learning_rate": 0.00010043813212687516, "loss": 0.8344, "step": 38255 }, { "epoch": 0.55, "grad_norm": 0.5, "learning_rate": 0.00010041309615206851, "loss": 0.8606, "step": 38260 }, { "epoch": 0.55, "grad_norm": 0.625, "learning_rate": 0.00010038806015136851, "loss": 1.0538, "step": 38265 }, { "epoch": 0.55, "grad_norm": 0.54296875, "learning_rate": 0.00010036302412634446, "loss": 0.8955, "step": 38270 }, { "epoch": 0.55, "grad_norm": 0.50390625, "learning_rate": 0.00010033798807856565, "loss": 1.0416, "step": 38275 }, { "epoch": 0.55, "grad_norm": 0.59765625, "learning_rate": 0.00010031295200960136, "loss": 0.88, "step": 38280 }, { "epoch": 0.55, "grad_norm": 0.63671875, "learning_rate": 0.00010028791592102087, "loss": 0.9125, "step": 38285 }, { "epoch": 0.55, "grad_norm": 0.5703125, "learning_rate": 0.00010026287981439348, "loss": 0.9274, "step": 38290 }, { "epoch": 0.55, "grad_norm": 0.51953125, "learning_rate": 0.0001002378436912885, "loss": 0.9349, "step": 38295 }, { "epoch": 0.55, "grad_norm": 0.5390625, "learning_rate": 0.0001002128075532752, "loss": 0.834, "step": 38300 }, { "epoch": 0.55, "grad_norm": 0.58203125, "learning_rate": 0.00010018777140192288, "loss": 0.9952, "step": 38305 }, { "epoch": 0.55, "grad_norm": 0.53125, "learning_rate": 0.00010016273523880084, "loss": 0.9502, "step": 38310 }, { "epoch": 0.55, "grad_norm": 0.66015625, "learning_rate": 0.00010013769906547839, "loss": 0.9224, "step": 38315 }, { "epoch": 0.55, "grad_norm": 0.62890625, "learning_rate": 0.00010011266288352477, "loss": 1.001, "step": 38320 }, { "epoch": 0.55, "grad_norm": 0.5625, "learning_rate": 0.00010008762669450931, "loss": 0.9386, "step": 38325 }, { "epoch": 0.55, "grad_norm": 0.462890625, "learning_rate": 0.00010006259050000133, "loss": 0.8958, "step": 38330 }, { "epoch": 0.55, "grad_norm": 0.5234375, "learning_rate": 0.00010003755430157012, "loss": 0.8092, "step": 38335 }, { "epoch": 0.55, "grad_norm": 0.5, "learning_rate": 0.00010001251810078493, "loss": 0.9878, "step": 38340 }, { "epoch": 0.55, "grad_norm": 0.515625, "learning_rate": 9.998748189921509e-05, "loss": 1.0938, "step": 38345 }, { "epoch": 0.55, "grad_norm": 0.6015625, "learning_rate": 9.996244569842992e-05, "loss": 0.9138, "step": 38350 }, { "epoch": 0.55, "grad_norm": 0.5546875, "learning_rate": 9.993740949999869e-05, "loss": 0.9054, "step": 38355 }, { "epoch": 0.55, "grad_norm": 0.6640625, "learning_rate": 9.991237330549067e-05, "loss": 0.9356, "step": 38360 }, { "epoch": 0.55, "grad_norm": 0.55859375, "learning_rate": 9.988733711647524e-05, "loss": 0.8874, "step": 38365 }, { "epoch": 0.55, "grad_norm": 0.64453125, "learning_rate": 9.986230093452166e-05, "loss": 0.9673, "step": 38370 }, { "epoch": 0.55, "grad_norm": 0.6953125, "learning_rate": 9.983726476119918e-05, "loss": 0.9727, "step": 38375 }, { "epoch": 0.55, "grad_norm": 0.5625, "learning_rate": 9.981222859807715e-05, "loss": 0.9835, "step": 38380 }, { "epoch": 0.55, "grad_norm": 0.5234375, "learning_rate": 9.978719244672481e-05, "loss": 0.9609, "step": 38385 }, { "epoch": 0.55, "grad_norm": 0.61328125, "learning_rate": 9.976215630871152e-05, "loss": 0.9405, "step": 38390 }, { "epoch": 0.55, "grad_norm": 0.578125, "learning_rate": 9.973712018560654e-05, "loss": 0.8642, "step": 38395 }, { "epoch": 0.55, "grad_norm": 0.6484375, "learning_rate": 9.971208407897914e-05, "loss": 1.0028, "step": 38400 }, { "epoch": 0.55, "grad_norm": 0.5625, "learning_rate": 9.968704799039867e-05, "loss": 0.9833, "step": 38405 }, { "epoch": 0.55, "grad_norm": 0.55078125, "learning_rate": 9.966201192143439e-05, "loss": 0.991, "step": 38410 }, { "epoch": 0.55, "grad_norm": 0.53125, "learning_rate": 9.963697587365555e-05, "loss": 0.8559, "step": 38415 }, { "epoch": 0.55, "grad_norm": 0.56640625, "learning_rate": 9.96119398486315e-05, "loss": 1.0733, "step": 38420 }, { "epoch": 0.55, "grad_norm": 0.5859375, "learning_rate": 9.958690384793154e-05, "loss": 0.8843, "step": 38425 }, { "epoch": 0.55, "grad_norm": 0.5625, "learning_rate": 9.956186787312488e-05, "loss": 0.9691, "step": 38430 }, { "epoch": 0.55, "grad_norm": 0.50390625, "learning_rate": 9.953683192578083e-05, "loss": 0.9404, "step": 38435 }, { "epoch": 0.55, "grad_norm": 0.546875, "learning_rate": 9.951179600746868e-05, "loss": 1.0152, "step": 38440 }, { "epoch": 0.55, "grad_norm": 0.515625, "learning_rate": 9.948676011975773e-05, "loss": 1.011, "step": 38445 }, { "epoch": 0.55, "grad_norm": 0.7421875, "learning_rate": 9.946172426421725e-05, "loss": 1.028, "step": 38450 }, { "epoch": 0.55, "grad_norm": 0.49609375, "learning_rate": 9.943668844241647e-05, "loss": 1.0303, "step": 38455 }, { "epoch": 0.55, "grad_norm": 0.56640625, "learning_rate": 9.941165265592472e-05, "loss": 0.9126, "step": 38460 }, { "epoch": 0.55, "grad_norm": 0.51171875, "learning_rate": 9.938661690631127e-05, "loss": 0.991, "step": 38465 }, { "epoch": 0.55, "grad_norm": 0.5546875, "learning_rate": 9.936158119514533e-05, "loss": 0.9697, "step": 38470 }, { "epoch": 0.55, "grad_norm": 0.5, "learning_rate": 9.933654552399628e-05, "loss": 0.8526, "step": 38475 }, { "epoch": 0.55, "grad_norm": 0.53125, "learning_rate": 9.931150989443325e-05, "loss": 0.9171, "step": 38480 }, { "epoch": 0.55, "grad_norm": 0.546875, "learning_rate": 9.928647430802558e-05, "loss": 1.0696, "step": 38485 }, { "epoch": 0.55, "grad_norm": 0.57421875, "learning_rate": 9.926143876634252e-05, "loss": 0.9272, "step": 38490 }, { "epoch": 0.55, "grad_norm": 0.5625, "learning_rate": 9.923640327095329e-05, "loss": 0.9579, "step": 38495 }, { "epoch": 0.55, "grad_norm": 0.51953125, "learning_rate": 9.921136782342719e-05, "loss": 0.9735, "step": 38500 }, { "epoch": 0.55, "grad_norm": 0.55078125, "learning_rate": 9.918633242533347e-05, "loss": 0.8839, "step": 38505 }, { "epoch": 0.55, "grad_norm": 0.61328125, "learning_rate": 9.916129707824133e-05, "loss": 1.0197, "step": 38510 }, { "epoch": 0.55, "grad_norm": 0.59765625, "learning_rate": 9.913626178372006e-05, "loss": 1.0029, "step": 38515 }, { "epoch": 0.55, "grad_norm": 0.63671875, "learning_rate": 9.911122654333889e-05, "loss": 0.9344, "step": 38520 }, { "epoch": 0.55, "grad_norm": 0.54296875, "learning_rate": 9.908619135866704e-05, "loss": 0.8733, "step": 38525 }, { "epoch": 0.55, "grad_norm": 0.6015625, "learning_rate": 9.906115623127381e-05, "loss": 1.0772, "step": 38530 }, { "epoch": 0.55, "grad_norm": 0.59765625, "learning_rate": 9.90361211627283e-05, "loss": 1.0062, "step": 38535 }, { "epoch": 0.55, "grad_norm": 0.51171875, "learning_rate": 9.901108615459986e-05, "loss": 0.8779, "step": 38540 }, { "epoch": 0.55, "grad_norm": 0.546875, "learning_rate": 9.898605120845766e-05, "loss": 0.9713, "step": 38545 }, { "epoch": 0.55, "grad_norm": 0.5859375, "learning_rate": 9.896101632587089e-05, "loss": 0.971, "step": 38550 }, { "epoch": 0.55, "grad_norm": 0.578125, "learning_rate": 9.893598150840884e-05, "loss": 1.0255, "step": 38555 }, { "epoch": 0.55, "grad_norm": 0.54296875, "learning_rate": 9.891094675764067e-05, "loss": 0.9138, "step": 38560 }, { "epoch": 0.55, "grad_norm": 0.640625, "learning_rate": 9.888591207513556e-05, "loss": 0.9943, "step": 38565 }, { "epoch": 0.55, "grad_norm": 0.515625, "learning_rate": 9.88608774624628e-05, "loss": 0.8789, "step": 38570 }, { "epoch": 0.55, "grad_norm": 0.5625, "learning_rate": 9.883584292119153e-05, "loss": 1.0116, "step": 38575 }, { "epoch": 0.55, "grad_norm": 0.60546875, "learning_rate": 9.881080845289097e-05, "loss": 0.8831, "step": 38580 }, { "epoch": 0.55, "grad_norm": 0.6171875, "learning_rate": 9.878577405913027e-05, "loss": 1.0177, "step": 38585 }, { "epoch": 0.55, "grad_norm": 0.5546875, "learning_rate": 9.87607397414786e-05, "loss": 1.1244, "step": 38590 }, { "epoch": 0.55, "grad_norm": 0.5625, "learning_rate": 9.873570550150522e-05, "loss": 0.9846, "step": 38595 }, { "epoch": 0.55, "grad_norm": 0.59375, "learning_rate": 9.871067134077924e-05, "loss": 0.9154, "step": 38600 }, { "epoch": 0.55, "grad_norm": 0.6484375, "learning_rate": 9.868563726086983e-05, "loss": 1.0878, "step": 38605 }, { "epoch": 0.55, "grad_norm": 0.55078125, "learning_rate": 9.866060326334621e-05, "loss": 0.8965, "step": 38610 }, { "epoch": 0.55, "grad_norm": 0.53515625, "learning_rate": 9.86355693497775e-05, "loss": 0.8671, "step": 38615 }, { "epoch": 0.55, "grad_norm": 0.55078125, "learning_rate": 9.861053552173281e-05, "loss": 0.8291, "step": 38620 }, { "epoch": 0.55, "grad_norm": 0.65625, "learning_rate": 9.858550178078137e-05, "loss": 0.9788, "step": 38625 }, { "epoch": 0.55, "grad_norm": 0.57421875, "learning_rate": 9.85604681284923e-05, "loss": 1.0503, "step": 38630 }, { "epoch": 0.55, "grad_norm": 0.58984375, "learning_rate": 9.853543456643475e-05, "loss": 1.1138, "step": 38635 }, { "epoch": 0.55, "grad_norm": 0.5625, "learning_rate": 9.851040109617777e-05, "loss": 1.1198, "step": 38640 }, { "epoch": 0.55, "grad_norm": 0.546875, "learning_rate": 9.84853677192906e-05, "loss": 0.9442, "step": 38645 }, { "epoch": 0.55, "grad_norm": 0.578125, "learning_rate": 9.846033443734227e-05, "loss": 0.863, "step": 38650 }, { "epoch": 0.55, "grad_norm": 0.546875, "learning_rate": 9.843530125190194e-05, "loss": 1.0133, "step": 38655 }, { "epoch": 0.55, "grad_norm": 0.6015625, "learning_rate": 9.841026816453869e-05, "loss": 1.0073, "step": 38660 }, { "epoch": 0.55, "grad_norm": 0.54296875, "learning_rate": 9.838523517682166e-05, "loss": 1.006, "step": 38665 }, { "epoch": 0.55, "grad_norm": 0.5703125, "learning_rate": 9.836020229031995e-05, "loss": 0.9199, "step": 38670 }, { "epoch": 0.55, "grad_norm": 0.5859375, "learning_rate": 9.833516950660259e-05, "loss": 1.0166, "step": 38675 }, { "epoch": 0.55, "grad_norm": 0.54296875, "learning_rate": 9.831013682723872e-05, "loss": 0.9378, "step": 38680 }, { "epoch": 0.55, "grad_norm": 0.60546875, "learning_rate": 9.828510425379742e-05, "loss": 1.1848, "step": 38685 }, { "epoch": 0.55, "grad_norm": 0.58203125, "learning_rate": 9.826007178784772e-05, "loss": 0.9421, "step": 38690 }, { "epoch": 0.56, "grad_norm": 0.59375, "learning_rate": 9.823503943095869e-05, "loss": 0.9063, "step": 38695 }, { "epoch": 0.56, "grad_norm": 0.51953125, "learning_rate": 9.82100071846994e-05, "loss": 0.9038, "step": 38700 }, { "epoch": 0.56, "grad_norm": 0.5703125, "learning_rate": 9.818497505063891e-05, "loss": 1.0482, "step": 38705 }, { "epoch": 0.56, "grad_norm": 0.5390625, "learning_rate": 9.815994303034623e-05, "loss": 1.0177, "step": 38710 }, { "epoch": 0.56, "grad_norm": 0.53125, "learning_rate": 9.813491112539043e-05, "loss": 0.9322, "step": 38715 }, { "epoch": 0.56, "grad_norm": 0.546875, "learning_rate": 9.810987933734054e-05, "loss": 0.85, "step": 38720 }, { "epoch": 0.56, "grad_norm": 0.60546875, "learning_rate": 9.808484766776556e-05, "loss": 1.0295, "step": 38725 }, { "epoch": 0.56, "grad_norm": 0.578125, "learning_rate": 9.805981611823448e-05, "loss": 1.066, "step": 38730 }, { "epoch": 0.56, "grad_norm": 0.59765625, "learning_rate": 9.803478469031636e-05, "loss": 1.0679, "step": 38735 }, { "epoch": 0.56, "grad_norm": 0.50390625, "learning_rate": 9.80097533855802e-05, "loss": 0.9112, "step": 38740 }, { "epoch": 0.56, "grad_norm": 0.51953125, "learning_rate": 9.798472220559493e-05, "loss": 1.1377, "step": 38745 }, { "epoch": 0.56, "grad_norm": 0.52734375, "learning_rate": 9.795969115192957e-05, "loss": 0.955, "step": 38750 }, { "epoch": 0.56, "grad_norm": 0.48828125, "learning_rate": 9.79346602261531e-05, "loss": 0.9294, "step": 38755 }, { "epoch": 0.56, "grad_norm": 0.49609375, "learning_rate": 9.790962942983447e-05, "loss": 0.827, "step": 38760 }, { "epoch": 0.56, "grad_norm": 0.61328125, "learning_rate": 9.788459876454264e-05, "loss": 0.9328, "step": 38765 }, { "epoch": 0.56, "grad_norm": 0.58203125, "learning_rate": 9.785956823184659e-05, "loss": 0.9699, "step": 38770 }, { "epoch": 0.56, "grad_norm": 0.498046875, "learning_rate": 9.783453783331524e-05, "loss": 0.8915, "step": 38775 }, { "epoch": 0.56, "grad_norm": 0.66015625, "learning_rate": 9.780950757051749e-05, "loss": 0.9012, "step": 38780 }, { "epoch": 0.56, "grad_norm": 0.5390625, "learning_rate": 9.778447744502234e-05, "loss": 0.8871, "step": 38785 }, { "epoch": 0.56, "grad_norm": 0.51953125, "learning_rate": 9.775944745839867e-05, "loss": 1.0527, "step": 38790 }, { "epoch": 0.56, "grad_norm": 0.50390625, "learning_rate": 9.773441761221538e-05, "loss": 0.9303, "step": 38795 }, { "epoch": 0.56, "grad_norm": 0.51171875, "learning_rate": 9.770938790804138e-05, "loss": 0.8257, "step": 38800 }, { "epoch": 0.56, "grad_norm": 0.52734375, "learning_rate": 9.768435834744552e-05, "loss": 0.9429, "step": 38805 }, { "epoch": 0.56, "grad_norm": 0.53125, "learning_rate": 9.765932893199673e-05, "loss": 0.9351, "step": 38810 }, { "epoch": 0.56, "grad_norm": 0.55078125, "learning_rate": 9.763429966326387e-05, "loss": 0.9168, "step": 38815 }, { "epoch": 0.56, "grad_norm": 0.55078125, "learning_rate": 9.760927054281579e-05, "loss": 0.9941, "step": 38820 }, { "epoch": 0.56, "grad_norm": 0.55078125, "learning_rate": 9.758424157222138e-05, "loss": 0.9234, "step": 38825 }, { "epoch": 0.56, "grad_norm": 0.59765625, "learning_rate": 9.755921275304945e-05, "loss": 0.8665, "step": 38830 }, { "epoch": 0.56, "grad_norm": 0.5859375, "learning_rate": 9.753418408686883e-05, "loss": 1.0219, "step": 38835 }, { "epoch": 0.56, "grad_norm": 0.6171875, "learning_rate": 9.750915557524838e-05, "loss": 0.9246, "step": 38840 }, { "epoch": 0.56, "grad_norm": 0.46484375, "learning_rate": 9.748412721975691e-05, "loss": 1.0578, "step": 38845 }, { "epoch": 0.56, "grad_norm": 0.52734375, "learning_rate": 9.74590990219632e-05, "loss": 0.8982, "step": 38850 }, { "epoch": 0.56, "grad_norm": 0.58984375, "learning_rate": 9.743407098343604e-05, "loss": 0.8119, "step": 38855 }, { "epoch": 0.56, "grad_norm": 0.5703125, "learning_rate": 9.740904310574424e-05, "loss": 1.0587, "step": 38860 }, { "epoch": 0.56, "grad_norm": 0.5859375, "learning_rate": 9.738401539045656e-05, "loss": 0.9207, "step": 38865 }, { "epoch": 0.56, "grad_norm": 0.6171875, "learning_rate": 9.73589878391418e-05, "loss": 0.9301, "step": 38870 }, { "epoch": 0.56, "grad_norm": 0.5078125, "learning_rate": 9.733396045336865e-05, "loss": 0.8845, "step": 38875 }, { "epoch": 0.56, "grad_norm": 0.59765625, "learning_rate": 9.730893323470593e-05, "loss": 1.0785, "step": 38880 }, { "epoch": 0.56, "grad_norm": 0.53515625, "learning_rate": 9.728390618472232e-05, "loss": 0.9223, "step": 38885 }, { "epoch": 0.56, "grad_norm": 0.54296875, "learning_rate": 9.725887930498657e-05, "loss": 0.9278, "step": 38890 }, { "epoch": 0.56, "grad_norm": 0.56640625, "learning_rate": 9.723385259706743e-05, "loss": 1.0404, "step": 38895 }, { "epoch": 0.56, "grad_norm": 0.65625, "learning_rate": 9.72088260625335e-05, "loss": 1.114, "step": 38900 }, { "epoch": 0.56, "grad_norm": 0.55078125, "learning_rate": 9.718379970295356e-05, "loss": 0.9147, "step": 38905 }, { "epoch": 0.56, "grad_norm": 0.55078125, "learning_rate": 9.715877351989625e-05, "loss": 0.9504, "step": 38910 }, { "epoch": 0.56, "grad_norm": 0.6171875, "learning_rate": 9.713374751493024e-05, "loss": 0.9138, "step": 38915 }, { "epoch": 0.56, "grad_norm": 0.58203125, "learning_rate": 9.710872168962422e-05, "loss": 1.0339, "step": 38920 }, { "epoch": 0.56, "grad_norm": 0.57421875, "learning_rate": 9.708369604554681e-05, "loss": 0.977, "step": 38925 }, { "epoch": 0.56, "grad_norm": 0.5625, "learning_rate": 9.705867058426664e-05, "loss": 0.9473, "step": 38930 }, { "epoch": 0.56, "grad_norm": 0.62109375, "learning_rate": 9.703364530735237e-05, "loss": 0.8984, "step": 38935 }, { "epoch": 0.56, "grad_norm": 0.578125, "learning_rate": 9.70086202163726e-05, "loss": 0.961, "step": 38940 }, { "epoch": 0.56, "grad_norm": 0.5625, "learning_rate": 9.698359531289588e-05, "loss": 0.9424, "step": 38945 }, { "epoch": 0.56, "grad_norm": 0.50390625, "learning_rate": 9.695857059849092e-05, "loss": 0.9191, "step": 38950 }, { "epoch": 0.56, "grad_norm": 0.578125, "learning_rate": 9.693354607472613e-05, "loss": 0.8682, "step": 38955 }, { "epoch": 0.56, "grad_norm": 0.671875, "learning_rate": 9.690852174317021e-05, "loss": 1.0405, "step": 38960 }, { "epoch": 0.56, "grad_norm": 0.486328125, "learning_rate": 9.688349760539167e-05, "loss": 1.0359, "step": 38965 }, { "epoch": 0.56, "grad_norm": 0.51171875, "learning_rate": 9.685847366295902e-05, "loss": 0.9881, "step": 38970 }, { "epoch": 0.56, "grad_norm": 0.5, "learning_rate": 9.683344991744083e-05, "loss": 0.9328, "step": 38975 }, { "epoch": 0.56, "grad_norm": 0.6328125, "learning_rate": 9.680842637040561e-05, "loss": 0.8895, "step": 38980 }, { "epoch": 0.56, "grad_norm": 0.56640625, "learning_rate": 9.678340302342184e-05, "loss": 0.9235, "step": 38985 }, { "epoch": 0.56, "grad_norm": 0.54296875, "learning_rate": 9.675837987805807e-05, "loss": 1.0094, "step": 38990 }, { "epoch": 0.56, "grad_norm": 0.50390625, "learning_rate": 9.673335693588273e-05, "loss": 0.9143, "step": 38995 }, { "epoch": 0.56, "grad_norm": 0.5859375, "learning_rate": 9.670833419846432e-05, "loss": 0.9175, "step": 39000 }, { "epoch": 0.56, "grad_norm": 0.5390625, "learning_rate": 9.668331166737124e-05, "loss": 0.8666, "step": 39005 }, { "epoch": 0.56, "grad_norm": 0.63671875, "learning_rate": 9.665828934417196e-05, "loss": 0.9682, "step": 39010 }, { "epoch": 0.56, "grad_norm": 0.5234375, "learning_rate": 9.66332672304349e-05, "loss": 0.9945, "step": 39015 }, { "epoch": 0.56, "grad_norm": 0.60546875, "learning_rate": 9.660824532772852e-05, "loss": 0.9521, "step": 39020 }, { "epoch": 0.56, "grad_norm": 0.5625, "learning_rate": 9.658322363762115e-05, "loss": 0.969, "step": 39025 }, { "epoch": 0.56, "grad_norm": 0.625, "learning_rate": 9.655820216168123e-05, "loss": 0.9418, "step": 39030 }, { "epoch": 0.56, "grad_norm": 0.60546875, "learning_rate": 9.653318090147711e-05, "loss": 0.9502, "step": 39035 }, { "epoch": 0.56, "grad_norm": 0.54296875, "learning_rate": 9.650815985857716e-05, "loss": 0.9308, "step": 39040 }, { "epoch": 0.56, "grad_norm": 0.5078125, "learning_rate": 9.648313903454975e-05, "loss": 1.0048, "step": 39045 }, { "epoch": 0.56, "grad_norm": 0.546875, "learning_rate": 9.645811843096316e-05, "loss": 1.0241, "step": 39050 }, { "epoch": 0.56, "grad_norm": 0.5390625, "learning_rate": 9.643309804938578e-05, "loss": 0.8488, "step": 39055 }, { "epoch": 0.56, "grad_norm": 0.55859375, "learning_rate": 9.640807789138586e-05, "loss": 0.9414, "step": 39060 }, { "epoch": 0.56, "grad_norm": 0.50390625, "learning_rate": 9.638305795853168e-05, "loss": 0.888, "step": 39065 }, { "epoch": 0.56, "grad_norm": 0.55078125, "learning_rate": 9.635803825239158e-05, "loss": 1.0104, "step": 39070 }, { "epoch": 0.56, "grad_norm": 0.60546875, "learning_rate": 9.633301877453378e-05, "loss": 1.0704, "step": 39075 }, { "epoch": 0.56, "grad_norm": 0.57421875, "learning_rate": 9.630799952652651e-05, "loss": 0.8919, "step": 39080 }, { "epoch": 0.56, "grad_norm": 0.58203125, "learning_rate": 9.628298050993806e-05, "loss": 1.0273, "step": 39085 }, { "epoch": 0.56, "grad_norm": 0.54296875, "learning_rate": 9.625796172633664e-05, "loss": 1.0297, "step": 39090 }, { "epoch": 0.56, "grad_norm": 0.5625, "learning_rate": 9.623294317729042e-05, "loss": 0.7903, "step": 39095 }, { "epoch": 0.56, "grad_norm": 0.58984375, "learning_rate": 9.620792486436762e-05, "loss": 0.9657, "step": 39100 }, { "epoch": 0.56, "grad_norm": 0.55078125, "learning_rate": 9.618290678913642e-05, "loss": 0.932, "step": 39105 }, { "epoch": 0.56, "grad_norm": 0.5234375, "learning_rate": 9.615788895316498e-05, "loss": 1.0416, "step": 39110 }, { "epoch": 0.56, "grad_norm": 0.53515625, "learning_rate": 9.613287135802142e-05, "loss": 0.8526, "step": 39115 }, { "epoch": 0.56, "grad_norm": 0.69140625, "learning_rate": 9.610785400527385e-05, "loss": 0.9282, "step": 39120 }, { "epoch": 0.56, "grad_norm": 0.50390625, "learning_rate": 9.608283689649047e-05, "loss": 0.9398, "step": 39125 }, { "epoch": 0.56, "grad_norm": 0.5703125, "learning_rate": 9.605782003323932e-05, "loss": 0.8503, "step": 39130 }, { "epoch": 0.56, "grad_norm": 0.51171875, "learning_rate": 9.603280341708848e-05, "loss": 0.8561, "step": 39135 }, { "epoch": 0.56, "grad_norm": 0.51953125, "learning_rate": 9.600778704960606e-05, "loss": 1.0332, "step": 39140 }, { "epoch": 0.56, "grad_norm": 0.6015625, "learning_rate": 9.59827709323601e-05, "loss": 0.9378, "step": 39145 }, { "epoch": 0.56, "grad_norm": 0.5703125, "learning_rate": 9.59577550669186e-05, "loss": 0.9466, "step": 39150 }, { "epoch": 0.56, "grad_norm": 0.546875, "learning_rate": 9.593273945484966e-05, "loss": 0.9579, "step": 39155 }, { "epoch": 0.56, "grad_norm": 0.57421875, "learning_rate": 9.590772409772125e-05, "loss": 0.9372, "step": 39160 }, { "epoch": 0.56, "grad_norm": 0.6015625, "learning_rate": 9.588270899710133e-05, "loss": 0.8365, "step": 39165 }, { "epoch": 0.56, "grad_norm": 0.494140625, "learning_rate": 9.58576941545579e-05, "loss": 0.8629, "step": 39170 }, { "epoch": 0.56, "grad_norm": 0.5234375, "learning_rate": 9.583267957165891e-05, "loss": 0.9038, "step": 39175 }, { "epoch": 0.56, "grad_norm": 0.58984375, "learning_rate": 9.580766524997232e-05, "loss": 0.9896, "step": 39180 }, { "epoch": 0.56, "grad_norm": 0.6171875, "learning_rate": 9.578265119106605e-05, "loss": 0.9699, "step": 39185 }, { "epoch": 0.56, "grad_norm": 0.56640625, "learning_rate": 9.575763739650798e-05, "loss": 0.9685, "step": 39190 }, { "epoch": 0.56, "grad_norm": 0.6171875, "learning_rate": 9.573262386786607e-05, "loss": 0.9458, "step": 39195 }, { "epoch": 0.56, "grad_norm": 0.5703125, "learning_rate": 9.570761060670814e-05, "loss": 0.9869, "step": 39200 }, { "epoch": 0.56, "grad_norm": 0.53515625, "learning_rate": 9.568259761460205e-05, "loss": 0.9797, "step": 39205 }, { "epoch": 0.56, "grad_norm": 0.6171875, "learning_rate": 9.565758489311572e-05, "loss": 1.0223, "step": 39210 }, { "epoch": 0.56, "grad_norm": 0.66015625, "learning_rate": 9.563257244381683e-05, "loss": 0.9996, "step": 39215 }, { "epoch": 0.56, "grad_norm": 0.5078125, "learning_rate": 9.560756026827333e-05, "loss": 0.9554, "step": 39220 }, { "epoch": 0.56, "grad_norm": 0.6484375, "learning_rate": 9.558254836805293e-05, "loss": 0.8866, "step": 39225 }, { "epoch": 0.56, "grad_norm": 0.5703125, "learning_rate": 9.555753674472342e-05, "loss": 0.8414, "step": 39230 }, { "epoch": 0.56, "grad_norm": 0.55078125, "learning_rate": 9.553252539985258e-05, "loss": 0.9681, "step": 39235 }, { "epoch": 0.56, "grad_norm": 0.58203125, "learning_rate": 9.550751433500814e-05, "loss": 1.0373, "step": 39240 }, { "epoch": 0.56, "grad_norm": 0.5078125, "learning_rate": 9.54825035517578e-05, "loss": 0.9632, "step": 39245 }, { "epoch": 0.56, "grad_norm": 0.58984375, "learning_rate": 9.54574930516693e-05, "loss": 0.9035, "step": 39250 }, { "epoch": 0.56, "grad_norm": 0.5703125, "learning_rate": 9.543248283631031e-05, "loss": 0.8037, "step": 39255 }, { "epoch": 0.56, "grad_norm": 0.59375, "learning_rate": 9.540747290724848e-05, "loss": 1.0928, "step": 39260 }, { "epoch": 0.56, "grad_norm": 0.63671875, "learning_rate": 9.538246326605154e-05, "loss": 1.0113, "step": 39265 }, { "epoch": 0.56, "grad_norm": 0.609375, "learning_rate": 9.535745391428699e-05, "loss": 0.8858, "step": 39270 }, { "epoch": 0.56, "grad_norm": 0.515625, "learning_rate": 9.533244485352255e-05, "loss": 0.9432, "step": 39275 }, { "epoch": 0.56, "grad_norm": 0.51953125, "learning_rate": 9.530743608532577e-05, "loss": 0.859, "step": 39280 }, { "epoch": 0.56, "grad_norm": 0.5625, "learning_rate": 9.528242761126424e-05, "loss": 0.9839, "step": 39285 }, { "epoch": 0.56, "grad_norm": 0.54296875, "learning_rate": 9.525741943290552e-05, "loss": 1.0186, "step": 39290 }, { "epoch": 0.56, "grad_norm": 0.61328125, "learning_rate": 9.523241155181716e-05, "loss": 0.8606, "step": 39295 }, { "epoch": 0.56, "grad_norm": 0.50390625, "learning_rate": 9.520740396956665e-05, "loss": 0.9145, "step": 39300 }, { "epoch": 0.56, "grad_norm": 0.51953125, "learning_rate": 9.518239668772154e-05, "loss": 0.9839, "step": 39305 }, { "epoch": 0.56, "grad_norm": 0.58203125, "learning_rate": 9.515738970784928e-05, "loss": 0.8016, "step": 39310 }, { "epoch": 0.56, "grad_norm": 0.54296875, "learning_rate": 9.513238303151739e-05, "loss": 0.9934, "step": 39315 }, { "epoch": 0.56, "grad_norm": 0.56640625, "learning_rate": 9.510737666029323e-05, "loss": 0.8347, "step": 39320 }, { "epoch": 0.56, "grad_norm": 0.59375, "learning_rate": 9.508237059574429e-05, "loss": 0.9688, "step": 39325 }, { "epoch": 0.56, "grad_norm": 0.498046875, "learning_rate": 9.505736483943795e-05, "loss": 1.0757, "step": 39330 }, { "epoch": 0.56, "grad_norm": 0.60546875, "learning_rate": 9.503235939294163e-05, "loss": 0.9708, "step": 39335 }, { "epoch": 0.56, "grad_norm": 0.5234375, "learning_rate": 9.500735425782266e-05, "loss": 0.8411, "step": 39340 }, { "epoch": 0.56, "grad_norm": 0.55078125, "learning_rate": 9.49823494356484e-05, "loss": 1.0257, "step": 39345 }, { "epoch": 0.56, "grad_norm": 0.474609375, "learning_rate": 9.495734492798623e-05, "loss": 0.885, "step": 39350 }, { "epoch": 0.56, "grad_norm": 0.55078125, "learning_rate": 9.493234073640339e-05, "loss": 1.0719, "step": 39355 }, { "epoch": 0.56, "grad_norm": 0.52734375, "learning_rate": 9.490733686246722e-05, "loss": 0.9743, "step": 39360 }, { "epoch": 0.56, "grad_norm": 0.6484375, "learning_rate": 9.488233330774497e-05, "loss": 0.8661, "step": 39365 }, { "epoch": 0.56, "grad_norm": 0.54296875, "learning_rate": 9.485733007380395e-05, "loss": 0.955, "step": 39370 }, { "epoch": 0.56, "grad_norm": 0.546875, "learning_rate": 9.483232716221127e-05, "loss": 0.8181, "step": 39375 }, { "epoch": 0.56, "grad_norm": 0.6796875, "learning_rate": 9.480732457453422e-05, "loss": 0.9923, "step": 39380 }, { "epoch": 0.56, "grad_norm": 0.57421875, "learning_rate": 9.478232231234e-05, "loss": 1.033, "step": 39385 }, { "epoch": 0.57, "grad_norm": 0.515625, "learning_rate": 9.475732037719572e-05, "loss": 0.8905, "step": 39390 }, { "epoch": 0.57, "grad_norm": 0.625, "learning_rate": 9.473231877066861e-05, "loss": 0.9827, "step": 39395 }, { "epoch": 0.57, "grad_norm": 0.5234375, "learning_rate": 9.470731749432574e-05, "loss": 0.9783, "step": 39400 }, { "epoch": 0.57, "grad_norm": 0.54296875, "learning_rate": 9.468231654973425e-05, "loss": 0.9423, "step": 39405 }, { "epoch": 0.57, "grad_norm": 0.515625, "learning_rate": 9.46573159384612e-05, "loss": 0.9276, "step": 39410 }, { "epoch": 0.57, "grad_norm": 0.5390625, "learning_rate": 9.463231566207368e-05, "loss": 0.992, "step": 39415 }, { "epoch": 0.57, "grad_norm": 0.58203125, "learning_rate": 9.460731572213875e-05, "loss": 0.9216, "step": 39420 }, { "epoch": 0.57, "grad_norm": 0.5390625, "learning_rate": 9.45823161202234e-05, "loss": 0.95, "step": 39425 }, { "epoch": 0.57, "grad_norm": 0.5390625, "learning_rate": 9.455731685789461e-05, "loss": 0.9028, "step": 39430 }, { "epoch": 0.57, "grad_norm": 0.57421875, "learning_rate": 9.453231793671944e-05, "loss": 0.9488, "step": 39435 }, { "epoch": 0.57, "grad_norm": 0.71484375, "learning_rate": 9.450731935826479e-05, "loss": 1.054, "step": 39440 }, { "epoch": 0.57, "grad_norm": 0.494140625, "learning_rate": 9.44823211240976e-05, "loss": 0.9122, "step": 39445 }, { "epoch": 0.57, "grad_norm": 0.546875, "learning_rate": 9.445732323578484e-05, "loss": 0.877, "step": 39450 }, { "epoch": 0.57, "grad_norm": 0.61328125, "learning_rate": 9.443232569489337e-05, "loss": 0.9831, "step": 39455 }, { "epoch": 0.57, "grad_norm": 0.59765625, "learning_rate": 9.440732850299003e-05, "loss": 1.0377, "step": 39460 }, { "epoch": 0.57, "grad_norm": 0.439453125, "learning_rate": 9.438233166164175e-05, "loss": 1.0571, "step": 39465 }, { "epoch": 0.57, "grad_norm": 0.57421875, "learning_rate": 9.43573351724153e-05, "loss": 1.04, "step": 39470 }, { "epoch": 0.57, "grad_norm": 0.5078125, "learning_rate": 9.433233903687754e-05, "loss": 1.0514, "step": 39475 }, { "epoch": 0.57, "grad_norm": 0.52734375, "learning_rate": 9.43073432565952e-05, "loss": 0.8964, "step": 39480 }, { "epoch": 0.57, "grad_norm": 0.54296875, "learning_rate": 9.428234783313505e-05, "loss": 1.0734, "step": 39485 }, { "epoch": 0.57, "grad_norm": 0.515625, "learning_rate": 9.425735276806387e-05, "loss": 0.8907, "step": 39490 }, { "epoch": 0.57, "grad_norm": 0.50390625, "learning_rate": 9.423235806294835e-05, "loss": 0.9568, "step": 39495 }, { "epoch": 0.57, "grad_norm": 0.55859375, "learning_rate": 9.420736371935519e-05, "loss": 0.9042, "step": 39500 }, { "epoch": 0.57, "grad_norm": 0.609375, "learning_rate": 9.418236973885107e-05, "loss": 0.8978, "step": 39505 }, { "epoch": 0.57, "grad_norm": 0.5390625, "learning_rate": 9.415737612300266e-05, "loss": 0.9227, "step": 39510 }, { "epoch": 0.57, "grad_norm": 0.5703125, "learning_rate": 9.413238287337653e-05, "loss": 1.0013, "step": 39515 }, { "epoch": 0.57, "grad_norm": 0.52734375, "learning_rate": 9.410738999153936e-05, "loss": 0.8893, "step": 39520 }, { "epoch": 0.57, "grad_norm": 0.52734375, "learning_rate": 9.408239747905771e-05, "loss": 0.8983, "step": 39525 }, { "epoch": 0.57, "grad_norm": 0.498046875, "learning_rate": 9.405740533749811e-05, "loss": 0.9171, "step": 39530 }, { "epoch": 0.57, "grad_norm": 0.5625, "learning_rate": 9.403241356842711e-05, "loss": 0.9745, "step": 39535 }, { "epoch": 0.57, "grad_norm": 0.58203125, "learning_rate": 9.40074221734112e-05, "loss": 1.0574, "step": 39540 }, { "epoch": 0.57, "grad_norm": 0.578125, "learning_rate": 9.398243115401693e-05, "loss": 1.023, "step": 39545 }, { "epoch": 0.57, "grad_norm": 0.57421875, "learning_rate": 9.39574405118107e-05, "loss": 0.9277, "step": 39550 }, { "epoch": 0.57, "grad_norm": 0.70703125, "learning_rate": 9.393245024835898e-05, "loss": 1.0421, "step": 39555 }, { "epoch": 0.57, "grad_norm": 0.56640625, "learning_rate": 9.39074603652282e-05, "loss": 0.939, "step": 39560 }, { "epoch": 0.57, "grad_norm": 0.56640625, "learning_rate": 9.388247086398475e-05, "loss": 0.826, "step": 39565 }, { "epoch": 0.57, "grad_norm": 0.57421875, "learning_rate": 9.385748174619497e-05, "loss": 0.9839, "step": 39570 }, { "epoch": 0.57, "grad_norm": 0.46484375, "learning_rate": 9.383249301342524e-05, "loss": 0.9497, "step": 39575 }, { "epoch": 0.57, "grad_norm": 0.5625, "learning_rate": 9.380750466724192e-05, "loss": 0.9422, "step": 39580 }, { "epoch": 0.57, "grad_norm": 0.546875, "learning_rate": 9.378251670921122e-05, "loss": 0.7498, "step": 39585 }, { "epoch": 0.57, "grad_norm": 0.73828125, "learning_rate": 9.375752914089946e-05, "loss": 1.0139, "step": 39590 }, { "epoch": 0.57, "grad_norm": 0.490234375, "learning_rate": 9.373254196387286e-05, "loss": 0.8377, "step": 39595 }, { "epoch": 0.57, "grad_norm": 0.5390625, "learning_rate": 9.370755517969768e-05, "loss": 0.8605, "step": 39600 }, { "epoch": 0.57, "grad_norm": 0.54296875, "learning_rate": 9.368256878994012e-05, "loss": 0.8669, "step": 39605 }, { "epoch": 0.57, "grad_norm": 0.515625, "learning_rate": 9.365758279616631e-05, "loss": 0.9177, "step": 39610 }, { "epoch": 0.57, "grad_norm": 0.59765625, "learning_rate": 9.363259719994247e-05, "loss": 0.9787, "step": 39615 }, { "epoch": 0.57, "grad_norm": 0.671875, "learning_rate": 9.360761200283468e-05, "loss": 0.948, "step": 39620 }, { "epoch": 0.57, "grad_norm": 0.58203125, "learning_rate": 9.358262720640903e-05, "loss": 1.0305, "step": 39625 }, { "epoch": 0.57, "grad_norm": 0.54296875, "learning_rate": 9.355764281223168e-05, "loss": 0.9977, "step": 39630 }, { "epoch": 0.57, "grad_norm": 0.58984375, "learning_rate": 9.353265882186855e-05, "loss": 0.7942, "step": 39635 }, { "epoch": 0.57, "grad_norm": 0.625, "learning_rate": 9.350767523688574e-05, "loss": 0.902, "step": 39640 }, { "epoch": 0.57, "grad_norm": 0.52734375, "learning_rate": 9.348269205884926e-05, "loss": 0.8388, "step": 39645 }, { "epoch": 0.57, "grad_norm": 0.5703125, "learning_rate": 9.345770928932505e-05, "loss": 1.0024, "step": 39650 }, { "epoch": 0.57, "grad_norm": 0.61328125, "learning_rate": 9.343272692987908e-05, "loss": 0.8947, "step": 39655 }, { "epoch": 0.57, "grad_norm": 0.458984375, "learning_rate": 9.340774498207726e-05, "loss": 1.0304, "step": 39660 }, { "epoch": 0.57, "grad_norm": 0.81640625, "learning_rate": 9.338276344748548e-05, "loss": 0.9795, "step": 39665 }, { "epoch": 0.57, "grad_norm": 0.6015625, "learning_rate": 9.335778232766964e-05, "loss": 1.0649, "step": 39670 }, { "epoch": 0.57, "grad_norm": 0.50390625, "learning_rate": 9.333280162419558e-05, "loss": 0.9476, "step": 39675 }, { "epoch": 0.57, "grad_norm": 0.609375, "learning_rate": 9.330782133862907e-05, "loss": 1.0906, "step": 39680 }, { "epoch": 0.57, "grad_norm": 0.65234375, "learning_rate": 9.328284147253601e-05, "loss": 0.9704, "step": 39685 }, { "epoch": 0.57, "grad_norm": 0.56640625, "learning_rate": 9.325786202748203e-05, "loss": 0.9293, "step": 39690 }, { "epoch": 0.57, "grad_norm": 0.64453125, "learning_rate": 9.323288300503296e-05, "loss": 1.004, "step": 39695 }, { "epoch": 0.57, "grad_norm": 0.52734375, "learning_rate": 9.32079044067545e-05, "loss": 0.9423, "step": 39700 }, { "epoch": 0.57, "grad_norm": 0.6171875, "learning_rate": 9.31829262342123e-05, "loss": 0.9661, "step": 39705 }, { "epoch": 0.57, "grad_norm": 0.5859375, "learning_rate": 9.315794848897207e-05, "loss": 0.8995, "step": 39710 }, { "epoch": 0.57, "grad_norm": 0.609375, "learning_rate": 9.313297117259941e-05, "loss": 0.9261, "step": 39715 }, { "epoch": 0.57, "grad_norm": 0.5859375, "learning_rate": 9.310799428665992e-05, "loss": 0.9131, "step": 39720 }, { "epoch": 0.57, "grad_norm": 0.66796875, "learning_rate": 9.308301783271923e-05, "loss": 1.0272, "step": 39725 }, { "epoch": 0.57, "grad_norm": 0.57421875, "learning_rate": 9.305804181234286e-05, "loss": 0.9359, "step": 39730 }, { "epoch": 0.57, "grad_norm": 0.484375, "learning_rate": 9.303306622709636e-05, "loss": 1.0216, "step": 39735 }, { "epoch": 0.57, "grad_norm": 0.8828125, "learning_rate": 9.300809107854517e-05, "loss": 0.9969, "step": 39740 }, { "epoch": 0.57, "grad_norm": 0.546875, "learning_rate": 9.298311636825477e-05, "loss": 1.057, "step": 39745 }, { "epoch": 0.57, "grad_norm": 0.4921875, "learning_rate": 9.295814209779066e-05, "loss": 0.9669, "step": 39750 }, { "epoch": 0.57, "grad_norm": 0.51953125, "learning_rate": 9.293316826871821e-05, "loss": 0.7833, "step": 39755 }, { "epoch": 0.57, "grad_norm": 0.5078125, "learning_rate": 9.290819488260281e-05, "loss": 0.8582, "step": 39760 }, { "epoch": 0.57, "grad_norm": 0.57421875, "learning_rate": 9.288322194100985e-05, "loss": 0.994, "step": 39765 }, { "epoch": 0.57, "grad_norm": 0.578125, "learning_rate": 9.285824944550465e-05, "loss": 1.081, "step": 39770 }, { "epoch": 0.57, "grad_norm": 0.58203125, "learning_rate": 9.283327739765248e-05, "loss": 0.9136, "step": 39775 }, { "epoch": 0.57, "grad_norm": 0.5859375, "learning_rate": 9.280830579901867e-05, "loss": 1.1627, "step": 39780 }, { "epoch": 0.57, "grad_norm": 0.62890625, "learning_rate": 9.278333465116844e-05, "loss": 1.0138, "step": 39785 }, { "epoch": 0.57, "grad_norm": 0.55859375, "learning_rate": 9.275836395566703e-05, "loss": 0.9436, "step": 39790 }, { "epoch": 0.57, "grad_norm": 0.62109375, "learning_rate": 9.27333937140796e-05, "loss": 0.8221, "step": 39795 }, { "epoch": 0.57, "grad_norm": 0.5234375, "learning_rate": 9.270842392797131e-05, "loss": 0.9202, "step": 39800 }, { "epoch": 0.57, "grad_norm": 0.609375, "learning_rate": 9.268345459890734e-05, "loss": 0.9205, "step": 39805 }, { "epoch": 0.57, "grad_norm": 0.54296875, "learning_rate": 9.265848572845275e-05, "loss": 0.8917, "step": 39810 }, { "epoch": 0.57, "grad_norm": 0.57421875, "learning_rate": 9.263351731817263e-05, "loss": 0.9382, "step": 39815 }, { "epoch": 0.57, "grad_norm": 0.828125, "learning_rate": 9.260854936963205e-05, "loss": 0.9602, "step": 39820 }, { "epoch": 0.57, "grad_norm": 0.5390625, "learning_rate": 9.2583581884396e-05, "loss": 0.9374, "step": 39825 }, { "epoch": 0.57, "grad_norm": 0.56640625, "learning_rate": 9.255861486402949e-05, "loss": 1.0302, "step": 39830 }, { "epoch": 0.57, "grad_norm": 0.52734375, "learning_rate": 9.253364831009748e-05, "loss": 0.7649, "step": 39835 }, { "epoch": 0.57, "grad_norm": 0.58984375, "learning_rate": 9.250868222416493e-05, "loss": 0.8398, "step": 39840 }, { "epoch": 0.57, "grad_norm": 0.58203125, "learning_rate": 9.248371660779666e-05, "loss": 0.9781, "step": 39845 }, { "epoch": 0.57, "grad_norm": 0.5703125, "learning_rate": 9.245875146255763e-05, "loss": 1.0139, "step": 39850 }, { "epoch": 0.57, "grad_norm": 0.578125, "learning_rate": 9.24337867900126e-05, "loss": 0.8359, "step": 39855 }, { "epoch": 0.57, "grad_norm": 0.625, "learning_rate": 9.240882259172647e-05, "loss": 1.1231, "step": 39860 }, { "epoch": 0.57, "grad_norm": 0.51171875, "learning_rate": 9.238385886926397e-05, "loss": 0.8595, "step": 39865 }, { "epoch": 0.57, "grad_norm": 0.5703125, "learning_rate": 9.235889562418986e-05, "loss": 1.133, "step": 39870 }, { "epoch": 0.57, "grad_norm": 0.48828125, "learning_rate": 9.233393285806888e-05, "loss": 0.9109, "step": 39875 }, { "epoch": 0.57, "grad_norm": 0.5078125, "learning_rate": 9.230897057246574e-05, "loss": 0.924, "step": 39880 }, { "epoch": 0.57, "grad_norm": 0.48828125, "learning_rate": 9.228400876894506e-05, "loss": 0.8275, "step": 39885 }, { "epoch": 0.57, "grad_norm": 0.54296875, "learning_rate": 9.22590474490715e-05, "loss": 0.8016, "step": 39890 }, { "epoch": 0.57, "grad_norm": 0.56640625, "learning_rate": 9.22340866144097e-05, "loss": 0.9906, "step": 39895 }, { "epoch": 0.57, "grad_norm": 0.5625, "learning_rate": 9.220912626652417e-05, "loss": 0.9459, "step": 39900 }, { "epoch": 0.57, "grad_norm": 0.54296875, "learning_rate": 9.218416640697947e-05, "loss": 0.92, "step": 39905 }, { "epoch": 0.57, "grad_norm": 0.61328125, "learning_rate": 9.215920703734012e-05, "loss": 1.059, "step": 39910 }, { "epoch": 0.57, "grad_norm": 0.578125, "learning_rate": 9.213424815917062e-05, "loss": 1.1374, "step": 39915 }, { "epoch": 0.57, "grad_norm": 0.546875, "learning_rate": 9.21092897740354e-05, "loss": 1.0304, "step": 39920 }, { "epoch": 0.57, "grad_norm": 0.53515625, "learning_rate": 9.208433188349885e-05, "loss": 0.8611, "step": 39925 }, { "epoch": 0.57, "grad_norm": 0.6015625, "learning_rate": 9.205937448912543e-05, "loss": 0.9039, "step": 39930 }, { "epoch": 0.57, "grad_norm": 0.50390625, "learning_rate": 9.203441759247946e-05, "loss": 0.8528, "step": 39935 }, { "epoch": 0.57, "grad_norm": 0.5078125, "learning_rate": 9.200946119512523e-05, "loss": 0.764, "step": 39940 }, { "epoch": 0.57, "grad_norm": 0.59375, "learning_rate": 9.198450529862714e-05, "loss": 0.9309, "step": 39945 }, { "epoch": 0.57, "grad_norm": 0.59765625, "learning_rate": 9.195954990454934e-05, "loss": 1.0455, "step": 39950 }, { "epoch": 0.57, "grad_norm": 0.5390625, "learning_rate": 9.193459501445611e-05, "loss": 1.173, "step": 39955 }, { "epoch": 0.57, "grad_norm": 0.515625, "learning_rate": 9.190964062991166e-05, "loss": 0.7986, "step": 39960 }, { "epoch": 0.57, "grad_norm": 0.53515625, "learning_rate": 9.188468675248014e-05, "loss": 0.8808, "step": 39965 }, { "epoch": 0.57, "grad_norm": 0.53515625, "learning_rate": 9.18597333837257e-05, "loss": 0.862, "step": 39970 }, { "epoch": 0.57, "grad_norm": 0.5390625, "learning_rate": 9.183478052521244e-05, "loss": 0.893, "step": 39975 }, { "epoch": 0.57, "grad_norm": 0.52734375, "learning_rate": 9.180982817850443e-05, "loss": 1.0495, "step": 39980 }, { "epoch": 0.57, "grad_norm": 0.54296875, "learning_rate": 9.178487634516573e-05, "loss": 0.9471, "step": 39985 }, { "epoch": 0.57, "grad_norm": 0.52734375, "learning_rate": 9.175992502676035e-05, "loss": 0.9318, "step": 39990 }, { "epoch": 0.57, "grad_norm": 0.6015625, "learning_rate": 9.173497422485222e-05, "loss": 0.956, "step": 39995 }, { "epoch": 0.57, "grad_norm": 0.5859375, "learning_rate": 9.171002394100539e-05, "loss": 0.9119, "step": 40000 }, { "epoch": 0.57, "grad_norm": 0.51171875, "learning_rate": 9.168507417678364e-05, "loss": 0.9412, "step": 40005 }, { "epoch": 0.57, "grad_norm": 0.75390625, "learning_rate": 9.166012493375091e-05, "loss": 1.026, "step": 40010 }, { "epoch": 0.57, "grad_norm": 0.5234375, "learning_rate": 9.163517621347106e-05, "loss": 1.0485, "step": 40015 }, { "epoch": 0.57, "grad_norm": 0.56640625, "learning_rate": 9.161022801750788e-05, "loss": 0.9197, "step": 40020 }, { "epoch": 0.57, "grad_norm": 0.703125, "learning_rate": 9.158528034742518e-05, "loss": 0.9262, "step": 40025 }, { "epoch": 0.57, "grad_norm": 0.53125, "learning_rate": 9.156033320478669e-05, "loss": 0.9726, "step": 40030 }, { "epoch": 0.57, "grad_norm": 0.5, "learning_rate": 9.15353865911561e-05, "loss": 0.8281, "step": 40035 }, { "epoch": 0.57, "grad_norm": 0.65625, "learning_rate": 9.151044050809715e-05, "loss": 0.9731, "step": 40040 }, { "epoch": 0.57, "grad_norm": 0.5, "learning_rate": 9.148549495717344e-05, "loss": 1.0655, "step": 40045 }, { "epoch": 0.57, "grad_norm": 0.5546875, "learning_rate": 9.146054993994864e-05, "loss": 1.0064, "step": 40050 }, { "epoch": 0.57, "grad_norm": 0.56640625, "learning_rate": 9.143560545798625e-05, "loss": 0.9116, "step": 40055 }, { "epoch": 0.57, "grad_norm": 0.62890625, "learning_rate": 9.141066151284988e-05, "loss": 0.9974, "step": 40060 }, { "epoch": 0.57, "grad_norm": 0.57421875, "learning_rate": 9.138571810610303e-05, "loss": 0.9558, "step": 40065 }, { "epoch": 0.57, "grad_norm": 0.55078125, "learning_rate": 9.136077523930918e-05, "loss": 0.9115, "step": 40070 }, { "epoch": 0.57, "grad_norm": 0.58984375, "learning_rate": 9.133583291403176e-05, "loss": 1.0983, "step": 40075 }, { "epoch": 0.57, "grad_norm": 0.5234375, "learning_rate": 9.131089113183422e-05, "loss": 1.028, "step": 40080 }, { "epoch": 0.58, "grad_norm": 0.55859375, "learning_rate": 9.128594989427992e-05, "loss": 0.9602, "step": 40085 }, { "epoch": 0.58, "grad_norm": 0.57421875, "learning_rate": 9.126100920293219e-05, "loss": 0.9662, "step": 40090 }, { "epoch": 0.58, "grad_norm": 0.64453125, "learning_rate": 9.123606905935436e-05, "loss": 0.944, "step": 40095 }, { "epoch": 0.58, "grad_norm": 0.57421875, "learning_rate": 9.121112946510973e-05, "loss": 0.9506, "step": 40100 }, { "epoch": 0.58, "grad_norm": 0.484375, "learning_rate": 9.11861904217615e-05, "loss": 0.9151, "step": 40105 }, { "epoch": 0.58, "grad_norm": 0.73828125, "learning_rate": 9.116125193087288e-05, "loss": 0.8144, "step": 40110 }, { "epoch": 0.58, "grad_norm": 0.5859375, "learning_rate": 9.113631399400707e-05, "loss": 0.9939, "step": 40115 }, { "epoch": 0.58, "grad_norm": 0.65625, "learning_rate": 9.11113766127272e-05, "loss": 0.9773, "step": 40120 }, { "epoch": 0.58, "grad_norm": 0.51953125, "learning_rate": 9.108643978859632e-05, "loss": 0.9812, "step": 40125 }, { "epoch": 0.58, "grad_norm": 0.62109375, "learning_rate": 9.10615035231776e-05, "loss": 1.0516, "step": 40130 }, { "epoch": 0.58, "grad_norm": 0.54296875, "learning_rate": 9.103656781803399e-05, "loss": 1.0237, "step": 40135 }, { "epoch": 0.58, "grad_norm": 0.5625, "learning_rate": 9.101163267472852e-05, "loss": 0.969, "step": 40140 }, { "epoch": 0.58, "grad_norm": 0.515625, "learning_rate": 9.098669809482415e-05, "loss": 1.0175, "step": 40145 }, { "epoch": 0.58, "grad_norm": 0.58984375, "learning_rate": 9.096176407988382e-05, "loss": 0.9175, "step": 40150 }, { "epoch": 0.58, "grad_norm": 0.625, "learning_rate": 9.093683063147044e-05, "loss": 1.1157, "step": 40155 }, { "epoch": 0.58, "grad_norm": 0.5390625, "learning_rate": 9.09118977511468e-05, "loss": 1.0574, "step": 40160 }, { "epoch": 0.58, "grad_norm": 0.54296875, "learning_rate": 9.088696544047574e-05, "loss": 0.8854, "step": 40165 }, { "epoch": 0.58, "grad_norm": 0.52734375, "learning_rate": 9.08620337010201e-05, "loss": 0.8979, "step": 40170 }, { "epoch": 0.58, "grad_norm": 0.58984375, "learning_rate": 9.083710253434259e-05, "loss": 1.1431, "step": 40175 }, { "epoch": 0.58, "grad_norm": 0.55078125, "learning_rate": 9.08121719420059e-05, "loss": 0.9464, "step": 40180 }, { "epoch": 0.58, "grad_norm": 0.51171875, "learning_rate": 9.078724192557278e-05, "loss": 0.9012, "step": 40185 }, { "epoch": 0.58, "grad_norm": 0.58203125, "learning_rate": 9.07623124866058e-05, "loss": 0.9206, "step": 40190 }, { "epoch": 0.58, "grad_norm": 0.60546875, "learning_rate": 9.073738362666759e-05, "loss": 0.9284, "step": 40195 }, { "epoch": 0.58, "grad_norm": 0.5625, "learning_rate": 9.071245534732073e-05, "loss": 0.864, "step": 40200 }, { "epoch": 0.58, "grad_norm": 0.53515625, "learning_rate": 9.068752765012777e-05, "loss": 0.9126, "step": 40205 }, { "epoch": 0.58, "grad_norm": 0.5, "learning_rate": 9.066260053665119e-05, "loss": 0.9227, "step": 40210 }, { "epoch": 0.58, "grad_norm": 0.61328125, "learning_rate": 9.063767400845343e-05, "loss": 1.0837, "step": 40215 }, { "epoch": 0.58, "grad_norm": 0.5078125, "learning_rate": 9.061274806709688e-05, "loss": 1.0174, "step": 40220 }, { "epoch": 0.58, "grad_norm": 0.51171875, "learning_rate": 9.058782271414402e-05, "loss": 0.8127, "step": 40225 }, { "epoch": 0.58, "grad_norm": 0.58203125, "learning_rate": 9.056289795115715e-05, "loss": 0.84, "step": 40230 }, { "epoch": 0.58, "grad_norm": 0.546875, "learning_rate": 9.053797377969855e-05, "loss": 1.0014, "step": 40235 }, { "epoch": 0.58, "grad_norm": 0.625, "learning_rate": 9.051305020133055e-05, "loss": 1.0487, "step": 40240 }, { "epoch": 0.58, "grad_norm": 0.56640625, "learning_rate": 9.048812721761538e-05, "loss": 0.9631, "step": 40245 }, { "epoch": 0.58, "grad_norm": 0.5546875, "learning_rate": 9.04632048301152e-05, "loss": 0.9664, "step": 40250 }, { "epoch": 0.58, "grad_norm": 0.546875, "learning_rate": 9.043828304039222e-05, "loss": 0.8728, "step": 40255 }, { "epoch": 0.58, "grad_norm": 0.49609375, "learning_rate": 9.041336185000858e-05, "loss": 0.9733, "step": 40260 }, { "epoch": 0.58, "grad_norm": 0.62890625, "learning_rate": 9.038844126052633e-05, "loss": 1.0681, "step": 40265 }, { "epoch": 0.58, "grad_norm": 0.5078125, "learning_rate": 9.03635212735075e-05, "loss": 0.8276, "step": 40270 }, { "epoch": 0.58, "grad_norm": 0.5234375, "learning_rate": 9.033860189051412e-05, "loss": 0.979, "step": 40275 }, { "epoch": 0.58, "grad_norm": 0.5625, "learning_rate": 9.031368311310821e-05, "loss": 1.0012, "step": 40280 }, { "epoch": 0.58, "grad_norm": 0.54296875, "learning_rate": 9.028876494285166e-05, "loss": 0.8916, "step": 40285 }, { "epoch": 0.58, "grad_norm": 0.55078125, "learning_rate": 9.026384738130637e-05, "loss": 1.0582, "step": 40290 }, { "epoch": 0.58, "grad_norm": 0.54296875, "learning_rate": 9.023893043003423e-05, "loss": 0.9211, "step": 40295 }, { "epoch": 0.58, "grad_norm": 0.59375, "learning_rate": 9.021401409059704e-05, "loss": 0.902, "step": 40300 }, { "epoch": 0.58, "grad_norm": 0.55078125, "learning_rate": 9.018909836455659e-05, "loss": 0.9903, "step": 40305 }, { "epoch": 0.58, "grad_norm": 0.54296875, "learning_rate": 9.016418325347464e-05, "loss": 0.8672, "step": 40310 }, { "epoch": 0.58, "grad_norm": 0.4921875, "learning_rate": 9.013926875891291e-05, "loss": 0.9729, "step": 40315 }, { "epoch": 0.58, "grad_norm": 0.49609375, "learning_rate": 9.011435488243302e-05, "loss": 0.8721, "step": 40320 }, { "epoch": 0.58, "grad_norm": 0.53125, "learning_rate": 9.008944162559663e-05, "loss": 0.9681, "step": 40325 }, { "epoch": 0.58, "grad_norm": 0.494140625, "learning_rate": 9.006452898996529e-05, "loss": 0.9349, "step": 40330 }, { "epoch": 0.58, "grad_norm": 0.5390625, "learning_rate": 9.003961697710062e-05, "loss": 1.0404, "step": 40335 }, { "epoch": 0.58, "grad_norm": 0.5234375, "learning_rate": 9.001470558856411e-05, "loss": 0.9361, "step": 40340 }, { "epoch": 0.58, "grad_norm": 0.482421875, "learning_rate": 8.998979482591718e-05, "loss": 0.8448, "step": 40345 }, { "epoch": 0.58, "grad_norm": 0.64453125, "learning_rate": 8.996488469072136e-05, "loss": 1.0374, "step": 40350 }, { "epoch": 0.58, "grad_norm": 0.5625, "learning_rate": 8.993997518453799e-05, "loss": 0.797, "step": 40355 }, { "epoch": 0.58, "grad_norm": 0.5546875, "learning_rate": 8.99150663089284e-05, "loss": 0.7895, "step": 40360 }, { "epoch": 0.58, "grad_norm": 0.6328125, "learning_rate": 8.989015806545402e-05, "loss": 0.9464, "step": 40365 }, { "epoch": 0.58, "grad_norm": 0.55859375, "learning_rate": 8.986525045567597e-05, "loss": 0.9364, "step": 40370 }, { "epoch": 0.58, "grad_norm": 0.55859375, "learning_rate": 8.984034348115558e-05, "loss": 0.9559, "step": 40375 }, { "epoch": 0.58, "grad_norm": 0.66015625, "learning_rate": 8.981543714345404e-05, "loss": 1.109, "step": 40380 }, { "epoch": 0.58, "grad_norm": 0.69921875, "learning_rate": 8.979053144413248e-05, "loss": 1.0065, "step": 40385 }, { "epoch": 0.58, "grad_norm": 0.55859375, "learning_rate": 8.976562638475206e-05, "loss": 1.0162, "step": 40390 }, { "epoch": 0.58, "grad_norm": 0.6328125, "learning_rate": 8.974072196687384e-05, "loss": 1.0205, "step": 40395 }, { "epoch": 0.58, "grad_norm": 0.58203125, "learning_rate": 8.971581819205881e-05, "loss": 1.1037, "step": 40400 }, { "epoch": 0.58, "grad_norm": 0.578125, "learning_rate": 8.969091506186804e-05, "loss": 0.8065, "step": 40405 }, { "epoch": 0.58, "grad_norm": 0.5625, "learning_rate": 8.966601257786246e-05, "loss": 0.9767, "step": 40410 }, { "epoch": 0.58, "grad_norm": 0.71484375, "learning_rate": 8.964111074160296e-05, "loss": 1.0224, "step": 40415 }, { "epoch": 0.58, "grad_norm": 0.56640625, "learning_rate": 8.961620955465049e-05, "loss": 0.9845, "step": 40420 }, { "epoch": 0.58, "grad_norm": 0.7109375, "learning_rate": 8.959130901856576e-05, "loss": 1.135, "step": 40425 }, { "epoch": 0.58, "grad_norm": 0.55859375, "learning_rate": 8.956640913490968e-05, "loss": 1.0147, "step": 40430 }, { "epoch": 0.58, "grad_norm": 0.59765625, "learning_rate": 8.954150990524294e-05, "loss": 1.1181, "step": 40435 }, { "epoch": 0.58, "grad_norm": 0.5390625, "learning_rate": 8.951661133112625e-05, "loss": 0.8177, "step": 40440 }, { "epoch": 0.58, "grad_norm": 0.55078125, "learning_rate": 8.949171341412034e-05, "loss": 0.9945, "step": 40445 }, { "epoch": 0.58, "grad_norm": 0.62109375, "learning_rate": 8.946681615578578e-05, "loss": 1.0107, "step": 40450 }, { "epoch": 0.58, "grad_norm": 0.51953125, "learning_rate": 8.944191955768317e-05, "loss": 0.9096, "step": 40455 }, { "epoch": 0.58, "grad_norm": 0.5234375, "learning_rate": 8.941702362137309e-05, "loss": 1.0481, "step": 40460 }, { "epoch": 0.58, "grad_norm": 0.5859375, "learning_rate": 8.939212834841602e-05, "loss": 0.885, "step": 40465 }, { "epoch": 0.58, "grad_norm": 0.53515625, "learning_rate": 8.936723374037245e-05, "loss": 0.8906, "step": 40470 }, { "epoch": 0.58, "grad_norm": 0.46875, "learning_rate": 8.934233979880276e-05, "loss": 1.0109, "step": 40475 }, { "epoch": 0.58, "grad_norm": 0.609375, "learning_rate": 8.931744652526731e-05, "loss": 1.0306, "step": 40480 }, { "epoch": 0.58, "grad_norm": 0.53125, "learning_rate": 8.929255392132652e-05, "loss": 0.7733, "step": 40485 }, { "epoch": 0.58, "grad_norm": 0.51953125, "learning_rate": 8.926766198854063e-05, "loss": 0.9697, "step": 40490 }, { "epoch": 0.58, "grad_norm": 0.62890625, "learning_rate": 8.92427707284699e-05, "loss": 0.9151, "step": 40495 }, { "epoch": 0.58, "grad_norm": 0.625, "learning_rate": 8.921788014267456e-05, "loss": 1.1148, "step": 40500 }, { "epoch": 0.58, "grad_norm": 0.59375, "learning_rate": 8.919299023271479e-05, "loss": 0.9216, "step": 40505 }, { "epoch": 0.58, "grad_norm": 0.60546875, "learning_rate": 8.916810100015066e-05, "loss": 0.8619, "step": 40510 }, { "epoch": 0.58, "grad_norm": 0.69921875, "learning_rate": 8.914321244654233e-05, "loss": 1.042, "step": 40515 }, { "epoch": 0.58, "grad_norm": 0.51953125, "learning_rate": 8.91183245734498e-05, "loss": 0.9905, "step": 40520 }, { "epoch": 0.58, "grad_norm": 0.55078125, "learning_rate": 8.909343738243312e-05, "loss": 0.8287, "step": 40525 }, { "epoch": 0.58, "grad_norm": 0.61328125, "learning_rate": 8.906855087505217e-05, "loss": 0.9968, "step": 40530 }, { "epoch": 0.58, "grad_norm": 0.5546875, "learning_rate": 8.904366505286687e-05, "loss": 0.944, "step": 40535 }, { "epoch": 0.58, "grad_norm": 0.515625, "learning_rate": 8.901877991743717e-05, "loss": 0.8778, "step": 40540 }, { "epoch": 0.58, "grad_norm": 0.56640625, "learning_rate": 8.899389547032283e-05, "loss": 0.9997, "step": 40545 }, { "epoch": 0.58, "grad_norm": 0.50390625, "learning_rate": 8.896901171308364e-05, "loss": 1.0573, "step": 40550 }, { "epoch": 0.58, "grad_norm": 0.58984375, "learning_rate": 8.894412864727937e-05, "loss": 0.8705, "step": 40555 }, { "epoch": 0.58, "grad_norm": 0.59765625, "learning_rate": 8.891924627446974e-05, "loss": 0.9807, "step": 40560 }, { "epoch": 0.58, "grad_norm": 0.4609375, "learning_rate": 8.889436459621432e-05, "loss": 1.0162, "step": 40565 }, { "epoch": 0.58, "grad_norm": 0.59765625, "learning_rate": 8.886948361407281e-05, "loss": 0.9592, "step": 40570 }, { "epoch": 0.58, "grad_norm": 0.5859375, "learning_rate": 8.884460332960477e-05, "loss": 1.0517, "step": 40575 }, { "epoch": 0.58, "grad_norm": 0.60546875, "learning_rate": 8.881972374436967e-05, "loss": 1.0649, "step": 40580 }, { "epoch": 0.58, "grad_norm": 0.51171875, "learning_rate": 8.879484485992702e-05, "loss": 0.85, "step": 40585 }, { "epoch": 0.58, "grad_norm": 0.59765625, "learning_rate": 8.876996667783624e-05, "loss": 1.0682, "step": 40590 }, { "epoch": 0.58, "grad_norm": 0.59375, "learning_rate": 8.874508919965675e-05, "loss": 1.009, "step": 40595 }, { "epoch": 0.58, "grad_norm": 0.55078125, "learning_rate": 8.872021242694789e-05, "loss": 0.9226, "step": 40600 }, { "epoch": 0.58, "grad_norm": 0.65234375, "learning_rate": 8.869533636126895e-05, "loss": 0.8566, "step": 40605 }, { "epoch": 0.58, "grad_norm": 0.5, "learning_rate": 8.867046100417921e-05, "loss": 0.949, "step": 40610 }, { "epoch": 0.58, "grad_norm": 0.55078125, "learning_rate": 8.864558635723787e-05, "loss": 0.9686, "step": 40615 }, { "epoch": 0.58, "grad_norm": 0.578125, "learning_rate": 8.862071242200411e-05, "loss": 0.9619, "step": 40620 }, { "epoch": 0.58, "grad_norm": 0.53515625, "learning_rate": 8.859583920003707e-05, "loss": 0.9039, "step": 40625 }, { "epoch": 0.58, "grad_norm": 0.51953125, "learning_rate": 8.857096669289583e-05, "loss": 0.9528, "step": 40630 }, { "epoch": 0.58, "grad_norm": 0.60546875, "learning_rate": 8.85460949021394e-05, "loss": 0.912, "step": 40635 }, { "epoch": 0.58, "grad_norm": 0.6171875, "learning_rate": 8.852122382932678e-05, "loss": 1.0178, "step": 40640 }, { "epoch": 0.58, "grad_norm": 0.58984375, "learning_rate": 8.849635347601692e-05, "loss": 0.906, "step": 40645 }, { "epoch": 0.58, "grad_norm": 0.5234375, "learning_rate": 8.847148384376872e-05, "loss": 0.9614, "step": 40650 }, { "epoch": 0.58, "grad_norm": 0.59765625, "learning_rate": 8.844661493414106e-05, "loss": 0.9905, "step": 40655 }, { "epoch": 0.58, "grad_norm": 0.5078125, "learning_rate": 8.84217467486927e-05, "loss": 0.8828, "step": 40660 }, { "epoch": 0.58, "grad_norm": 0.62890625, "learning_rate": 8.839687928898248e-05, "loss": 1.0715, "step": 40665 }, { "epoch": 0.58, "grad_norm": 0.51953125, "learning_rate": 8.837201255656907e-05, "loss": 0.8497, "step": 40670 }, { "epoch": 0.58, "grad_norm": 0.6171875, "learning_rate": 8.834714655301114e-05, "loss": 0.9762, "step": 40675 }, { "epoch": 0.58, "grad_norm": 0.51953125, "learning_rate": 8.832228127986734e-05, "loss": 0.9128, "step": 40680 }, { "epoch": 0.58, "grad_norm": 0.5859375, "learning_rate": 8.82974167386963e-05, "loss": 1.2287, "step": 40685 }, { "epoch": 0.58, "grad_norm": 0.5234375, "learning_rate": 8.827255293105646e-05, "loss": 1.03, "step": 40690 }, { "epoch": 0.58, "grad_norm": 0.56640625, "learning_rate": 8.824768985850638e-05, "loss": 1.0054, "step": 40695 }, { "epoch": 0.58, "grad_norm": 0.4921875, "learning_rate": 8.822282752260445e-05, "loss": 0.8711, "step": 40700 }, { "epoch": 0.58, "grad_norm": 0.482421875, "learning_rate": 8.819796592490912e-05, "loss": 0.969, "step": 40705 }, { "epoch": 0.58, "grad_norm": 0.546875, "learning_rate": 8.817310506697873e-05, "loss": 1.0671, "step": 40710 }, { "epoch": 0.58, "grad_norm": 0.53125, "learning_rate": 8.814824495037157e-05, "loss": 0.9447, "step": 40715 }, { "epoch": 0.58, "grad_norm": 0.5625, "learning_rate": 8.812338557664594e-05, "loss": 1.0268, "step": 40720 }, { "epoch": 0.58, "grad_norm": 0.55078125, "learning_rate": 8.809852694736001e-05, "loss": 1.0194, "step": 40725 }, { "epoch": 0.58, "grad_norm": 0.53125, "learning_rate": 8.807366906407196e-05, "loss": 0.9151, "step": 40730 }, { "epoch": 0.58, "grad_norm": 0.9609375, "learning_rate": 8.804881192833996e-05, "loss": 0.7921, "step": 40735 }, { "epoch": 0.58, "grad_norm": 0.7109375, "learning_rate": 8.802395554172201e-05, "loss": 1.011, "step": 40740 }, { "epoch": 0.58, "grad_norm": 0.5625, "learning_rate": 8.799909990577617e-05, "loss": 0.915, "step": 40745 }, { "epoch": 0.58, "grad_norm": 0.578125, "learning_rate": 8.79742450220604e-05, "loss": 1.0229, "step": 40750 }, { "epoch": 0.58, "grad_norm": 0.5546875, "learning_rate": 8.794939089213265e-05, "loss": 0.9477, "step": 40755 }, { "epoch": 0.58, "grad_norm": 0.50390625, "learning_rate": 8.792453751755081e-05, "loss": 0.7819, "step": 40760 }, { "epoch": 0.58, "grad_norm": 0.89453125, "learning_rate": 8.789968489987273e-05, "loss": 0.9665, "step": 40765 }, { "epoch": 0.58, "grad_norm": 0.4921875, "learning_rate": 8.787483304065613e-05, "loss": 0.7705, "step": 40770 }, { "epoch": 0.58, "grad_norm": 0.55078125, "learning_rate": 8.784998194145885e-05, "loss": 0.9689, "step": 40775 }, { "epoch": 0.58, "grad_norm": 0.546875, "learning_rate": 8.782513160383852e-05, "loss": 0.8291, "step": 40780 }, { "epoch": 0.59, "grad_norm": 0.625, "learning_rate": 8.780028202935281e-05, "loss": 0.9454, "step": 40785 }, { "epoch": 0.59, "grad_norm": 0.671875, "learning_rate": 8.777543321955934e-05, "loss": 0.9423, "step": 40790 }, { "epoch": 0.59, "grad_norm": 0.578125, "learning_rate": 8.775058517601563e-05, "loss": 1.0121, "step": 40795 }, { "epoch": 0.59, "grad_norm": 0.54296875, "learning_rate": 8.772573790027919e-05, "loss": 0.9232, "step": 40800 }, { "epoch": 0.59, "grad_norm": 0.56640625, "learning_rate": 8.770089139390745e-05, "loss": 1.001, "step": 40805 }, { "epoch": 0.59, "grad_norm": 0.56640625, "learning_rate": 8.767604565845787e-05, "loss": 0.929, "step": 40810 }, { "epoch": 0.59, "grad_norm": 0.61328125, "learning_rate": 8.76512006954878e-05, "loss": 0.9996, "step": 40815 }, { "epoch": 0.59, "grad_norm": 0.57421875, "learning_rate": 8.76263565065545e-05, "loss": 0.9141, "step": 40820 }, { "epoch": 0.59, "grad_norm": 0.50390625, "learning_rate": 8.760151309321527e-05, "loss": 1.0298, "step": 40825 }, { "epoch": 0.59, "grad_norm": 0.5, "learning_rate": 8.757667045702733e-05, "loss": 0.8709, "step": 40830 }, { "epoch": 0.59, "grad_norm": 0.6328125, "learning_rate": 8.755182859954784e-05, "loss": 0.9799, "step": 40835 }, { "epoch": 0.59, "grad_norm": 0.5625, "learning_rate": 8.752698752233394e-05, "loss": 1.0642, "step": 40840 }, { "epoch": 0.59, "grad_norm": 0.490234375, "learning_rate": 8.750214722694261e-05, "loss": 0.9883, "step": 40845 }, { "epoch": 0.59, "grad_norm": 0.59765625, "learning_rate": 8.747730771493095e-05, "loss": 0.9238, "step": 40850 }, { "epoch": 0.59, "grad_norm": 0.60546875, "learning_rate": 8.74524689878559e-05, "loss": 0.9287, "step": 40855 }, { "epoch": 0.59, "grad_norm": 0.55859375, "learning_rate": 8.742763104727439e-05, "loss": 0.9518, "step": 40860 }, { "epoch": 0.59, "grad_norm": 0.5078125, "learning_rate": 8.740279389474327e-05, "loss": 0.8663, "step": 40865 }, { "epoch": 0.59, "grad_norm": 0.609375, "learning_rate": 8.737795753181939e-05, "loss": 0.9363, "step": 40870 }, { "epoch": 0.59, "grad_norm": 0.8046875, "learning_rate": 8.735312196005949e-05, "loss": 1.1332, "step": 40875 }, { "epoch": 0.59, "grad_norm": 0.59765625, "learning_rate": 8.732828718102033e-05, "loss": 1.0223, "step": 40880 }, { "epoch": 0.59, "grad_norm": 0.609375, "learning_rate": 8.730345319625857e-05, "loss": 1.0087, "step": 40885 }, { "epoch": 0.59, "grad_norm": 0.5390625, "learning_rate": 8.727862000733081e-05, "loss": 0.9222, "step": 40890 }, { "epoch": 0.59, "grad_norm": 0.53515625, "learning_rate": 8.725378761579366e-05, "loss": 1.0914, "step": 40895 }, { "epoch": 0.59, "grad_norm": 0.53515625, "learning_rate": 8.722895602320358e-05, "loss": 1.0208, "step": 40900 }, { "epoch": 0.59, "grad_norm": 0.60546875, "learning_rate": 8.720412523111709e-05, "loss": 0.8234, "step": 40905 }, { "epoch": 0.59, "grad_norm": 0.58984375, "learning_rate": 8.717929524109062e-05, "loss": 0.8737, "step": 40910 }, { "epoch": 0.59, "grad_norm": 0.48828125, "learning_rate": 8.71544660546805e-05, "loss": 1.0277, "step": 40915 }, { "epoch": 0.59, "grad_norm": 0.54296875, "learning_rate": 8.71296376734431e-05, "loss": 0.9144, "step": 40920 }, { "epoch": 0.59, "grad_norm": 0.61328125, "learning_rate": 8.710481009893467e-05, "loss": 0.9951, "step": 40925 }, { "epoch": 0.59, "grad_norm": 0.6953125, "learning_rate": 8.70799833327114e-05, "loss": 0.9455, "step": 40930 }, { "epoch": 0.59, "grad_norm": 0.62890625, "learning_rate": 8.705515737632952e-05, "loss": 0.9778, "step": 40935 }, { "epoch": 0.59, "grad_norm": 0.51171875, "learning_rate": 8.703033223134511e-05, "loss": 0.9631, "step": 40940 }, { "epoch": 0.59, "grad_norm": 0.47265625, "learning_rate": 8.700550789931428e-05, "loss": 0.8185, "step": 40945 }, { "epoch": 0.59, "grad_norm": 0.57421875, "learning_rate": 8.698068438179297e-05, "loss": 1.0634, "step": 40950 }, { "epoch": 0.59, "grad_norm": 0.5859375, "learning_rate": 8.695586168033719e-05, "loss": 1.0236, "step": 40955 }, { "epoch": 0.59, "grad_norm": 0.5625, "learning_rate": 8.693103979650288e-05, "loss": 1.0162, "step": 40960 }, { "epoch": 0.59, "grad_norm": 0.5234375, "learning_rate": 8.690621873184587e-05, "loss": 0.9561, "step": 40965 }, { "epoch": 0.59, "grad_norm": 0.5234375, "learning_rate": 8.688139848792196e-05, "loss": 1.0784, "step": 40970 }, { "epoch": 0.59, "grad_norm": 0.474609375, "learning_rate": 8.685657906628696e-05, "loss": 0.8949, "step": 40975 }, { "epoch": 0.59, "grad_norm": 0.546875, "learning_rate": 8.683176046849655e-05, "loss": 0.9578, "step": 40980 }, { "epoch": 0.59, "grad_norm": 0.46875, "learning_rate": 8.680694269610637e-05, "loss": 0.8533, "step": 40985 }, { "epoch": 0.59, "grad_norm": 0.54296875, "learning_rate": 8.678212575067207e-05, "loss": 0.8894, "step": 40990 }, { "epoch": 0.59, "grad_norm": 0.71875, "learning_rate": 8.675730963374918e-05, "loss": 0.9729, "step": 40995 }, { "epoch": 0.59, "grad_norm": 0.56640625, "learning_rate": 8.673249434689322e-05, "loss": 0.9695, "step": 41000 }, { "epoch": 0.59, "grad_norm": 0.50390625, "learning_rate": 8.670767989165962e-05, "loss": 0.8899, "step": 41005 }, { "epoch": 0.59, "grad_norm": 0.5703125, "learning_rate": 8.668286626960375e-05, "loss": 0.868, "step": 41010 }, { "epoch": 0.59, "grad_norm": 0.60546875, "learning_rate": 8.6658053482281e-05, "loss": 0.9941, "step": 41015 }, { "epoch": 0.59, "grad_norm": 0.56640625, "learning_rate": 8.663324153124667e-05, "loss": 0.9316, "step": 41020 }, { "epoch": 0.59, "grad_norm": 0.5078125, "learning_rate": 8.660843041805597e-05, "loss": 0.8675, "step": 41025 }, { "epoch": 0.59, "grad_norm": 0.55078125, "learning_rate": 8.658362014426411e-05, "loss": 0.8628, "step": 41030 }, { "epoch": 0.59, "grad_norm": 0.5234375, "learning_rate": 8.655881071142622e-05, "loss": 0.8342, "step": 41035 }, { "epoch": 0.59, "grad_norm": 0.5703125, "learning_rate": 8.653400212109737e-05, "loss": 0.9824, "step": 41040 }, { "epoch": 0.59, "grad_norm": 0.55859375, "learning_rate": 8.650919437483262e-05, "loss": 1.1338, "step": 41045 }, { "epoch": 0.59, "grad_norm": 0.5625, "learning_rate": 8.648438747418694e-05, "loss": 0.9745, "step": 41050 }, { "epoch": 0.59, "grad_norm": 0.6640625, "learning_rate": 8.645958142071524e-05, "loss": 0.9639, "step": 41055 }, { "epoch": 0.59, "grad_norm": 0.62890625, "learning_rate": 8.64347762159724e-05, "loss": 1.0613, "step": 41060 }, { "epoch": 0.59, "grad_norm": 0.5703125, "learning_rate": 8.64099718615132e-05, "loss": 0.9962, "step": 41065 }, { "epoch": 0.59, "grad_norm": 0.51171875, "learning_rate": 8.638516835889248e-05, "loss": 0.8651, "step": 41070 }, { "epoch": 0.59, "grad_norm": 0.59375, "learning_rate": 8.636036570966491e-05, "loss": 0.8809, "step": 41075 }, { "epoch": 0.59, "grad_norm": 0.58203125, "learning_rate": 8.633556391538511e-05, "loss": 1.0232, "step": 41080 }, { "epoch": 0.59, "grad_norm": 0.53125, "learning_rate": 8.631076297760776e-05, "loss": 0.9977, "step": 41085 }, { "epoch": 0.59, "grad_norm": 0.55078125, "learning_rate": 8.628596289788738e-05, "loss": 1.0693, "step": 41090 }, { "epoch": 0.59, "grad_norm": 0.546875, "learning_rate": 8.626116367777845e-05, "loss": 0.9539, "step": 41095 }, { "epoch": 0.59, "grad_norm": 0.57421875, "learning_rate": 8.623636531883545e-05, "loss": 0.894, "step": 41100 }, { "epoch": 0.59, "grad_norm": 0.57421875, "learning_rate": 8.621156782261275e-05, "loss": 1.0753, "step": 41105 }, { "epoch": 0.59, "grad_norm": 0.5390625, "learning_rate": 8.618677119066468e-05, "loss": 0.8618, "step": 41110 }, { "epoch": 0.59, "grad_norm": 0.61328125, "learning_rate": 8.616197542454551e-05, "loss": 1.0041, "step": 41115 }, { "epoch": 0.59, "grad_norm": 0.578125, "learning_rate": 8.613718052580946e-05, "loss": 0.9626, "step": 41120 }, { "epoch": 0.59, "grad_norm": 0.6015625, "learning_rate": 8.611238649601075e-05, "loss": 0.7566, "step": 41125 }, { "epoch": 0.59, "grad_norm": 0.65234375, "learning_rate": 8.608759333670349e-05, "loss": 0.9487, "step": 41130 }, { "epoch": 0.59, "grad_norm": 0.578125, "learning_rate": 8.606280104944168e-05, "loss": 0.9074, "step": 41135 }, { "epoch": 0.59, "grad_norm": 0.6015625, "learning_rate": 8.603800963577939e-05, "loss": 1.0181, "step": 41140 }, { "epoch": 0.59, "grad_norm": 0.53515625, "learning_rate": 8.601321909727056e-05, "loss": 0.8415, "step": 41145 }, { "epoch": 0.59, "grad_norm": 0.55078125, "learning_rate": 8.598842943546908e-05, "loss": 0.891, "step": 41150 }, { "epoch": 0.59, "grad_norm": 0.53125, "learning_rate": 8.596364065192885e-05, "loss": 0.9422, "step": 41155 }, { "epoch": 0.59, "grad_norm": 0.52734375, "learning_rate": 8.593885274820356e-05, "loss": 0.9747, "step": 41160 }, { "epoch": 0.59, "grad_norm": 0.5625, "learning_rate": 8.591406572584701e-05, "loss": 0.9683, "step": 41165 }, { "epoch": 0.59, "grad_norm": 0.625, "learning_rate": 8.588927958641286e-05, "loss": 0.9289, "step": 41170 }, { "epoch": 0.59, "grad_norm": 0.5390625, "learning_rate": 8.586449433145472e-05, "loss": 0.94, "step": 41175 }, { "epoch": 0.59, "grad_norm": 0.546875, "learning_rate": 8.583970996252618e-05, "loss": 1.0362, "step": 41180 }, { "epoch": 0.59, "grad_norm": 0.5625, "learning_rate": 8.581492648118077e-05, "loss": 1.1214, "step": 41185 }, { "epoch": 0.59, "grad_norm": 0.5625, "learning_rate": 8.579014388897188e-05, "loss": 0.9097, "step": 41190 }, { "epoch": 0.59, "grad_norm": 0.640625, "learning_rate": 8.576536218745299e-05, "loss": 0.9254, "step": 41195 }, { "epoch": 0.59, "grad_norm": 0.5390625, "learning_rate": 8.57405813781774e-05, "loss": 1.0014, "step": 41200 }, { "epoch": 0.59, "grad_norm": 0.578125, "learning_rate": 8.57158014626984e-05, "loss": 1.0573, "step": 41205 }, { "epoch": 0.59, "grad_norm": 0.66796875, "learning_rate": 8.569102244256929e-05, "loss": 0.91, "step": 41210 }, { "epoch": 0.59, "grad_norm": 0.6484375, "learning_rate": 8.566624431934312e-05, "loss": 0.9182, "step": 41215 }, { "epoch": 0.59, "grad_norm": 0.5078125, "learning_rate": 8.564146709457312e-05, "loss": 0.9758, "step": 41220 }, { "epoch": 0.59, "grad_norm": 0.65234375, "learning_rate": 8.56166907698123e-05, "loss": 0.7203, "step": 41225 }, { "epoch": 0.59, "grad_norm": 0.52734375, "learning_rate": 8.559191534661367e-05, "loss": 0.8752, "step": 41230 }, { "epoch": 0.59, "grad_norm": 0.61328125, "learning_rate": 8.556714082653021e-05, "loss": 1.0124, "step": 41235 }, { "epoch": 0.59, "grad_norm": 0.52734375, "learning_rate": 8.55423672111148e-05, "loss": 0.9686, "step": 41240 }, { "epoch": 0.59, "grad_norm": 0.55078125, "learning_rate": 8.551759450192026e-05, "loss": 1.0235, "step": 41245 }, { "epoch": 0.59, "grad_norm": 0.6171875, "learning_rate": 8.549282270049941e-05, "loss": 1.0041, "step": 41250 }, { "epoch": 0.59, "grad_norm": 0.59375, "learning_rate": 8.546805180840497e-05, "loss": 0.9878, "step": 41255 }, { "epoch": 0.59, "grad_norm": 0.66796875, "learning_rate": 8.54432818271896e-05, "loss": 0.8734, "step": 41260 }, { "epoch": 0.59, "grad_norm": 0.50390625, "learning_rate": 8.541851275840589e-05, "loss": 0.9486, "step": 41265 }, { "epoch": 0.59, "grad_norm": 0.50390625, "learning_rate": 8.539374460360638e-05, "loss": 0.7125, "step": 41270 }, { "epoch": 0.59, "grad_norm": 0.58984375, "learning_rate": 8.536897736434362e-05, "loss": 0.9791, "step": 41275 }, { "epoch": 0.59, "grad_norm": 0.72265625, "learning_rate": 8.534421104217001e-05, "loss": 1.0311, "step": 41280 }, { "epoch": 0.59, "grad_norm": 0.52734375, "learning_rate": 8.531944563863794e-05, "loss": 0.9974, "step": 41285 }, { "epoch": 0.59, "grad_norm": 0.56640625, "learning_rate": 8.529468115529975e-05, "loss": 0.9587, "step": 41290 }, { "epoch": 0.59, "grad_norm": 0.59765625, "learning_rate": 8.52699175937077e-05, "loss": 0.9041, "step": 41295 }, { "epoch": 0.59, "grad_norm": 0.57421875, "learning_rate": 8.524515495541395e-05, "loss": 0.8812, "step": 41300 }, { "epoch": 0.59, "grad_norm": 0.57421875, "learning_rate": 8.522039324197073e-05, "loss": 0.9394, "step": 41305 }, { "epoch": 0.59, "grad_norm": 0.53125, "learning_rate": 8.51956324549301e-05, "loss": 0.9483, "step": 41310 }, { "epoch": 0.59, "grad_norm": 0.5078125, "learning_rate": 8.517087259584409e-05, "loss": 0.8897, "step": 41315 }, { "epoch": 0.59, "grad_norm": 0.57421875, "learning_rate": 8.514611366626466e-05, "loss": 0.9513, "step": 41320 }, { "epoch": 0.59, "grad_norm": 0.578125, "learning_rate": 8.512135566774372e-05, "loss": 0.9343, "step": 41325 }, { "epoch": 0.59, "grad_norm": 0.55078125, "learning_rate": 8.50965986018332e-05, "loss": 0.8736, "step": 41330 }, { "epoch": 0.59, "grad_norm": 0.61328125, "learning_rate": 8.507184247008482e-05, "loss": 1.0342, "step": 41335 }, { "epoch": 0.59, "grad_norm": 0.51171875, "learning_rate": 8.504708727405035e-05, "loss": 0.7705, "step": 41340 }, { "epoch": 0.59, "grad_norm": 0.57421875, "learning_rate": 8.502233301528149e-05, "loss": 1.098, "step": 41345 }, { "epoch": 0.59, "grad_norm": 0.6171875, "learning_rate": 8.499757969532986e-05, "loss": 0.855, "step": 41350 }, { "epoch": 0.59, "grad_norm": 0.5625, "learning_rate": 8.497282731574701e-05, "loss": 0.9717, "step": 41355 }, { "epoch": 0.59, "grad_norm": 0.515625, "learning_rate": 8.494807587808447e-05, "loss": 0.9026, "step": 41360 }, { "epoch": 0.59, "grad_norm": 0.5234375, "learning_rate": 8.49233253838937e-05, "loss": 0.8608, "step": 41365 }, { "epoch": 0.59, "grad_norm": 0.58203125, "learning_rate": 8.489857583472604e-05, "loss": 0.8898, "step": 41370 }, { "epoch": 0.59, "grad_norm": 0.578125, "learning_rate": 8.487382723213284e-05, "loss": 1.023, "step": 41375 }, { "epoch": 0.59, "grad_norm": 0.57421875, "learning_rate": 8.484907957766537e-05, "loss": 0.8763, "step": 41380 }, { "epoch": 0.59, "grad_norm": 0.53125, "learning_rate": 8.482433287287485e-05, "loss": 0.9427, "step": 41385 }, { "epoch": 0.59, "grad_norm": 0.54296875, "learning_rate": 8.479958711931245e-05, "loss": 0.8177, "step": 41390 }, { "epoch": 0.59, "grad_norm": 0.58203125, "learning_rate": 8.477484231852921e-05, "loss": 0.9392, "step": 41395 }, { "epoch": 0.59, "grad_norm": 0.609375, "learning_rate": 8.475009847207622e-05, "loss": 0.895, "step": 41400 }, { "epoch": 0.59, "grad_norm": 0.55859375, "learning_rate": 8.472535558150441e-05, "loss": 0.9946, "step": 41405 }, { "epoch": 0.59, "grad_norm": 0.546875, "learning_rate": 8.470061364836472e-05, "loss": 0.9943, "step": 41410 }, { "epoch": 0.59, "grad_norm": 0.6015625, "learning_rate": 8.467587267420797e-05, "loss": 0.9164, "step": 41415 }, { "epoch": 0.59, "grad_norm": 0.56640625, "learning_rate": 8.465113266058503e-05, "loss": 0.9155, "step": 41420 }, { "epoch": 0.59, "grad_norm": 0.55859375, "learning_rate": 8.462639360904653e-05, "loss": 0.9499, "step": 41425 }, { "epoch": 0.59, "grad_norm": 0.62890625, "learning_rate": 8.460165552114321e-05, "loss": 0.8514, "step": 41430 }, { "epoch": 0.59, "grad_norm": 0.451171875, "learning_rate": 8.457691839842563e-05, "loss": 0.8172, "step": 41435 }, { "epoch": 0.59, "grad_norm": 0.5859375, "learning_rate": 8.455218224244439e-05, "loss": 0.9282, "step": 41440 }, { "epoch": 0.59, "grad_norm": 0.64453125, "learning_rate": 8.452744705474995e-05, "loss": 0.9488, "step": 41445 }, { "epoch": 0.59, "grad_norm": 0.55859375, "learning_rate": 8.450271283689274e-05, "loss": 0.9748, "step": 41450 }, { "epoch": 0.59, "grad_norm": 0.5234375, "learning_rate": 8.447797959042317e-05, "loss": 0.8702, "step": 41455 }, { "epoch": 0.59, "grad_norm": 0.54296875, "learning_rate": 8.44532473168915e-05, "loss": 0.9403, "step": 41460 }, { "epoch": 0.59, "grad_norm": 0.56640625, "learning_rate": 8.442851601784798e-05, "loss": 0.8392, "step": 41465 }, { "epoch": 0.59, "grad_norm": 0.59375, "learning_rate": 8.440378569484286e-05, "loss": 0.9365, "step": 41470 }, { "epoch": 0.59, "grad_norm": 0.55859375, "learning_rate": 8.437905634942618e-05, "loss": 0.9095, "step": 41475 }, { "epoch": 0.6, "grad_norm": 0.60546875, "learning_rate": 8.435432798314805e-05, "loss": 1.0542, "step": 41480 }, { "epoch": 0.6, "grad_norm": 0.5625, "learning_rate": 8.432960059755842e-05, "loss": 0.9465, "step": 41485 }, { "epoch": 0.6, "grad_norm": 0.515625, "learning_rate": 8.430487419420732e-05, "loss": 0.9138, "step": 41490 }, { "epoch": 0.6, "grad_norm": 0.546875, "learning_rate": 8.428014877464457e-05, "loss": 0.9988, "step": 41495 }, { "epoch": 0.6, "grad_norm": 0.58984375, "learning_rate": 8.425542434041999e-05, "loss": 0.983, "step": 41500 }, { "epoch": 0.6, "grad_norm": 0.79296875, "learning_rate": 8.423070089308333e-05, "loss": 0.9907, "step": 41505 }, { "epoch": 0.6, "grad_norm": 0.54296875, "learning_rate": 8.420597843418432e-05, "loss": 0.8086, "step": 41510 }, { "epoch": 0.6, "grad_norm": 0.55859375, "learning_rate": 8.418125696527256e-05, "loss": 1.0055, "step": 41515 }, { "epoch": 0.6, "grad_norm": 0.515625, "learning_rate": 8.415653648789762e-05, "loss": 0.8668, "step": 41520 }, { "epoch": 0.6, "grad_norm": 0.76171875, "learning_rate": 8.413181700360905e-05, "loss": 1.0207, "step": 41525 }, { "epoch": 0.6, "grad_norm": 0.5625, "learning_rate": 8.410709851395626e-05, "loss": 0.8962, "step": 41530 }, { "epoch": 0.6, "grad_norm": 0.58203125, "learning_rate": 8.408238102048861e-05, "loss": 0.8318, "step": 41535 }, { "epoch": 0.6, "grad_norm": 0.466796875, "learning_rate": 8.405766452475544e-05, "loss": 0.8854, "step": 41540 }, { "epoch": 0.6, "grad_norm": 0.64453125, "learning_rate": 8.403294902830603e-05, "loss": 0.9743, "step": 41545 }, { "epoch": 0.6, "grad_norm": 0.57421875, "learning_rate": 8.400823453268956e-05, "loss": 0.8932, "step": 41550 }, { "epoch": 0.6, "grad_norm": 0.62109375, "learning_rate": 8.398352103945515e-05, "loss": 0.9847, "step": 41555 }, { "epoch": 0.6, "grad_norm": 0.5390625, "learning_rate": 8.39588085501519e-05, "loss": 0.8523, "step": 41560 }, { "epoch": 0.6, "grad_norm": 0.51953125, "learning_rate": 8.39340970663288e-05, "loss": 0.9778, "step": 41565 }, { "epoch": 0.6, "grad_norm": 0.60546875, "learning_rate": 8.390938658953478e-05, "loss": 0.9594, "step": 41570 }, { "epoch": 0.6, "grad_norm": 0.486328125, "learning_rate": 8.388467712131877e-05, "loss": 0.8426, "step": 41575 }, { "epoch": 0.6, "grad_norm": 0.5859375, "learning_rate": 8.38599686632295e-05, "loss": 1.0202, "step": 41580 }, { "epoch": 0.6, "grad_norm": 0.55859375, "learning_rate": 8.383526121681581e-05, "loss": 1.0363, "step": 41585 }, { "epoch": 0.6, "grad_norm": 0.486328125, "learning_rate": 8.381055478362635e-05, "loss": 0.9128, "step": 41590 }, { "epoch": 0.6, "grad_norm": 0.5546875, "learning_rate": 8.378584936520973e-05, "loss": 0.9713, "step": 41595 }, { "epoch": 0.6, "grad_norm": 0.5703125, "learning_rate": 8.376114496311456e-05, "loss": 0.9081, "step": 41600 }, { "epoch": 0.6, "grad_norm": 0.5390625, "learning_rate": 8.373644157888933e-05, "loss": 0.993, "step": 41605 }, { "epoch": 0.6, "grad_norm": 0.60546875, "learning_rate": 8.371173921408243e-05, "loss": 0.9647, "step": 41610 }, { "epoch": 0.6, "grad_norm": 0.546875, "learning_rate": 8.368703787024229e-05, "loss": 1.16, "step": 41615 }, { "epoch": 0.6, "grad_norm": 0.578125, "learning_rate": 8.36623375489172e-05, "loss": 0.8755, "step": 41620 }, { "epoch": 0.6, "grad_norm": 0.5859375, "learning_rate": 8.363763825165538e-05, "loss": 1.1381, "step": 41625 }, { "epoch": 0.6, "grad_norm": 0.48046875, "learning_rate": 8.361293998000509e-05, "loss": 0.9973, "step": 41630 }, { "epoch": 0.6, "grad_norm": 0.51953125, "learning_rate": 8.35882427355143e-05, "loss": 0.9467, "step": 41635 }, { "epoch": 0.6, "grad_norm": 0.58203125, "learning_rate": 8.35635465197312e-05, "loss": 0.8742, "step": 41640 }, { "epoch": 0.6, "grad_norm": 0.5078125, "learning_rate": 8.353885133420372e-05, "loss": 0.9813, "step": 41645 }, { "epoch": 0.6, "grad_norm": 0.546875, "learning_rate": 8.351415718047976e-05, "loss": 1.0443, "step": 41650 }, { "epoch": 0.6, "grad_norm": 0.62109375, "learning_rate": 8.348946406010721e-05, "loss": 0.9129, "step": 41655 }, { "epoch": 0.6, "grad_norm": 0.55078125, "learning_rate": 8.346477197463387e-05, "loss": 0.938, "step": 41660 }, { "epoch": 0.6, "grad_norm": 0.5390625, "learning_rate": 8.344008092560743e-05, "loss": 0.9234, "step": 41665 }, { "epoch": 0.6, "grad_norm": 0.5625, "learning_rate": 8.341539091457561e-05, "loss": 1.1227, "step": 41670 }, { "epoch": 0.6, "grad_norm": 0.625, "learning_rate": 8.339070194308597e-05, "loss": 0.9144, "step": 41675 }, { "epoch": 0.6, "grad_norm": 0.6171875, "learning_rate": 8.336601401268607e-05, "loss": 0.9558, "step": 41680 }, { "epoch": 0.6, "grad_norm": 0.58984375, "learning_rate": 8.334132712492333e-05, "loss": 0.8646, "step": 41685 }, { "epoch": 0.6, "grad_norm": 0.5703125, "learning_rate": 8.331664128134515e-05, "loss": 1.0444, "step": 41690 }, { "epoch": 0.6, "grad_norm": 0.50390625, "learning_rate": 8.329195648349892e-05, "loss": 0.9609, "step": 41695 }, { "epoch": 0.6, "grad_norm": 0.5078125, "learning_rate": 8.32672727329319e-05, "loss": 1.0872, "step": 41700 }, { "epoch": 0.6, "grad_norm": 0.57421875, "learning_rate": 8.324259003119126e-05, "loss": 1.0415, "step": 41705 }, { "epoch": 0.6, "grad_norm": 0.65625, "learning_rate": 8.321790837982417e-05, "loss": 0.9616, "step": 41710 }, { "epoch": 0.6, "grad_norm": 0.58984375, "learning_rate": 8.31932277803777e-05, "loss": 0.7862, "step": 41715 }, { "epoch": 0.6, "grad_norm": 0.6796875, "learning_rate": 8.316854823439884e-05, "loss": 0.9196, "step": 41720 }, { "epoch": 0.6, "grad_norm": 0.5546875, "learning_rate": 8.314386974343455e-05, "loss": 0.8995, "step": 41725 }, { "epoch": 0.6, "grad_norm": 0.5625, "learning_rate": 8.311919230903172e-05, "loss": 0.9577, "step": 41730 }, { "epoch": 0.6, "grad_norm": 0.462890625, "learning_rate": 8.309451593273717e-05, "loss": 0.8472, "step": 41735 }, { "epoch": 0.6, "grad_norm": 0.52734375, "learning_rate": 8.306984061609758e-05, "loss": 0.9422, "step": 41740 }, { "epoch": 0.6, "grad_norm": 0.5546875, "learning_rate": 8.304516636065964e-05, "loss": 1.1044, "step": 41745 }, { "epoch": 0.6, "grad_norm": 0.61328125, "learning_rate": 8.302049316796999e-05, "loss": 0.8926, "step": 41750 }, { "epoch": 0.6, "grad_norm": 0.486328125, "learning_rate": 8.29958210395752e-05, "loss": 0.8397, "step": 41755 }, { "epoch": 0.6, "grad_norm": 0.51171875, "learning_rate": 8.297114997702166e-05, "loss": 0.9482, "step": 41760 }, { "epoch": 0.6, "grad_norm": 0.51953125, "learning_rate": 8.294647998185587e-05, "loss": 0.8748, "step": 41765 }, { "epoch": 0.6, "grad_norm": 0.53125, "learning_rate": 8.292181105562414e-05, "loss": 0.9345, "step": 41770 }, { "epoch": 0.6, "grad_norm": 0.83984375, "learning_rate": 8.289714319987271e-05, "loss": 1.0754, "step": 41775 }, { "epoch": 0.6, "grad_norm": 0.5546875, "learning_rate": 8.287247641614785e-05, "loss": 0.9052, "step": 41780 }, { "epoch": 0.6, "grad_norm": 0.5625, "learning_rate": 8.28478107059957e-05, "loss": 0.9907, "step": 41785 }, { "epoch": 0.6, "grad_norm": 0.58984375, "learning_rate": 8.282314607096228e-05, "loss": 1.0787, "step": 41790 }, { "epoch": 0.6, "grad_norm": 0.515625, "learning_rate": 8.279848251259364e-05, "loss": 0.9882, "step": 41795 }, { "epoch": 0.6, "grad_norm": 0.5390625, "learning_rate": 8.277382003243567e-05, "loss": 0.8283, "step": 41800 }, { "epoch": 0.6, "grad_norm": 0.68359375, "learning_rate": 8.274915863203432e-05, "loss": 0.8574, "step": 41805 }, { "epoch": 0.6, "grad_norm": 0.55078125, "learning_rate": 8.272449831293534e-05, "loss": 0.902, "step": 41810 }, { "epoch": 0.6, "grad_norm": 0.609375, "learning_rate": 8.269983907668446e-05, "loss": 0.965, "step": 41815 }, { "epoch": 0.6, "grad_norm": 0.59765625, "learning_rate": 8.26751809248274e-05, "loss": 1.014, "step": 41820 }, { "epoch": 0.6, "grad_norm": 0.83203125, "learning_rate": 8.265052385890972e-05, "loss": 0.947, "step": 41825 }, { "epoch": 0.6, "grad_norm": 0.6640625, "learning_rate": 8.262586788047696e-05, "loss": 1.0511, "step": 41830 }, { "epoch": 0.6, "grad_norm": 0.546875, "learning_rate": 8.26012129910746e-05, "loss": 0.8874, "step": 41835 }, { "epoch": 0.6, "grad_norm": 0.51953125, "learning_rate": 8.257655919224804e-05, "loss": 0.8345, "step": 41840 }, { "epoch": 0.6, "grad_norm": 0.578125, "learning_rate": 8.255190648554257e-05, "loss": 0.8518, "step": 41845 }, { "epoch": 0.6, "grad_norm": 0.55859375, "learning_rate": 8.252725487250349e-05, "loss": 1.0714, "step": 41850 }, { "epoch": 0.6, "grad_norm": 0.57421875, "learning_rate": 8.250260435467594e-05, "loss": 0.9246, "step": 41855 }, { "epoch": 0.6, "grad_norm": 0.55078125, "learning_rate": 8.247795493360511e-05, "loss": 0.978, "step": 41860 }, { "epoch": 0.6, "grad_norm": 0.59375, "learning_rate": 8.2453306610836e-05, "loss": 0.8857, "step": 41865 }, { "epoch": 0.6, "grad_norm": 0.50390625, "learning_rate": 8.24286593879136e-05, "loss": 0.9305, "step": 41870 }, { "epoch": 0.6, "grad_norm": 0.515625, "learning_rate": 8.240401326638287e-05, "loss": 0.8613, "step": 41875 }, { "epoch": 0.6, "grad_norm": 0.498046875, "learning_rate": 8.237936824778863e-05, "loss": 1.0857, "step": 41880 }, { "epoch": 0.6, "grad_norm": 0.51171875, "learning_rate": 8.235472433367563e-05, "loss": 0.9642, "step": 41885 }, { "epoch": 0.6, "grad_norm": 0.59765625, "learning_rate": 8.233008152558868e-05, "loss": 0.8736, "step": 41890 }, { "epoch": 0.6, "grad_norm": 0.51953125, "learning_rate": 8.230543982507227e-05, "loss": 0.9844, "step": 41895 }, { "epoch": 0.6, "grad_norm": 0.55078125, "learning_rate": 8.228079923367107e-05, "loss": 0.9585, "step": 41900 }, { "epoch": 0.6, "grad_norm": 0.5390625, "learning_rate": 8.225615975292955e-05, "loss": 0.9325, "step": 41905 }, { "epoch": 0.6, "grad_norm": 0.5625, "learning_rate": 8.223152138439213e-05, "loss": 0.8639, "step": 41910 }, { "epoch": 0.6, "grad_norm": 0.61328125, "learning_rate": 8.220688412960321e-05, "loss": 0.987, "step": 41915 }, { "epoch": 0.6, "grad_norm": 0.498046875, "learning_rate": 8.218224799010704e-05, "loss": 0.8088, "step": 41920 }, { "epoch": 0.6, "grad_norm": 0.546875, "learning_rate": 8.215761296744786e-05, "loss": 1.0435, "step": 41925 }, { "epoch": 0.6, "grad_norm": 0.64453125, "learning_rate": 8.213297906316983e-05, "loss": 0.8773, "step": 41930 }, { "epoch": 0.6, "grad_norm": 0.55078125, "learning_rate": 8.210834627881702e-05, "loss": 0.9031, "step": 41935 }, { "epoch": 0.6, "grad_norm": 0.5859375, "learning_rate": 8.208371461593341e-05, "loss": 0.8389, "step": 41940 }, { "epoch": 0.6, "grad_norm": 0.54296875, "learning_rate": 8.205908407606306e-05, "loss": 0.9602, "step": 41945 }, { "epoch": 0.6, "grad_norm": 0.59765625, "learning_rate": 8.203445466074968e-05, "loss": 0.9552, "step": 41950 }, { "epoch": 0.6, "grad_norm": 0.546875, "learning_rate": 8.200982637153715e-05, "loss": 0.9129, "step": 41955 }, { "epoch": 0.6, "grad_norm": 0.5703125, "learning_rate": 8.198519920996921e-05, "loss": 0.8813, "step": 41960 }, { "epoch": 0.6, "grad_norm": 0.625, "learning_rate": 8.196057317758948e-05, "loss": 1.0221, "step": 41965 }, { "epoch": 0.6, "grad_norm": 0.53125, "learning_rate": 8.193594827594158e-05, "loss": 0.9442, "step": 41970 }, { "epoch": 0.6, "grad_norm": 0.5, "learning_rate": 8.191132450656902e-05, "loss": 0.9139, "step": 41975 }, { "epoch": 0.6, "grad_norm": 0.51953125, "learning_rate": 8.188670187101521e-05, "loss": 1.1236, "step": 41980 }, { "epoch": 0.6, "grad_norm": 0.5078125, "learning_rate": 8.186208037082359e-05, "loss": 0.8923, "step": 41985 }, { "epoch": 0.6, "grad_norm": 0.578125, "learning_rate": 8.183746000753743e-05, "loss": 1.0116, "step": 41990 }, { "epoch": 0.6, "grad_norm": 0.56640625, "learning_rate": 8.181284078269997e-05, "loss": 0.8802, "step": 41995 }, { "epoch": 0.6, "grad_norm": 0.52734375, "learning_rate": 8.178822269785435e-05, "loss": 1.0867, "step": 42000 }, { "epoch": 0.6, "grad_norm": 0.484375, "learning_rate": 8.176360575454366e-05, "loss": 0.9187, "step": 42005 }, { "epoch": 0.6, "grad_norm": 0.55078125, "learning_rate": 8.173898995431095e-05, "loss": 1.1618, "step": 42010 }, { "epoch": 0.6, "grad_norm": 0.578125, "learning_rate": 8.171437529869912e-05, "loss": 0.993, "step": 42015 }, { "epoch": 0.6, "grad_norm": 0.6796875, "learning_rate": 8.168976178925109e-05, "loss": 1.0539, "step": 42020 }, { "epoch": 0.6, "grad_norm": 0.55078125, "learning_rate": 8.166514942750964e-05, "loss": 0.9547, "step": 42025 }, { "epoch": 0.6, "grad_norm": 0.45703125, "learning_rate": 8.164053821501751e-05, "loss": 0.7925, "step": 42030 }, { "epoch": 0.6, "grad_norm": 0.53515625, "learning_rate": 8.161592815331734e-05, "loss": 0.8804, "step": 42035 }, { "epoch": 0.6, "grad_norm": 0.65625, "learning_rate": 8.159131924395176e-05, "loss": 1.0156, "step": 42040 }, { "epoch": 0.6, "grad_norm": 0.5703125, "learning_rate": 8.156671148846325e-05, "loss": 0.914, "step": 42045 }, { "epoch": 0.6, "grad_norm": 0.5078125, "learning_rate": 8.154210488839426e-05, "loss": 0.9318, "step": 42050 }, { "epoch": 0.6, "grad_norm": 0.55078125, "learning_rate": 8.151749944528714e-05, "loss": 1.0271, "step": 42055 }, { "epoch": 0.6, "grad_norm": 0.59765625, "learning_rate": 8.14928951606842e-05, "loss": 1.0947, "step": 42060 }, { "epoch": 0.6, "grad_norm": 0.55078125, "learning_rate": 8.146829203612769e-05, "loss": 0.7624, "step": 42065 }, { "epoch": 0.6, "grad_norm": 0.59765625, "learning_rate": 8.144369007315973e-05, "loss": 0.7011, "step": 42070 }, { "epoch": 0.6, "grad_norm": 0.546875, "learning_rate": 8.141908927332239e-05, "loss": 1.0747, "step": 42075 }, { "epoch": 0.6, "grad_norm": 0.55078125, "learning_rate": 8.139448963815772e-05, "loss": 0.968, "step": 42080 }, { "epoch": 0.6, "grad_norm": 0.78515625, "learning_rate": 8.136989116920763e-05, "loss": 1.0052, "step": 42085 }, { "epoch": 0.6, "grad_norm": 0.5546875, "learning_rate": 8.134529386801396e-05, "loss": 1.0768, "step": 42090 }, { "epoch": 0.6, "grad_norm": 0.65625, "learning_rate": 8.132069773611854e-05, "loss": 1.1377, "step": 42095 }, { "epoch": 0.6, "grad_norm": 0.484375, "learning_rate": 8.129610277506308e-05, "loss": 0.9914, "step": 42100 }, { "epoch": 0.6, "grad_norm": 0.66015625, "learning_rate": 8.127150898638918e-05, "loss": 1.1432, "step": 42105 }, { "epoch": 0.6, "grad_norm": 0.6015625, "learning_rate": 8.124691637163842e-05, "loss": 0.873, "step": 42110 }, { "epoch": 0.6, "grad_norm": 0.52734375, "learning_rate": 8.122232493235228e-05, "loss": 1.0254, "step": 42115 }, { "epoch": 0.6, "grad_norm": 0.5234375, "learning_rate": 8.119773467007222e-05, "loss": 1.0591, "step": 42120 }, { "epoch": 0.6, "grad_norm": 0.515625, "learning_rate": 8.117314558633956e-05, "loss": 0.8329, "step": 42125 }, { "epoch": 0.6, "grad_norm": 0.5390625, "learning_rate": 8.114855768269556e-05, "loss": 0.981, "step": 42130 }, { "epoch": 0.6, "grad_norm": 0.60546875, "learning_rate": 8.112397096068146e-05, "loss": 0.9654, "step": 42135 }, { "epoch": 0.6, "grad_norm": 0.578125, "learning_rate": 8.109938542183836e-05, "loss": 0.9152, "step": 42140 }, { "epoch": 0.6, "grad_norm": 0.609375, "learning_rate": 8.107480106770729e-05, "loss": 1.0351, "step": 42145 }, { "epoch": 0.6, "grad_norm": 0.609375, "learning_rate": 8.105021789982925e-05, "loss": 1.0923, "step": 42150 }, { "epoch": 0.6, "grad_norm": 0.52734375, "learning_rate": 8.102563591974516e-05, "loss": 0.9168, "step": 42155 }, { "epoch": 0.6, "grad_norm": 0.5703125, "learning_rate": 8.100105512899579e-05, "loss": 0.9979, "step": 42160 }, { "epoch": 0.6, "grad_norm": 0.578125, "learning_rate": 8.097647552912192e-05, "loss": 0.9022, "step": 42165 }, { "epoch": 0.6, "grad_norm": 0.60546875, "learning_rate": 8.095189712166425e-05, "loss": 1.0565, "step": 42170 }, { "epoch": 0.6, "grad_norm": 0.51171875, "learning_rate": 8.092731990816335e-05, "loss": 0.8553, "step": 42175 }, { "epoch": 0.61, "grad_norm": 0.484375, "learning_rate": 8.090274389015978e-05, "loss": 0.9753, "step": 42180 }, { "epoch": 0.61, "grad_norm": 0.55078125, "learning_rate": 8.087816906919391e-05, "loss": 0.9659, "step": 42185 }, { "epoch": 0.61, "grad_norm": 0.5234375, "learning_rate": 8.085359544680623e-05, "loss": 0.9577, "step": 42190 }, { "epoch": 0.61, "grad_norm": 0.490234375, "learning_rate": 8.082902302453701e-05, "loss": 0.9535, "step": 42195 }, { "epoch": 0.61, "grad_norm": 0.51171875, "learning_rate": 8.080445180392642e-05, "loss": 1.0207, "step": 42200 }, { "epoch": 0.61, "grad_norm": 0.59765625, "learning_rate": 8.07798817865147e-05, "loss": 0.9898, "step": 42205 }, { "epoch": 0.61, "grad_norm": 0.640625, "learning_rate": 8.075531297384185e-05, "loss": 0.9406, "step": 42210 }, { "epoch": 0.61, "grad_norm": 0.54296875, "learning_rate": 8.07307453674479e-05, "loss": 0.9575, "step": 42215 }, { "epoch": 0.61, "grad_norm": 0.546875, "learning_rate": 8.070617896887277e-05, "loss": 0.9358, "step": 42220 }, { "epoch": 0.61, "grad_norm": 0.5859375, "learning_rate": 8.068161377965633e-05, "loss": 0.9085, "step": 42225 }, { "epoch": 0.61, "grad_norm": 0.5703125, "learning_rate": 8.065704980133833e-05, "loss": 0.8574, "step": 42230 }, { "epoch": 0.61, "grad_norm": 0.578125, "learning_rate": 8.063248703545847e-05, "loss": 1.0217, "step": 42235 }, { "epoch": 0.61, "grad_norm": 0.515625, "learning_rate": 8.06079254835564e-05, "loss": 0.8214, "step": 42240 }, { "epoch": 0.61, "grad_norm": 0.5390625, "learning_rate": 8.058336514717164e-05, "loss": 0.9639, "step": 42245 }, { "epoch": 0.61, "grad_norm": 0.52734375, "learning_rate": 8.055880602784367e-05, "loss": 0.9374, "step": 42250 }, { "epoch": 0.61, "grad_norm": 0.56640625, "learning_rate": 8.053424812711186e-05, "loss": 1.2145, "step": 42255 }, { "epoch": 0.61, "grad_norm": 0.61328125, "learning_rate": 8.050969144651559e-05, "loss": 0.9234, "step": 42260 }, { "epoch": 0.61, "grad_norm": 0.640625, "learning_rate": 8.048513598759403e-05, "loss": 0.9074, "step": 42265 }, { "epoch": 0.61, "grad_norm": 0.6015625, "learning_rate": 8.046058175188638e-05, "loss": 1.0246, "step": 42270 }, { "epoch": 0.61, "grad_norm": 0.57421875, "learning_rate": 8.043602874093169e-05, "loss": 0.8926, "step": 42275 }, { "epoch": 0.61, "grad_norm": 0.57421875, "learning_rate": 8.041147695626902e-05, "loss": 0.9339, "step": 42280 }, { "epoch": 0.61, "grad_norm": 0.49609375, "learning_rate": 8.03869263994373e-05, "loss": 0.9105, "step": 42285 }, { "epoch": 0.61, "grad_norm": 0.53515625, "learning_rate": 8.036237707197534e-05, "loss": 0.8557, "step": 42290 }, { "epoch": 0.61, "grad_norm": 0.578125, "learning_rate": 8.033782897542197e-05, "loss": 0.9911, "step": 42295 }, { "epoch": 0.61, "grad_norm": 0.5, "learning_rate": 8.031328211131586e-05, "loss": 0.8827, "step": 42300 }, { "epoch": 0.61, "grad_norm": 0.53515625, "learning_rate": 8.028873648119565e-05, "loss": 0.8477, "step": 42305 }, { "epoch": 0.61, "grad_norm": 0.58984375, "learning_rate": 8.026419208659994e-05, "loss": 0.8995, "step": 42310 }, { "epoch": 0.61, "grad_norm": 0.6171875, "learning_rate": 8.023964892906709e-05, "loss": 0.9163, "step": 42315 }, { "epoch": 0.61, "grad_norm": 0.546875, "learning_rate": 8.021510701013557e-05, "loss": 1.0234, "step": 42320 }, { "epoch": 0.61, "grad_norm": 0.58984375, "learning_rate": 8.019056633134367e-05, "loss": 0.9321, "step": 42325 }, { "epoch": 0.61, "grad_norm": 0.55859375, "learning_rate": 8.01660268942296e-05, "loss": 0.9225, "step": 42330 }, { "epoch": 0.61, "grad_norm": 0.71484375, "learning_rate": 8.01414887003316e-05, "loss": 0.9481, "step": 42335 }, { "epoch": 0.61, "grad_norm": 0.56640625, "learning_rate": 8.01169517511877e-05, "loss": 0.9872, "step": 42340 }, { "epoch": 0.61, "grad_norm": 0.578125, "learning_rate": 8.009241604833588e-05, "loss": 0.9599, "step": 42345 }, { "epoch": 0.61, "grad_norm": 0.53515625, "learning_rate": 8.006788159331412e-05, "loss": 0.9175, "step": 42350 }, { "epoch": 0.61, "grad_norm": 0.5078125, "learning_rate": 8.004334838766024e-05, "loss": 0.9903, "step": 42355 }, { "epoch": 0.61, "grad_norm": 0.5234375, "learning_rate": 8.001881643291198e-05, "loss": 0.9758, "step": 42360 }, { "epoch": 0.61, "grad_norm": 0.486328125, "learning_rate": 7.999428573060715e-05, "loss": 1.0384, "step": 42365 }, { "epoch": 0.61, "grad_norm": 0.5859375, "learning_rate": 7.99697562822832e-05, "loss": 0.9855, "step": 42370 }, { "epoch": 0.61, "grad_norm": 0.58203125, "learning_rate": 7.994522808947775e-05, "loss": 0.9536, "step": 42375 }, { "epoch": 0.61, "grad_norm": 0.55078125, "learning_rate": 7.992070115372825e-05, "loss": 1.2132, "step": 42380 }, { "epoch": 0.61, "grad_norm": 0.59765625, "learning_rate": 7.989617547657206e-05, "loss": 0.9809, "step": 42385 }, { "epoch": 0.61, "grad_norm": 0.56640625, "learning_rate": 7.98716510595465e-05, "loss": 0.9123, "step": 42390 }, { "epoch": 0.61, "grad_norm": 0.5234375, "learning_rate": 7.984712790418878e-05, "loss": 0.9471, "step": 42395 }, { "epoch": 0.61, "grad_norm": 0.412109375, "learning_rate": 7.9822606012036e-05, "loss": 0.8363, "step": 42400 }, { "epoch": 0.61, "grad_norm": 0.65234375, "learning_rate": 7.979808538462528e-05, "loss": 0.961, "step": 42405 }, { "epoch": 0.61, "grad_norm": 0.515625, "learning_rate": 7.977356602349359e-05, "loss": 0.8457, "step": 42410 }, { "epoch": 0.61, "grad_norm": 0.59765625, "learning_rate": 7.974904793017784e-05, "loss": 1.0536, "step": 42415 }, { "epoch": 0.61, "grad_norm": 0.58984375, "learning_rate": 7.972453110621479e-05, "loss": 0.9591, "step": 42420 }, { "epoch": 0.61, "grad_norm": 0.50390625, "learning_rate": 7.970001555314121e-05, "loss": 0.8937, "step": 42425 }, { "epoch": 0.61, "grad_norm": 0.54296875, "learning_rate": 7.967550127249381e-05, "loss": 0.8458, "step": 42430 }, { "epoch": 0.61, "grad_norm": 0.5703125, "learning_rate": 7.965098826580912e-05, "loss": 0.906, "step": 42435 }, { "epoch": 0.61, "grad_norm": 0.6328125, "learning_rate": 7.962647653462364e-05, "loss": 1.0945, "step": 42440 }, { "epoch": 0.61, "grad_norm": 0.455078125, "learning_rate": 7.960196608047385e-05, "loss": 0.9566, "step": 42445 }, { "epoch": 0.61, "grad_norm": 0.498046875, "learning_rate": 7.957745690489604e-05, "loss": 0.898, "step": 42450 }, { "epoch": 0.61, "grad_norm": 0.53515625, "learning_rate": 7.955294900942646e-05, "loss": 0.8699, "step": 42455 }, { "epoch": 0.61, "grad_norm": 0.5390625, "learning_rate": 7.952844239560138e-05, "loss": 0.9584, "step": 42460 }, { "epoch": 0.61, "grad_norm": 0.58203125, "learning_rate": 7.950393706495683e-05, "loss": 0.9459, "step": 42465 }, { "epoch": 0.61, "grad_norm": 0.58203125, "learning_rate": 7.947943301902885e-05, "loss": 0.9533, "step": 42470 }, { "epoch": 0.61, "grad_norm": 0.53125, "learning_rate": 7.945493025935338e-05, "loss": 0.93, "step": 42475 }, { "epoch": 0.61, "grad_norm": 0.515625, "learning_rate": 7.943042878746626e-05, "loss": 0.8268, "step": 42480 }, { "epoch": 0.61, "grad_norm": 0.5390625, "learning_rate": 7.94059286049033e-05, "loss": 0.8896, "step": 42485 }, { "epoch": 0.61, "grad_norm": 0.54296875, "learning_rate": 7.93814297132002e-05, "loss": 1.0771, "step": 42490 }, { "epoch": 0.61, "grad_norm": 0.53125, "learning_rate": 7.935693211389254e-05, "loss": 0.8995, "step": 42495 }, { "epoch": 0.61, "grad_norm": 0.62890625, "learning_rate": 7.933243580851591e-05, "loss": 0.9314, "step": 42500 }, { "epoch": 0.61, "grad_norm": 0.55078125, "learning_rate": 7.930794079860575e-05, "loss": 0.8779, "step": 42505 }, { "epoch": 0.61, "grad_norm": 0.625, "learning_rate": 7.92834470856974e-05, "loss": 1.006, "step": 42510 }, { "epoch": 0.61, "grad_norm": 0.55859375, "learning_rate": 7.925895467132618e-05, "loss": 0.8862, "step": 42515 }, { "epoch": 0.61, "grad_norm": 0.5859375, "learning_rate": 7.923446355702735e-05, "loss": 0.9694, "step": 42520 }, { "epoch": 0.61, "grad_norm": 0.55859375, "learning_rate": 7.920997374433596e-05, "loss": 0.8995, "step": 42525 }, { "epoch": 0.61, "grad_norm": 0.494140625, "learning_rate": 7.91854852347871e-05, "loss": 0.8734, "step": 42530 }, { "epoch": 0.61, "grad_norm": 0.58984375, "learning_rate": 7.916099802991572e-05, "loss": 0.9556, "step": 42535 }, { "epoch": 0.61, "grad_norm": 0.6015625, "learning_rate": 7.913651213125672e-05, "loss": 0.9399, "step": 42540 }, { "epoch": 0.61, "grad_norm": 0.90625, "learning_rate": 7.911202754034491e-05, "loss": 0.9976, "step": 42545 }, { "epoch": 0.61, "grad_norm": 0.53125, "learning_rate": 7.908754425871498e-05, "loss": 0.9188, "step": 42550 }, { "epoch": 0.61, "grad_norm": 0.5390625, "learning_rate": 7.906306228790161e-05, "loss": 0.8477, "step": 42555 }, { "epoch": 0.61, "grad_norm": 0.60546875, "learning_rate": 7.903858162943935e-05, "loss": 1.0648, "step": 42560 }, { "epoch": 0.61, "grad_norm": 0.5390625, "learning_rate": 7.901410228486264e-05, "loss": 0.9438, "step": 42565 }, { "epoch": 0.61, "grad_norm": 0.59375, "learning_rate": 7.898962425570592e-05, "loss": 0.9425, "step": 42570 }, { "epoch": 0.61, "grad_norm": 0.53515625, "learning_rate": 7.89651475435035e-05, "loss": 0.8806, "step": 42575 }, { "epoch": 0.61, "grad_norm": 0.498046875, "learning_rate": 7.894067214978959e-05, "loss": 0.9518, "step": 42580 }, { "epoch": 0.61, "grad_norm": 0.60546875, "learning_rate": 7.891619807609832e-05, "loss": 0.9417, "step": 42585 }, { "epoch": 0.61, "grad_norm": 0.61328125, "learning_rate": 7.889172532396373e-05, "loss": 0.8483, "step": 42590 }, { "epoch": 0.61, "grad_norm": 0.5390625, "learning_rate": 7.886725389491987e-05, "loss": 0.8405, "step": 42595 }, { "epoch": 0.61, "grad_norm": 0.55859375, "learning_rate": 7.88427837905006e-05, "loss": 0.9486, "step": 42600 }, { "epoch": 0.61, "grad_norm": 0.58203125, "learning_rate": 7.881831501223973e-05, "loss": 1.03, "step": 42605 }, { "epoch": 0.61, "grad_norm": 0.5078125, "learning_rate": 7.879384756167101e-05, "loss": 0.9571, "step": 42610 }, { "epoch": 0.61, "grad_norm": 0.515625, "learning_rate": 7.876938144032807e-05, "loss": 1.0589, "step": 42615 }, { "epoch": 0.61, "grad_norm": 0.51953125, "learning_rate": 7.874491664974448e-05, "loss": 0.9998, "step": 42620 }, { "epoch": 0.61, "grad_norm": 0.6015625, "learning_rate": 7.872045319145376e-05, "loss": 1.0316, "step": 42625 }, { "epoch": 0.61, "grad_norm": 0.5625, "learning_rate": 7.869599106698922e-05, "loss": 0.9804, "step": 42630 }, { "epoch": 0.61, "grad_norm": 0.55078125, "learning_rate": 7.867153027788424e-05, "loss": 0.8879, "step": 42635 }, { "epoch": 0.61, "grad_norm": 0.6640625, "learning_rate": 7.864707082567204e-05, "loss": 1.0132, "step": 42640 }, { "epoch": 0.61, "grad_norm": 0.5859375, "learning_rate": 7.862261271188574e-05, "loss": 1.0564, "step": 42645 }, { "epoch": 0.61, "grad_norm": 0.5703125, "learning_rate": 7.859815593805844e-05, "loss": 1.0339, "step": 42650 }, { "epoch": 0.61, "grad_norm": 0.58203125, "learning_rate": 7.85737005057231e-05, "loss": 1.0806, "step": 42655 }, { "epoch": 0.61, "grad_norm": 0.53515625, "learning_rate": 7.854924641641258e-05, "loss": 0.9391, "step": 42660 }, { "epoch": 0.61, "grad_norm": 0.498046875, "learning_rate": 7.852479367165976e-05, "loss": 0.8207, "step": 42665 }, { "epoch": 0.61, "grad_norm": 0.58984375, "learning_rate": 7.850034227299734e-05, "loss": 0.9667, "step": 42670 }, { "epoch": 0.61, "grad_norm": 0.57421875, "learning_rate": 7.84758922219579e-05, "loss": 0.9414, "step": 42675 }, { "epoch": 0.61, "grad_norm": 0.4921875, "learning_rate": 7.845144352007414e-05, "loss": 0.9857, "step": 42680 }, { "epoch": 0.61, "grad_norm": 0.5625, "learning_rate": 7.842699616887837e-05, "loss": 0.956, "step": 42685 }, { "epoch": 0.61, "grad_norm": 0.55859375, "learning_rate": 7.840255016990308e-05, "loss": 0.8266, "step": 42690 }, { "epoch": 0.61, "grad_norm": 0.56640625, "learning_rate": 7.837810552468053e-05, "loss": 0.866, "step": 42695 }, { "epoch": 0.61, "grad_norm": 0.5390625, "learning_rate": 7.835366223474293e-05, "loss": 0.9463, "step": 42700 }, { "epoch": 0.61, "grad_norm": 0.5390625, "learning_rate": 7.832922030162246e-05, "loss": 0.8329, "step": 42705 }, { "epoch": 0.61, "grad_norm": 0.60546875, "learning_rate": 7.830477972685115e-05, "loss": 0.9353, "step": 42710 }, { "epoch": 0.61, "grad_norm": 0.62109375, "learning_rate": 7.828034051196093e-05, "loss": 0.9475, "step": 42715 }, { "epoch": 0.61, "grad_norm": 0.58203125, "learning_rate": 7.825590265848371e-05, "loss": 0.9154, "step": 42720 }, { "epoch": 0.61, "grad_norm": 0.56640625, "learning_rate": 7.823146616795129e-05, "loss": 1.0003, "step": 42725 }, { "epoch": 0.61, "grad_norm": 0.5703125, "learning_rate": 7.820703104189538e-05, "loss": 0.8323, "step": 42730 }, { "epoch": 0.61, "grad_norm": 0.515625, "learning_rate": 7.818259728184757e-05, "loss": 0.8252, "step": 42735 }, { "epoch": 0.61, "grad_norm": 0.7109375, "learning_rate": 7.815816488933938e-05, "loss": 1.0588, "step": 42740 }, { "epoch": 0.61, "grad_norm": 0.65234375, "learning_rate": 7.813373386590232e-05, "loss": 0.917, "step": 42745 }, { "epoch": 0.61, "grad_norm": 0.57421875, "learning_rate": 7.810930421306772e-05, "loss": 0.9622, "step": 42750 }, { "epoch": 0.61, "grad_norm": 0.486328125, "learning_rate": 7.808487593236683e-05, "loss": 0.8535, "step": 42755 }, { "epoch": 0.61, "grad_norm": 0.671875, "learning_rate": 7.806044902533092e-05, "loss": 1.0698, "step": 42760 }, { "epoch": 0.61, "grad_norm": 0.62109375, "learning_rate": 7.803602349349104e-05, "loss": 0.9576, "step": 42765 }, { "epoch": 0.61, "grad_norm": 0.578125, "learning_rate": 7.801159933837821e-05, "loss": 1.0837, "step": 42770 }, { "epoch": 0.61, "grad_norm": 0.63671875, "learning_rate": 7.798717656152339e-05, "loss": 0.8904, "step": 42775 }, { "epoch": 0.61, "grad_norm": 0.52734375, "learning_rate": 7.796275516445741e-05, "loss": 0.9422, "step": 42780 }, { "epoch": 0.61, "grad_norm": 0.458984375, "learning_rate": 7.793833514871106e-05, "loss": 0.9295, "step": 42785 }, { "epoch": 0.61, "grad_norm": 0.51171875, "learning_rate": 7.791391651581497e-05, "loss": 0.9939, "step": 42790 }, { "epoch": 0.61, "grad_norm": 0.53515625, "learning_rate": 7.788949926729972e-05, "loss": 0.9126, "step": 42795 }, { "epoch": 0.61, "grad_norm": 0.56640625, "learning_rate": 7.786508340469586e-05, "loss": 0.8445, "step": 42800 }, { "epoch": 0.61, "grad_norm": 0.546875, "learning_rate": 7.78406689295338e-05, "loss": 0.9759, "step": 42805 }, { "epoch": 0.61, "grad_norm": 0.462890625, "learning_rate": 7.78162558433438e-05, "loss": 1.0567, "step": 42810 }, { "epoch": 0.61, "grad_norm": 0.5390625, "learning_rate": 7.779184414765618e-05, "loss": 0.862, "step": 42815 }, { "epoch": 0.61, "grad_norm": 0.53125, "learning_rate": 7.776743384400106e-05, "loss": 1.0773, "step": 42820 }, { "epoch": 0.61, "grad_norm": 0.51953125, "learning_rate": 7.77430249339085e-05, "loss": 0.9642, "step": 42825 }, { "epoch": 0.61, "grad_norm": 0.578125, "learning_rate": 7.771861741890848e-05, "loss": 0.8702, "step": 42830 }, { "epoch": 0.61, "grad_norm": 0.52734375, "learning_rate": 7.769421130053094e-05, "loss": 0.8947, "step": 42835 }, { "epoch": 0.61, "grad_norm": 0.51953125, "learning_rate": 7.766980658030562e-05, "loss": 0.9315, "step": 42840 }, { "epoch": 0.61, "grad_norm": 0.53125, "learning_rate": 7.764540325976225e-05, "loss": 0.9786, "step": 42845 }, { "epoch": 0.61, "grad_norm": 0.56640625, "learning_rate": 7.762100134043043e-05, "loss": 0.9207, "step": 42850 }, { "epoch": 0.61, "grad_norm": 0.515625, "learning_rate": 7.759660082383977e-05, "loss": 1.0167, "step": 42855 }, { "epoch": 0.61, "grad_norm": 0.546875, "learning_rate": 7.757220171151967e-05, "loss": 0.9736, "step": 42860 }, { "epoch": 0.61, "grad_norm": 0.47265625, "learning_rate": 7.754780400499951e-05, "loss": 1.0015, "step": 42865 }, { "epoch": 0.61, "grad_norm": 0.5, "learning_rate": 7.752340770580858e-05, "loss": 0.9226, "step": 42870 }, { "epoch": 0.62, "grad_norm": 0.52734375, "learning_rate": 7.749901281547604e-05, "loss": 0.9444, "step": 42875 }, { "epoch": 0.62, "grad_norm": 0.65234375, "learning_rate": 7.747461933553099e-05, "loss": 1.0346, "step": 42880 }, { "epoch": 0.62, "grad_norm": 0.462890625, "learning_rate": 7.745022726750248e-05, "loss": 0.9532, "step": 42885 }, { "epoch": 0.62, "grad_norm": 0.51171875, "learning_rate": 7.742583661291943e-05, "loss": 0.8445, "step": 42890 }, { "epoch": 0.62, "grad_norm": 0.5546875, "learning_rate": 7.740144737331062e-05, "loss": 0.9205, "step": 42895 }, { "epoch": 0.62, "grad_norm": 0.48046875, "learning_rate": 7.737705955020482e-05, "loss": 0.9847, "step": 42900 }, { "epoch": 0.62, "grad_norm": 0.54296875, "learning_rate": 7.735267314513069e-05, "loss": 0.9144, "step": 42905 }, { "epoch": 0.62, "grad_norm": 0.59375, "learning_rate": 7.732828815961683e-05, "loss": 1.0651, "step": 42910 }, { "epoch": 0.62, "grad_norm": 0.55078125, "learning_rate": 7.730390459519167e-05, "loss": 0.9732, "step": 42915 }, { "epoch": 0.62, "grad_norm": 0.5390625, "learning_rate": 7.727952245338359e-05, "loss": 0.9412, "step": 42920 }, { "epoch": 0.62, "grad_norm": 0.56640625, "learning_rate": 7.725514173572096e-05, "loss": 1.1264, "step": 42925 }, { "epoch": 0.62, "grad_norm": 0.7109375, "learning_rate": 7.723076244373195e-05, "loss": 0.9375, "step": 42930 }, { "epoch": 0.62, "grad_norm": 0.55859375, "learning_rate": 7.720638457894465e-05, "loss": 0.9551, "step": 42935 }, { "epoch": 0.62, "grad_norm": 0.48046875, "learning_rate": 7.718200814288717e-05, "loss": 1.0087, "step": 42940 }, { "epoch": 0.62, "grad_norm": 0.53125, "learning_rate": 7.715763313708739e-05, "loss": 1.0652, "step": 42945 }, { "epoch": 0.62, "grad_norm": 0.53125, "learning_rate": 7.713325956307316e-05, "loss": 0.963, "step": 42950 }, { "epoch": 0.62, "grad_norm": 0.58984375, "learning_rate": 7.710888742237226e-05, "loss": 0.8619, "step": 42955 }, { "epoch": 0.62, "grad_norm": 0.59765625, "learning_rate": 7.708451671651239e-05, "loss": 0.9818, "step": 42960 }, { "epoch": 0.62, "grad_norm": 0.56640625, "learning_rate": 7.70601474470211e-05, "loss": 0.9524, "step": 42965 }, { "epoch": 0.62, "grad_norm": 0.73828125, "learning_rate": 7.703577961542588e-05, "loss": 1.0222, "step": 42970 }, { "epoch": 0.62, "grad_norm": 0.56640625, "learning_rate": 7.701141322325418e-05, "loss": 1.0135, "step": 42975 }, { "epoch": 0.62, "grad_norm": 0.51171875, "learning_rate": 7.698704827203326e-05, "loss": 0.8863, "step": 42980 }, { "epoch": 0.62, "grad_norm": 0.5703125, "learning_rate": 7.696268476329038e-05, "loss": 0.9803, "step": 42985 }, { "epoch": 0.62, "grad_norm": 0.61328125, "learning_rate": 7.693832269855264e-05, "loss": 1.1359, "step": 42990 }, { "epoch": 0.62, "grad_norm": 0.53125, "learning_rate": 7.691396207934711e-05, "loss": 0.9429, "step": 42995 }, { "epoch": 0.62, "grad_norm": 0.55078125, "learning_rate": 7.688960290720073e-05, "loss": 0.996, "step": 43000 }, { "epoch": 0.62, "grad_norm": 0.62890625, "learning_rate": 7.686524518364036e-05, "loss": 0.9694, "step": 43005 }, { "epoch": 0.62, "grad_norm": 0.828125, "learning_rate": 7.684088891019276e-05, "loss": 1.0616, "step": 43010 }, { "epoch": 0.62, "grad_norm": 0.53515625, "learning_rate": 7.681653408838462e-05, "loss": 0.8888, "step": 43015 }, { "epoch": 0.62, "grad_norm": 0.55078125, "learning_rate": 7.679218071974254e-05, "loss": 1.0106, "step": 43020 }, { "epoch": 0.62, "grad_norm": 0.609375, "learning_rate": 7.676782880579298e-05, "loss": 0.9764, "step": 43025 }, { "epoch": 0.62, "grad_norm": 0.5703125, "learning_rate": 7.674347834806238e-05, "loss": 0.9293, "step": 43030 }, { "epoch": 0.62, "grad_norm": 0.53515625, "learning_rate": 7.671912934807706e-05, "loss": 1.0437, "step": 43035 }, { "epoch": 0.62, "grad_norm": 0.56640625, "learning_rate": 7.669478180736319e-05, "loss": 0.8764, "step": 43040 }, { "epoch": 0.62, "grad_norm": 0.76953125, "learning_rate": 7.667043572744701e-05, "loss": 1.0054, "step": 43045 }, { "epoch": 0.62, "grad_norm": 0.5703125, "learning_rate": 7.664609110985444e-05, "loss": 0.9659, "step": 43050 }, { "epoch": 0.62, "grad_norm": 0.56640625, "learning_rate": 7.662174795611147e-05, "loss": 0.8469, "step": 43055 }, { "epoch": 0.62, "grad_norm": 0.64453125, "learning_rate": 7.659740626774399e-05, "loss": 1.0004, "step": 43060 }, { "epoch": 0.62, "grad_norm": 0.482421875, "learning_rate": 7.65730660462777e-05, "loss": 0.8755, "step": 43065 }, { "epoch": 0.62, "grad_norm": 0.5546875, "learning_rate": 7.654872729323834e-05, "loss": 0.9929, "step": 43070 }, { "epoch": 0.62, "grad_norm": 0.57421875, "learning_rate": 7.652439001015145e-05, "loss": 0.9484, "step": 43075 }, { "epoch": 0.62, "grad_norm": 0.5703125, "learning_rate": 7.650005419854252e-05, "loss": 1.0401, "step": 43080 }, { "epoch": 0.62, "grad_norm": 0.609375, "learning_rate": 7.647571985993697e-05, "loss": 1.1313, "step": 43085 }, { "epoch": 0.62, "grad_norm": 0.53515625, "learning_rate": 7.645138699586013e-05, "loss": 0.8608, "step": 43090 }, { "epoch": 0.62, "grad_norm": 0.67578125, "learning_rate": 7.64270556078371e-05, "loss": 1.0649, "step": 43095 }, { "epoch": 0.62, "grad_norm": 0.53515625, "learning_rate": 7.640272569739316e-05, "loss": 1.0124, "step": 43100 }, { "epoch": 0.62, "grad_norm": 0.478515625, "learning_rate": 7.637839726605318e-05, "loss": 0.8626, "step": 43105 }, { "epoch": 0.62, "grad_norm": 0.5546875, "learning_rate": 7.635407031534218e-05, "loss": 0.8512, "step": 43110 }, { "epoch": 0.62, "grad_norm": 0.63671875, "learning_rate": 7.632974484678499e-05, "loss": 1.1224, "step": 43115 }, { "epoch": 0.62, "grad_norm": 0.65234375, "learning_rate": 7.630542086190633e-05, "loss": 0.9001, "step": 43120 }, { "epoch": 0.62, "grad_norm": 0.419921875, "learning_rate": 7.62810983622309e-05, "loss": 0.9177, "step": 43125 }, { "epoch": 0.62, "grad_norm": 0.51171875, "learning_rate": 7.625677734928322e-05, "loss": 0.932, "step": 43130 }, { "epoch": 0.62, "grad_norm": 0.703125, "learning_rate": 7.623245782458777e-05, "loss": 1.0339, "step": 43135 }, { "epoch": 0.62, "grad_norm": 0.51953125, "learning_rate": 7.620813978966895e-05, "loss": 0.9261, "step": 43140 }, { "epoch": 0.62, "grad_norm": 0.5546875, "learning_rate": 7.618382324605104e-05, "loss": 0.9913, "step": 43145 }, { "epoch": 0.62, "grad_norm": 0.6171875, "learning_rate": 7.615950819525821e-05, "loss": 0.9563, "step": 43150 }, { "epoch": 0.62, "grad_norm": 0.55859375, "learning_rate": 7.613519463881456e-05, "loss": 0.8773, "step": 43155 }, { "epoch": 0.62, "grad_norm": 0.50390625, "learning_rate": 7.611088257824405e-05, "loss": 0.8426, "step": 43160 }, { "epoch": 0.62, "grad_norm": 0.53515625, "learning_rate": 7.608657201507066e-05, "loss": 1.0284, "step": 43165 }, { "epoch": 0.62, "grad_norm": 0.5859375, "learning_rate": 7.606226295081815e-05, "loss": 0.9378, "step": 43170 }, { "epoch": 0.62, "grad_norm": 0.53515625, "learning_rate": 7.603795538701026e-05, "loss": 0.983, "step": 43175 }, { "epoch": 0.62, "grad_norm": 0.61328125, "learning_rate": 7.601364932517065e-05, "loss": 0.9846, "step": 43180 }, { "epoch": 0.62, "grad_norm": 0.55859375, "learning_rate": 7.598934476682279e-05, "loss": 0.8697, "step": 43185 }, { "epoch": 0.62, "grad_norm": 0.58984375, "learning_rate": 7.596504171349013e-05, "loss": 1.0234, "step": 43190 }, { "epoch": 0.62, "grad_norm": 0.625, "learning_rate": 7.594074016669606e-05, "loss": 0.9989, "step": 43195 }, { "epoch": 0.62, "grad_norm": 0.61328125, "learning_rate": 7.59164401279638e-05, "loss": 0.9878, "step": 43200 }, { "epoch": 0.62, "grad_norm": 0.56640625, "learning_rate": 7.589214159881652e-05, "loss": 0.8622, "step": 43205 }, { "epoch": 0.62, "grad_norm": 0.6796875, "learning_rate": 7.586784458077723e-05, "loss": 1.1644, "step": 43210 }, { "epoch": 0.62, "grad_norm": 0.6328125, "learning_rate": 7.584354907536892e-05, "loss": 1.0065, "step": 43215 }, { "epoch": 0.62, "grad_norm": 0.61328125, "learning_rate": 7.581925508411448e-05, "loss": 1.0772, "step": 43220 }, { "epoch": 0.62, "grad_norm": 0.5546875, "learning_rate": 7.579496260853666e-05, "loss": 0.8406, "step": 43225 }, { "epoch": 0.62, "grad_norm": 0.5390625, "learning_rate": 7.577067165015815e-05, "loss": 0.8243, "step": 43230 }, { "epoch": 0.62, "grad_norm": 0.5703125, "learning_rate": 7.574638221050156e-05, "loss": 0.8884, "step": 43235 }, { "epoch": 0.62, "grad_norm": 0.5390625, "learning_rate": 7.572209429108935e-05, "loss": 0.9756, "step": 43240 }, { "epoch": 0.62, "grad_norm": 0.5390625, "learning_rate": 7.569780789344388e-05, "loss": 0.9726, "step": 43245 }, { "epoch": 0.62, "grad_norm": 0.57421875, "learning_rate": 7.567352301908752e-05, "loss": 0.9453, "step": 43250 }, { "epoch": 0.62, "grad_norm": 0.52734375, "learning_rate": 7.564923966954247e-05, "loss": 0.8655, "step": 43255 }, { "epoch": 0.62, "grad_norm": 0.51953125, "learning_rate": 7.562495784633078e-05, "loss": 0.8646, "step": 43260 }, { "epoch": 0.62, "grad_norm": 0.57421875, "learning_rate": 7.56006775509745e-05, "loss": 1.0515, "step": 43265 }, { "epoch": 0.62, "grad_norm": 0.546875, "learning_rate": 7.557639878499551e-05, "loss": 0.932, "step": 43270 }, { "epoch": 0.62, "grad_norm": 0.59765625, "learning_rate": 7.555212154991569e-05, "loss": 0.9745, "step": 43275 }, { "epoch": 0.62, "grad_norm": 0.546875, "learning_rate": 7.552784584725674e-05, "loss": 1.104, "step": 43280 }, { "epoch": 0.62, "grad_norm": 0.5546875, "learning_rate": 7.550357167854025e-05, "loss": 1.0603, "step": 43285 }, { "epoch": 0.62, "grad_norm": 0.58984375, "learning_rate": 7.547929904528783e-05, "loss": 0.8669, "step": 43290 }, { "epoch": 0.62, "grad_norm": 0.55859375, "learning_rate": 7.545502794902084e-05, "loss": 0.8064, "step": 43295 }, { "epoch": 0.62, "grad_norm": 0.6875, "learning_rate": 7.543075839126065e-05, "loss": 1.0797, "step": 43300 }, { "epoch": 0.62, "grad_norm": 0.578125, "learning_rate": 7.540649037352853e-05, "loss": 0.9345, "step": 43305 }, { "epoch": 0.62, "grad_norm": 0.58203125, "learning_rate": 7.538222389734561e-05, "loss": 0.9949, "step": 43310 }, { "epoch": 0.62, "grad_norm": 0.52734375, "learning_rate": 7.535795896423292e-05, "loss": 0.9763, "step": 43315 }, { "epoch": 0.62, "grad_norm": 0.515625, "learning_rate": 7.533369557571144e-05, "loss": 0.9526, "step": 43320 }, { "epoch": 0.62, "grad_norm": 0.486328125, "learning_rate": 7.530943373330197e-05, "loss": 0.9384, "step": 43325 }, { "epoch": 0.62, "grad_norm": 0.66015625, "learning_rate": 7.528517343852535e-05, "loss": 1.0181, "step": 43330 }, { "epoch": 0.62, "grad_norm": 0.55078125, "learning_rate": 7.526091469290221e-05, "loss": 0.8762, "step": 43335 }, { "epoch": 0.62, "grad_norm": 0.470703125, "learning_rate": 7.523665749795308e-05, "loss": 0.8089, "step": 43340 }, { "epoch": 0.62, "grad_norm": 0.51953125, "learning_rate": 7.521240185519849e-05, "loss": 0.9557, "step": 43345 }, { "epoch": 0.62, "grad_norm": 0.57421875, "learning_rate": 7.518814776615878e-05, "loss": 0.8498, "step": 43350 }, { "epoch": 0.62, "grad_norm": 0.498046875, "learning_rate": 7.51638952323542e-05, "loss": 1.0413, "step": 43355 }, { "epoch": 0.62, "grad_norm": 0.53125, "learning_rate": 7.513964425530502e-05, "loss": 0.8996, "step": 43360 }, { "epoch": 0.62, "grad_norm": 0.53515625, "learning_rate": 7.511539483653119e-05, "loss": 1.0167, "step": 43365 }, { "epoch": 0.62, "grad_norm": 0.478515625, "learning_rate": 7.509114697755277e-05, "loss": 0.9066, "step": 43370 }, { "epoch": 0.62, "grad_norm": 0.6015625, "learning_rate": 7.506690067988963e-05, "loss": 0.9654, "step": 43375 }, { "epoch": 0.62, "grad_norm": 0.55078125, "learning_rate": 7.504265594506152e-05, "loss": 1.0211, "step": 43380 }, { "epoch": 0.62, "grad_norm": 0.5234375, "learning_rate": 7.50184127745882e-05, "loss": 0.9962, "step": 43385 }, { "epoch": 0.62, "grad_norm": 0.5859375, "learning_rate": 7.499417116998921e-05, "loss": 1.0111, "step": 43390 }, { "epoch": 0.62, "grad_norm": 0.56640625, "learning_rate": 7.496993113278403e-05, "loss": 0.9227, "step": 43395 }, { "epoch": 0.62, "grad_norm": 0.609375, "learning_rate": 7.49456926644921e-05, "loss": 0.8432, "step": 43400 }, { "epoch": 0.62, "grad_norm": 0.5546875, "learning_rate": 7.492145576663267e-05, "loss": 0.9982, "step": 43405 }, { "epoch": 0.62, "grad_norm": 0.5625, "learning_rate": 7.489722044072493e-05, "loss": 0.9431, "step": 43410 }, { "epoch": 0.62, "grad_norm": 0.578125, "learning_rate": 7.487298668828809e-05, "loss": 1.0562, "step": 43415 }, { "epoch": 0.62, "grad_norm": 0.50390625, "learning_rate": 7.484875451084098e-05, "loss": 1.0369, "step": 43420 }, { "epoch": 0.62, "grad_norm": 0.515625, "learning_rate": 7.482452390990262e-05, "loss": 1.0191, "step": 43425 }, { "epoch": 0.62, "grad_norm": 0.64453125, "learning_rate": 7.480029488699177e-05, "loss": 1.0395, "step": 43430 }, { "epoch": 0.62, "grad_norm": 0.62890625, "learning_rate": 7.47760674436271e-05, "loss": 0.9695, "step": 43435 }, { "epoch": 0.62, "grad_norm": 0.53515625, "learning_rate": 7.475184158132728e-05, "loss": 0.9514, "step": 43440 }, { "epoch": 0.62, "grad_norm": 0.59765625, "learning_rate": 7.472761730161079e-05, "loss": 1.0904, "step": 43445 }, { "epoch": 0.62, "grad_norm": 0.5078125, "learning_rate": 7.470339460599601e-05, "loss": 0.8637, "step": 43450 }, { "epoch": 0.62, "grad_norm": 0.58984375, "learning_rate": 7.467917349600129e-05, "loss": 0.7954, "step": 43455 }, { "epoch": 0.62, "grad_norm": 0.57421875, "learning_rate": 7.46549539731448e-05, "loss": 0.9869, "step": 43460 }, { "epoch": 0.62, "grad_norm": 0.63671875, "learning_rate": 7.463073603894469e-05, "loss": 0.8432, "step": 43465 }, { "epoch": 0.62, "grad_norm": 0.55859375, "learning_rate": 7.460651969491892e-05, "loss": 0.8657, "step": 43470 }, { "epoch": 0.62, "grad_norm": 0.515625, "learning_rate": 7.45823049425854e-05, "loss": 0.9686, "step": 43475 }, { "epoch": 0.62, "grad_norm": 0.51171875, "learning_rate": 7.455809178346196e-05, "loss": 1.0432, "step": 43480 }, { "epoch": 0.62, "grad_norm": 0.55859375, "learning_rate": 7.453388021906631e-05, "loss": 0.7601, "step": 43485 }, { "epoch": 0.62, "grad_norm": 0.53515625, "learning_rate": 7.450967025091604e-05, "loss": 0.9924, "step": 43490 }, { "epoch": 0.62, "grad_norm": 0.5234375, "learning_rate": 7.448546188052867e-05, "loss": 0.9843, "step": 43495 }, { "epoch": 0.62, "grad_norm": 0.458984375, "learning_rate": 7.446125510942162e-05, "loss": 0.9573, "step": 43500 }, { "epoch": 0.62, "grad_norm": 0.53515625, "learning_rate": 7.443704993911216e-05, "loss": 0.9863, "step": 43505 }, { "epoch": 0.62, "grad_norm": 0.5546875, "learning_rate": 7.441284637111755e-05, "loss": 0.9481, "step": 43510 }, { "epoch": 0.62, "grad_norm": 0.5703125, "learning_rate": 7.438864440695487e-05, "loss": 0.9117, "step": 43515 }, { "epoch": 0.62, "grad_norm": 0.55859375, "learning_rate": 7.436444404814115e-05, "loss": 0.9144, "step": 43520 }, { "epoch": 0.62, "grad_norm": 0.52734375, "learning_rate": 7.434024529619325e-05, "loss": 0.9627, "step": 43525 }, { "epoch": 0.62, "grad_norm": 0.55078125, "learning_rate": 7.431604815262799e-05, "loss": 1.0474, "step": 43530 }, { "epoch": 0.62, "grad_norm": 0.470703125, "learning_rate": 7.429185261896207e-05, "loss": 1.0196, "step": 43535 }, { "epoch": 0.62, "grad_norm": 0.515625, "learning_rate": 7.426765869671214e-05, "loss": 0.8849, "step": 43540 }, { "epoch": 0.62, "grad_norm": 0.5078125, "learning_rate": 7.424346638739463e-05, "loss": 0.9278, "step": 43545 }, { "epoch": 0.62, "grad_norm": 0.5546875, "learning_rate": 7.421927569252601e-05, "loss": 0.9472, "step": 43550 }, { "epoch": 0.62, "grad_norm": 0.494140625, "learning_rate": 7.419508661362255e-05, "loss": 0.9385, "step": 43555 }, { "epoch": 0.62, "grad_norm": 0.59375, "learning_rate": 7.417089915220044e-05, "loss": 0.8907, "step": 43560 }, { "epoch": 0.62, "grad_norm": 0.6171875, "learning_rate": 7.41467133097758e-05, "loss": 1.055, "step": 43565 }, { "epoch": 0.62, "grad_norm": 0.53125, "learning_rate": 7.412252908786463e-05, "loss": 0.9292, "step": 43570 }, { "epoch": 0.63, "grad_norm": 0.61328125, "learning_rate": 7.409834648798279e-05, "loss": 1.0102, "step": 43575 }, { "epoch": 0.63, "grad_norm": 0.55859375, "learning_rate": 7.407416551164608e-05, "loss": 0.9795, "step": 43580 }, { "epoch": 0.63, "grad_norm": 0.478515625, "learning_rate": 7.404998616037022e-05, "loss": 0.8821, "step": 43585 }, { "epoch": 0.63, "grad_norm": 0.5703125, "learning_rate": 7.402580843567078e-05, "loss": 0.8465, "step": 43590 }, { "epoch": 0.63, "grad_norm": 0.5703125, "learning_rate": 7.400163233906324e-05, "loss": 1.0351, "step": 43595 }, { "epoch": 0.63, "grad_norm": 0.578125, "learning_rate": 7.397745787206298e-05, "loss": 0.9298, "step": 43600 }, { "epoch": 0.63, "grad_norm": 0.55078125, "learning_rate": 7.395328503618533e-05, "loss": 0.982, "step": 43605 }, { "epoch": 0.63, "grad_norm": 0.55078125, "learning_rate": 7.392911383294543e-05, "loss": 0.9462, "step": 43610 }, { "epoch": 0.63, "grad_norm": 0.53125, "learning_rate": 7.390494426385835e-05, "loss": 1.0016, "step": 43615 }, { "epoch": 0.63, "grad_norm": 0.5234375, "learning_rate": 7.388077633043908e-05, "loss": 0.8682, "step": 43620 }, { "epoch": 0.63, "grad_norm": 0.578125, "learning_rate": 7.385661003420255e-05, "loss": 0.9146, "step": 43625 }, { "epoch": 0.63, "grad_norm": 0.5546875, "learning_rate": 7.383244537666345e-05, "loss": 0.896, "step": 43630 }, { "epoch": 0.63, "grad_norm": 0.57421875, "learning_rate": 7.380828235933644e-05, "loss": 1.0661, "step": 43635 }, { "epoch": 0.63, "grad_norm": 0.640625, "learning_rate": 7.378412098373616e-05, "loss": 0.909, "step": 43640 }, { "epoch": 0.63, "grad_norm": 0.58984375, "learning_rate": 7.375996125137704e-05, "loss": 1.0145, "step": 43645 }, { "epoch": 0.63, "grad_norm": 0.59765625, "learning_rate": 7.373580316377337e-05, "loss": 0.8261, "step": 43650 }, { "epoch": 0.63, "grad_norm": 0.57421875, "learning_rate": 7.371164672243953e-05, "loss": 0.9722, "step": 43655 }, { "epoch": 0.63, "grad_norm": 0.62890625, "learning_rate": 7.36874919288896e-05, "loss": 0.856, "step": 43660 }, { "epoch": 0.63, "grad_norm": 0.578125, "learning_rate": 7.366333878463765e-05, "loss": 1.0439, "step": 43665 }, { "epoch": 0.63, "grad_norm": 0.52734375, "learning_rate": 7.36391872911976e-05, "loss": 1.074, "step": 43670 }, { "epoch": 0.63, "grad_norm": 0.53125, "learning_rate": 7.361503745008335e-05, "loss": 0.8275, "step": 43675 }, { "epoch": 0.63, "grad_norm": 0.61328125, "learning_rate": 7.359088926280858e-05, "loss": 0.9063, "step": 43680 }, { "epoch": 0.63, "grad_norm": 0.54296875, "learning_rate": 7.356674273088695e-05, "loss": 0.9664, "step": 43685 }, { "epoch": 0.63, "grad_norm": 0.56640625, "learning_rate": 7.354259785583197e-05, "loss": 0.9558, "step": 43690 }, { "epoch": 0.63, "grad_norm": 0.54296875, "learning_rate": 7.351845463915711e-05, "loss": 0.9554, "step": 43695 }, { "epoch": 0.63, "grad_norm": 0.5703125, "learning_rate": 7.349431308237568e-05, "loss": 1.1306, "step": 43700 }, { "epoch": 0.63, "grad_norm": 0.55859375, "learning_rate": 7.347017318700087e-05, "loss": 0.8899, "step": 43705 }, { "epoch": 0.63, "grad_norm": 0.6015625, "learning_rate": 7.344603495454582e-05, "loss": 0.9215, "step": 43710 }, { "epoch": 0.63, "grad_norm": 0.5625, "learning_rate": 7.342189838652357e-05, "loss": 0.9706, "step": 43715 }, { "epoch": 0.63, "grad_norm": 0.5859375, "learning_rate": 7.339776348444696e-05, "loss": 0.8483, "step": 43720 }, { "epoch": 0.63, "grad_norm": 0.51953125, "learning_rate": 7.337363024982886e-05, "loss": 0.8448, "step": 43725 }, { "epoch": 0.63, "grad_norm": 0.578125, "learning_rate": 7.334949868418197e-05, "loss": 0.8894, "step": 43730 }, { "epoch": 0.63, "grad_norm": 0.5703125, "learning_rate": 7.332536878901882e-05, "loss": 0.9876, "step": 43735 }, { "epoch": 0.63, "grad_norm": 0.62109375, "learning_rate": 7.330124056585194e-05, "loss": 0.983, "step": 43740 }, { "epoch": 0.63, "grad_norm": 0.62890625, "learning_rate": 7.327711401619369e-05, "loss": 1.0151, "step": 43745 }, { "epoch": 0.63, "grad_norm": 0.515625, "learning_rate": 7.325298914155638e-05, "loss": 1.0221, "step": 43750 }, { "epoch": 0.63, "grad_norm": 0.55078125, "learning_rate": 7.322886594345218e-05, "loss": 0.854, "step": 43755 }, { "epoch": 0.63, "grad_norm": 0.53125, "learning_rate": 7.320474442339314e-05, "loss": 0.8749, "step": 43760 }, { "epoch": 0.63, "grad_norm": 0.490234375, "learning_rate": 7.318062458289124e-05, "loss": 0.8302, "step": 43765 }, { "epoch": 0.63, "grad_norm": 0.66796875, "learning_rate": 7.315650642345835e-05, "loss": 1.0153, "step": 43770 }, { "epoch": 0.63, "grad_norm": 0.5390625, "learning_rate": 7.313238994660618e-05, "loss": 1.0044, "step": 43775 }, { "epoch": 0.63, "grad_norm": 0.50390625, "learning_rate": 7.310827515384648e-05, "loss": 0.9339, "step": 43780 }, { "epoch": 0.63, "grad_norm": 0.5625, "learning_rate": 7.308416204669063e-05, "loss": 0.8673, "step": 43785 }, { "epoch": 0.63, "grad_norm": 0.546875, "learning_rate": 7.30600506266502e-05, "loss": 0.9973, "step": 43790 }, { "epoch": 0.63, "grad_norm": 0.50390625, "learning_rate": 7.303594089523649e-05, "loss": 0.818, "step": 43795 }, { "epoch": 0.63, "grad_norm": 0.50390625, "learning_rate": 7.301183285396068e-05, "loss": 0.8573, "step": 43800 }, { "epoch": 0.63, "grad_norm": 0.51171875, "learning_rate": 7.298772650433394e-05, "loss": 0.9827, "step": 43805 }, { "epoch": 0.63, "grad_norm": 0.50390625, "learning_rate": 7.296362184786728e-05, "loss": 0.9497, "step": 43810 }, { "epoch": 0.63, "grad_norm": 0.5234375, "learning_rate": 7.293951888607156e-05, "loss": 1.0426, "step": 43815 }, { "epoch": 0.63, "grad_norm": 0.49609375, "learning_rate": 7.291541762045765e-05, "loss": 0.9381, "step": 43820 }, { "epoch": 0.63, "grad_norm": 0.52734375, "learning_rate": 7.28913180525362e-05, "loss": 0.8794, "step": 43825 }, { "epoch": 0.63, "grad_norm": 0.5390625, "learning_rate": 7.28672201838178e-05, "loss": 0.9819, "step": 43830 }, { "epoch": 0.63, "grad_norm": 0.51953125, "learning_rate": 7.2843124015813e-05, "loss": 1.1377, "step": 43835 }, { "epoch": 0.63, "grad_norm": 0.52734375, "learning_rate": 7.281902955003204e-05, "loss": 1.0289, "step": 43840 }, { "epoch": 0.63, "grad_norm": 0.50390625, "learning_rate": 7.279493678798529e-05, "loss": 0.9459, "step": 43845 }, { "epoch": 0.63, "grad_norm": 0.6640625, "learning_rate": 7.277084573118289e-05, "loss": 0.9653, "step": 43850 }, { "epoch": 0.63, "grad_norm": 0.53515625, "learning_rate": 7.274675638113486e-05, "loss": 0.8437, "step": 43855 }, { "epoch": 0.63, "grad_norm": 0.51171875, "learning_rate": 7.272266873935122e-05, "loss": 0.974, "step": 43860 }, { "epoch": 0.63, "grad_norm": 0.55859375, "learning_rate": 7.269858280734176e-05, "loss": 1.1957, "step": 43865 }, { "epoch": 0.63, "grad_norm": 0.59375, "learning_rate": 7.26744985866162e-05, "loss": 0.9526, "step": 43870 }, { "epoch": 0.63, "grad_norm": 0.5078125, "learning_rate": 7.265041607868422e-05, "loss": 0.8111, "step": 43875 }, { "epoch": 0.63, "grad_norm": 0.59765625, "learning_rate": 7.262633528505529e-05, "loss": 0.8836, "step": 43880 }, { "epoch": 0.63, "grad_norm": 0.578125, "learning_rate": 7.260225620723888e-05, "loss": 0.8641, "step": 43885 }, { "epoch": 0.63, "grad_norm": 0.5546875, "learning_rate": 7.257817884674421e-05, "loss": 0.82, "step": 43890 }, { "epoch": 0.63, "grad_norm": 0.5546875, "learning_rate": 7.255410320508052e-05, "loss": 1.0119, "step": 43895 }, { "epoch": 0.63, "grad_norm": 0.51953125, "learning_rate": 7.253002928375692e-05, "loss": 0.8987, "step": 43900 }, { "epoch": 0.63, "grad_norm": 0.53515625, "learning_rate": 7.250595708428236e-05, "loss": 1.0118, "step": 43905 }, { "epoch": 0.63, "grad_norm": 0.59375, "learning_rate": 7.248188660816571e-05, "loss": 1.0069, "step": 43910 }, { "epoch": 0.63, "grad_norm": 0.5234375, "learning_rate": 7.245781785691576e-05, "loss": 0.9783, "step": 43915 }, { "epoch": 0.63, "grad_norm": 0.62109375, "learning_rate": 7.243375083204116e-05, "loss": 0.9433, "step": 43920 }, { "epoch": 0.63, "grad_norm": 0.5078125, "learning_rate": 7.240968553505043e-05, "loss": 0.9561, "step": 43925 }, { "epoch": 0.63, "grad_norm": 0.55859375, "learning_rate": 7.238562196745206e-05, "loss": 0.9336, "step": 43930 }, { "epoch": 0.63, "grad_norm": 0.57421875, "learning_rate": 7.236156013075435e-05, "loss": 1.0196, "step": 43935 }, { "epoch": 0.63, "grad_norm": 0.53515625, "learning_rate": 7.233750002646555e-05, "loss": 0.9104, "step": 43940 }, { "epoch": 0.63, "grad_norm": 0.5390625, "learning_rate": 7.231344165609375e-05, "loss": 0.9293, "step": 43945 }, { "epoch": 0.63, "grad_norm": 0.53515625, "learning_rate": 7.228938502114692e-05, "loss": 0.9552, "step": 43950 }, { "epoch": 0.63, "grad_norm": 0.5859375, "learning_rate": 7.226533012313301e-05, "loss": 0.8964, "step": 43955 }, { "epoch": 0.63, "grad_norm": 0.57421875, "learning_rate": 7.224127696355981e-05, "loss": 0.889, "step": 43960 }, { "epoch": 0.63, "grad_norm": 0.546875, "learning_rate": 7.221722554393496e-05, "loss": 1.1623, "step": 43965 }, { "epoch": 0.63, "grad_norm": 0.55859375, "learning_rate": 7.219317586576609e-05, "loss": 0.8779, "step": 43970 }, { "epoch": 0.63, "grad_norm": 0.578125, "learning_rate": 7.21691279305606e-05, "loss": 0.8894, "step": 43975 }, { "epoch": 0.63, "grad_norm": 0.52734375, "learning_rate": 7.214508173982585e-05, "loss": 1.0329, "step": 43980 }, { "epoch": 0.63, "grad_norm": 0.609375, "learning_rate": 7.212103729506914e-05, "loss": 0.8058, "step": 43985 }, { "epoch": 0.63, "grad_norm": 0.5234375, "learning_rate": 7.209699459779758e-05, "loss": 0.8913, "step": 43990 }, { "epoch": 0.63, "grad_norm": 0.625, "learning_rate": 7.207295364951814e-05, "loss": 0.9224, "step": 43995 }, { "epoch": 0.63, "grad_norm": 0.5546875, "learning_rate": 7.204891445173779e-05, "loss": 1.0408, "step": 44000 }, { "epoch": 0.63, "grad_norm": 0.640625, "learning_rate": 7.202487700596328e-05, "loss": 1.0718, "step": 44005 }, { "epoch": 0.63, "grad_norm": 0.64453125, "learning_rate": 7.200084131370138e-05, "loss": 0.91, "step": 44010 }, { "epoch": 0.63, "grad_norm": 0.65234375, "learning_rate": 7.197680737645861e-05, "loss": 0.8467, "step": 44015 }, { "epoch": 0.63, "grad_norm": 0.51171875, "learning_rate": 7.195277519574147e-05, "loss": 0.9021, "step": 44020 }, { "epoch": 0.63, "grad_norm": 0.52734375, "learning_rate": 7.192874477305633e-05, "loss": 0.8741, "step": 44025 }, { "epoch": 0.63, "grad_norm": 0.6640625, "learning_rate": 7.190471610990944e-05, "loss": 1.0427, "step": 44030 }, { "epoch": 0.63, "grad_norm": 0.6875, "learning_rate": 7.188068920780692e-05, "loss": 0.9584, "step": 44035 }, { "epoch": 0.63, "grad_norm": 0.5859375, "learning_rate": 7.185666406825486e-05, "loss": 0.9775, "step": 44040 }, { "epoch": 0.63, "grad_norm": 0.65234375, "learning_rate": 7.183264069275915e-05, "loss": 0.9982, "step": 44045 }, { "epoch": 0.63, "grad_norm": 0.56640625, "learning_rate": 7.18086190828256e-05, "loss": 0.9059, "step": 44050 }, { "epoch": 0.63, "grad_norm": 0.55859375, "learning_rate": 7.17845992399599e-05, "loss": 0.9274, "step": 44055 }, { "epoch": 0.63, "grad_norm": 0.5390625, "learning_rate": 7.176058116566764e-05, "loss": 1.0291, "step": 44060 }, { "epoch": 0.63, "grad_norm": 0.671875, "learning_rate": 7.173656486145434e-05, "loss": 0.8917, "step": 44065 }, { "epoch": 0.63, "grad_norm": 0.5390625, "learning_rate": 7.171255032882534e-05, "loss": 0.9251, "step": 44070 }, { "epoch": 0.63, "grad_norm": 0.51953125, "learning_rate": 7.168853756928587e-05, "loss": 1.0542, "step": 44075 }, { "epoch": 0.63, "grad_norm": 0.796875, "learning_rate": 7.166452658434115e-05, "loss": 1.0037, "step": 44080 }, { "epoch": 0.63, "grad_norm": 0.7890625, "learning_rate": 7.164051737549615e-05, "loss": 0.9898, "step": 44085 }, { "epoch": 0.63, "grad_norm": 0.51171875, "learning_rate": 7.161650994425582e-05, "loss": 0.8652, "step": 44090 }, { "epoch": 0.63, "grad_norm": 0.5625, "learning_rate": 7.159250429212503e-05, "loss": 1.0474, "step": 44095 }, { "epoch": 0.63, "grad_norm": 0.51953125, "learning_rate": 7.156850042060837e-05, "loss": 1.0262, "step": 44100 }, { "epoch": 0.63, "grad_norm": 0.63671875, "learning_rate": 7.154449833121049e-05, "loss": 0.8672, "step": 44105 }, { "epoch": 0.63, "grad_norm": 0.57421875, "learning_rate": 7.152049802543587e-05, "loss": 0.8591, "step": 44110 }, { "epoch": 0.63, "grad_norm": 0.498046875, "learning_rate": 7.149649950478884e-05, "loss": 0.9944, "step": 44115 }, { "epoch": 0.63, "grad_norm": 0.62890625, "learning_rate": 7.147250277077371e-05, "loss": 0.9759, "step": 44120 }, { "epoch": 0.63, "grad_norm": 0.5390625, "learning_rate": 7.14485078248946e-05, "loss": 0.9551, "step": 44125 }, { "epoch": 0.63, "grad_norm": 0.55078125, "learning_rate": 7.142451466865551e-05, "loss": 0.9274, "step": 44130 }, { "epoch": 0.63, "grad_norm": 0.5625, "learning_rate": 7.140052330356042e-05, "loss": 0.9259, "step": 44135 }, { "epoch": 0.63, "grad_norm": 0.63671875, "learning_rate": 7.137653373111309e-05, "loss": 1.0829, "step": 44140 }, { "epoch": 0.63, "grad_norm": 0.578125, "learning_rate": 7.135254595281719e-05, "loss": 0.982, "step": 44145 }, { "epoch": 0.63, "grad_norm": 0.62109375, "learning_rate": 7.132855997017642e-05, "loss": 0.8594, "step": 44150 }, { "epoch": 0.63, "grad_norm": 0.52734375, "learning_rate": 7.13045757846941e-05, "loss": 1.0339, "step": 44155 }, { "epoch": 0.63, "grad_norm": 0.58984375, "learning_rate": 7.128059339787368e-05, "loss": 0.974, "step": 44160 }, { "epoch": 0.63, "grad_norm": 0.55859375, "learning_rate": 7.125661281121837e-05, "loss": 0.8284, "step": 44165 }, { "epoch": 0.63, "grad_norm": 0.53125, "learning_rate": 7.123263402623125e-05, "loss": 0.9314, "step": 44170 }, { "epoch": 0.63, "grad_norm": 0.5234375, "learning_rate": 7.120865704441546e-05, "loss": 0.9241, "step": 44175 }, { "epoch": 0.63, "grad_norm": 0.53515625, "learning_rate": 7.11846818672738e-05, "loss": 0.9662, "step": 44180 }, { "epoch": 0.63, "grad_norm": 0.578125, "learning_rate": 7.116070849630911e-05, "loss": 1.0086, "step": 44185 }, { "epoch": 0.63, "grad_norm": 0.59765625, "learning_rate": 7.113673693302406e-05, "loss": 0.9713, "step": 44190 }, { "epoch": 0.63, "grad_norm": 0.51171875, "learning_rate": 7.111276717892121e-05, "loss": 0.9079, "step": 44195 }, { "epoch": 0.63, "grad_norm": 0.56640625, "learning_rate": 7.108879923550305e-05, "loss": 1.0538, "step": 44200 }, { "epoch": 0.63, "grad_norm": 0.59375, "learning_rate": 7.106483310427184e-05, "loss": 0.9012, "step": 44205 }, { "epoch": 0.63, "grad_norm": 0.48828125, "learning_rate": 7.104086878672984e-05, "loss": 0.8179, "step": 44210 }, { "epoch": 0.63, "grad_norm": 0.50390625, "learning_rate": 7.101690628437918e-05, "loss": 0.8541, "step": 44215 }, { "epoch": 0.63, "grad_norm": 0.609375, "learning_rate": 7.099294559872184e-05, "loss": 1.0969, "step": 44220 }, { "epoch": 0.63, "grad_norm": 0.54296875, "learning_rate": 7.096898673125969e-05, "loss": 1.0144, "step": 44225 }, { "epoch": 0.63, "grad_norm": 0.5546875, "learning_rate": 7.094502968349453e-05, "loss": 0.9345, "step": 44230 }, { "epoch": 0.63, "grad_norm": 0.6015625, "learning_rate": 7.092107445692802e-05, "loss": 1.0356, "step": 44235 }, { "epoch": 0.63, "grad_norm": 0.5546875, "learning_rate": 7.089712105306163e-05, "loss": 1.0366, "step": 44240 }, { "epoch": 0.63, "grad_norm": 0.6171875, "learning_rate": 7.087316947339689e-05, "loss": 0.9069, "step": 44245 }, { "epoch": 0.63, "grad_norm": 0.5390625, "learning_rate": 7.084921971943503e-05, "loss": 0.9973, "step": 44250 }, { "epoch": 0.63, "grad_norm": 0.53515625, "learning_rate": 7.082527179267731e-05, "loss": 0.9333, "step": 44255 }, { "epoch": 0.63, "grad_norm": 0.53515625, "learning_rate": 7.080132569462474e-05, "loss": 1.0377, "step": 44260 }, { "epoch": 0.63, "grad_norm": 0.65625, "learning_rate": 7.077738142677836e-05, "loss": 0.8374, "step": 44265 }, { "epoch": 0.64, "grad_norm": 0.58984375, "learning_rate": 7.0753438990639e-05, "loss": 0.9651, "step": 44270 }, { "epoch": 0.64, "grad_norm": 0.5625, "learning_rate": 7.072949838770737e-05, "loss": 1.0695, "step": 44275 }, { "epoch": 0.64, "grad_norm": 0.56640625, "learning_rate": 7.07055596194841e-05, "loss": 0.9352, "step": 44280 }, { "epoch": 0.64, "grad_norm": 0.60546875, "learning_rate": 7.068162268746975e-05, "loss": 0.8394, "step": 44285 }, { "epoch": 0.64, "grad_norm": 0.58203125, "learning_rate": 7.065768759316468e-05, "loss": 1.0907, "step": 44290 }, { "epoch": 0.64, "grad_norm": 0.5, "learning_rate": 7.063375433806914e-05, "loss": 0.9587, "step": 44295 }, { "epoch": 0.64, "grad_norm": 0.6015625, "learning_rate": 7.060982292368334e-05, "loss": 1.0745, "step": 44300 }, { "epoch": 0.64, "grad_norm": 0.578125, "learning_rate": 7.058589335150734e-05, "loss": 0.9421, "step": 44305 }, { "epoch": 0.64, "grad_norm": 0.5390625, "learning_rate": 7.056196562304103e-05, "loss": 0.9171, "step": 44310 }, { "epoch": 0.64, "grad_norm": 0.56640625, "learning_rate": 7.053803973978423e-05, "loss": 0.9543, "step": 44315 }, { "epoch": 0.64, "grad_norm": 0.56640625, "learning_rate": 7.051411570323665e-05, "loss": 0.8534, "step": 44320 }, { "epoch": 0.64, "grad_norm": 0.53515625, "learning_rate": 7.04901935148979e-05, "loss": 0.7847, "step": 44325 }, { "epoch": 0.64, "grad_norm": 0.625, "learning_rate": 7.04662731762674e-05, "loss": 0.9292, "step": 44330 }, { "epoch": 0.64, "grad_norm": 0.59375, "learning_rate": 7.044235468884455e-05, "loss": 0.9307, "step": 44335 }, { "epoch": 0.64, "grad_norm": 0.515625, "learning_rate": 7.041843805412855e-05, "loss": 0.9345, "step": 44340 }, { "epoch": 0.64, "grad_norm": 0.5703125, "learning_rate": 7.039452327361857e-05, "loss": 0.8205, "step": 44345 }, { "epoch": 0.64, "grad_norm": 0.59375, "learning_rate": 7.037061034881358e-05, "loss": 0.9699, "step": 44350 }, { "epoch": 0.64, "grad_norm": 0.6171875, "learning_rate": 7.034669928121248e-05, "loss": 0.9605, "step": 44355 }, { "epoch": 0.64, "grad_norm": 0.625, "learning_rate": 7.032279007231406e-05, "loss": 1.0062, "step": 44360 }, { "epoch": 0.64, "grad_norm": 0.6171875, "learning_rate": 7.029888272361695e-05, "loss": 0.8797, "step": 44365 }, { "epoch": 0.64, "grad_norm": 0.546875, "learning_rate": 7.027497723661967e-05, "loss": 0.9759, "step": 44370 }, { "epoch": 0.64, "grad_norm": 0.63671875, "learning_rate": 7.025107361282069e-05, "loss": 0.9113, "step": 44375 }, { "epoch": 0.64, "grad_norm": 0.55859375, "learning_rate": 7.02271718537183e-05, "loss": 1.0109, "step": 44380 }, { "epoch": 0.64, "grad_norm": 0.53125, "learning_rate": 7.020327196081067e-05, "loss": 0.8384, "step": 44385 }, { "epoch": 0.64, "grad_norm": 0.56640625, "learning_rate": 7.01793739355959e-05, "loss": 0.9772, "step": 44390 }, { "epoch": 0.64, "grad_norm": 0.54296875, "learning_rate": 7.015547777957194e-05, "loss": 1.0573, "step": 44395 }, { "epoch": 0.64, "grad_norm": 0.58984375, "learning_rate": 7.01315834942366e-05, "loss": 1.0351, "step": 44400 }, { "epoch": 0.64, "grad_norm": 0.5234375, "learning_rate": 7.010769108108764e-05, "loss": 0.8791, "step": 44405 }, { "epoch": 0.64, "grad_norm": 0.58984375, "learning_rate": 7.008380054162268e-05, "loss": 0.9083, "step": 44410 }, { "epoch": 0.64, "grad_norm": 0.54296875, "learning_rate": 7.005991187733914e-05, "loss": 0.9679, "step": 44415 }, { "epoch": 0.64, "grad_norm": 0.55078125, "learning_rate": 7.003602508973444e-05, "loss": 0.8742, "step": 44420 }, { "epoch": 0.64, "grad_norm": 0.53125, "learning_rate": 7.001214018030578e-05, "loss": 0.8526, "step": 44425 }, { "epoch": 0.64, "grad_norm": 0.5703125, "learning_rate": 6.998825715055035e-05, "loss": 0.8964, "step": 44430 }, { "epoch": 0.64, "grad_norm": 0.55078125, "learning_rate": 6.996437600196514e-05, "loss": 1.1129, "step": 44435 }, { "epoch": 0.64, "grad_norm": 0.53125, "learning_rate": 6.994049673604703e-05, "loss": 0.8891, "step": 44440 }, { "epoch": 0.64, "grad_norm": 0.515625, "learning_rate": 6.991661935429284e-05, "loss": 0.8465, "step": 44445 }, { "epoch": 0.64, "grad_norm": 0.54296875, "learning_rate": 6.989274385819921e-05, "loss": 1.0433, "step": 44450 }, { "epoch": 0.64, "grad_norm": 0.65625, "learning_rate": 6.986887024926267e-05, "loss": 0.8986, "step": 44455 }, { "epoch": 0.64, "grad_norm": 0.53515625, "learning_rate": 6.984499852897968e-05, "loss": 1.0319, "step": 44460 }, { "epoch": 0.64, "grad_norm": 0.59765625, "learning_rate": 6.982112869884654e-05, "loss": 0.9768, "step": 44465 }, { "epoch": 0.64, "grad_norm": 0.734375, "learning_rate": 6.97972607603594e-05, "loss": 0.8718, "step": 44470 }, { "epoch": 0.64, "grad_norm": 0.56640625, "learning_rate": 6.977339471501436e-05, "loss": 0.969, "step": 44475 }, { "epoch": 0.64, "grad_norm": 0.515625, "learning_rate": 6.974953056430736e-05, "loss": 0.9752, "step": 44480 }, { "epoch": 0.64, "grad_norm": 0.5078125, "learning_rate": 6.972566830973423e-05, "loss": 0.8427, "step": 44485 }, { "epoch": 0.64, "grad_norm": 0.5234375, "learning_rate": 6.970180795279069e-05, "loss": 0.909, "step": 44490 }, { "epoch": 0.64, "grad_norm": 0.578125, "learning_rate": 6.967794949497233e-05, "loss": 0.9467, "step": 44495 }, { "epoch": 0.64, "grad_norm": 0.71484375, "learning_rate": 6.965409293777464e-05, "loss": 1.0362, "step": 44500 }, { "epoch": 0.64, "grad_norm": 0.515625, "learning_rate": 6.963023828269297e-05, "loss": 0.8744, "step": 44505 }, { "epoch": 0.64, "grad_norm": 0.578125, "learning_rate": 6.960638553122254e-05, "loss": 0.9793, "step": 44510 }, { "epoch": 0.64, "grad_norm": 0.4765625, "learning_rate": 6.958253468485853e-05, "loss": 0.9228, "step": 44515 }, { "epoch": 0.64, "grad_norm": 0.515625, "learning_rate": 6.955868574509583e-05, "loss": 0.9583, "step": 44520 }, { "epoch": 0.64, "grad_norm": 0.59765625, "learning_rate": 6.953483871342941e-05, "loss": 1.0902, "step": 44525 }, { "epoch": 0.64, "grad_norm": 0.61328125, "learning_rate": 6.951099359135399e-05, "loss": 1.0439, "step": 44530 }, { "epoch": 0.64, "grad_norm": 0.66796875, "learning_rate": 6.948715038036417e-05, "loss": 1.0237, "step": 44535 }, { "epoch": 0.64, "grad_norm": 0.5234375, "learning_rate": 6.946330908195457e-05, "loss": 1.0154, "step": 44540 }, { "epoch": 0.64, "grad_norm": 0.57421875, "learning_rate": 6.943946969761951e-05, "loss": 0.9348, "step": 44545 }, { "epoch": 0.64, "grad_norm": 0.546875, "learning_rate": 6.94156322288533e-05, "loss": 0.9672, "step": 44550 }, { "epoch": 0.64, "grad_norm": 0.53125, "learning_rate": 6.93917966771501e-05, "loss": 0.9573, "step": 44555 }, { "epoch": 0.64, "grad_norm": 0.57421875, "learning_rate": 6.936796304400395e-05, "loss": 1.1258, "step": 44560 }, { "epoch": 0.64, "grad_norm": 0.46875, "learning_rate": 6.934413133090872e-05, "loss": 0.9212, "step": 44565 }, { "epoch": 0.64, "grad_norm": 0.62890625, "learning_rate": 6.932030153935834e-05, "loss": 0.9884, "step": 44570 }, { "epoch": 0.64, "grad_norm": 0.57421875, "learning_rate": 6.929647367084633e-05, "loss": 0.9147, "step": 44575 }, { "epoch": 0.64, "grad_norm": 0.6328125, "learning_rate": 6.927264772686635e-05, "loss": 1.0419, "step": 44580 }, { "epoch": 0.64, "grad_norm": 0.640625, "learning_rate": 6.924882370891179e-05, "loss": 0.9673, "step": 44585 }, { "epoch": 0.64, "grad_norm": 0.609375, "learning_rate": 6.922500161847596e-05, "loss": 0.9509, "step": 44590 }, { "epoch": 0.64, "grad_norm": 0.55859375, "learning_rate": 6.92011814570521e-05, "loss": 1.022, "step": 44595 }, { "epoch": 0.64, "grad_norm": 0.59765625, "learning_rate": 6.917736322613329e-05, "loss": 1.0838, "step": 44600 }, { "epoch": 0.64, "grad_norm": 0.56640625, "learning_rate": 6.915354692721242e-05, "loss": 1.0785, "step": 44605 }, { "epoch": 0.64, "grad_norm": 0.5703125, "learning_rate": 6.912973256178236e-05, "loss": 1.054, "step": 44610 }, { "epoch": 0.64, "grad_norm": 0.515625, "learning_rate": 6.910592013133584e-05, "loss": 0.811, "step": 44615 }, { "epoch": 0.64, "grad_norm": 0.53515625, "learning_rate": 6.908210963736546e-05, "loss": 0.9427, "step": 44620 }, { "epoch": 0.64, "grad_norm": 0.51953125, "learning_rate": 6.905830108136362e-05, "loss": 0.8712, "step": 44625 }, { "epoch": 0.64, "grad_norm": 0.5546875, "learning_rate": 6.903449446482271e-05, "loss": 0.8157, "step": 44630 }, { "epoch": 0.64, "grad_norm": 0.6171875, "learning_rate": 6.901068978923495e-05, "loss": 1.0176, "step": 44635 }, { "epoch": 0.64, "grad_norm": 0.6171875, "learning_rate": 6.898688705609246e-05, "loss": 0.875, "step": 44640 }, { "epoch": 0.64, "grad_norm": 0.62109375, "learning_rate": 6.896308626688719e-05, "loss": 1.0577, "step": 44645 }, { "epoch": 0.64, "grad_norm": 0.52734375, "learning_rate": 6.893928742311104e-05, "loss": 0.9952, "step": 44650 }, { "epoch": 0.64, "grad_norm": 0.494140625, "learning_rate": 6.891549052625574e-05, "loss": 0.9785, "step": 44655 }, { "epoch": 0.64, "grad_norm": 0.55859375, "learning_rate": 6.889169557781285e-05, "loss": 0.9298, "step": 44660 }, { "epoch": 0.64, "grad_norm": 0.55078125, "learning_rate": 6.886790257927395e-05, "loss": 0.9041, "step": 44665 }, { "epoch": 0.64, "grad_norm": 0.58203125, "learning_rate": 6.884411153213037e-05, "loss": 1.0325, "step": 44670 }, { "epoch": 0.64, "grad_norm": 0.5390625, "learning_rate": 6.88203224378734e-05, "loss": 0.9072, "step": 44675 }, { "epoch": 0.64, "grad_norm": 0.53515625, "learning_rate": 6.879653529799408e-05, "loss": 1.0979, "step": 44680 }, { "epoch": 0.64, "grad_norm": 0.55078125, "learning_rate": 6.877275011398346e-05, "loss": 0.9291, "step": 44685 }, { "epoch": 0.64, "grad_norm": 0.546875, "learning_rate": 6.874896688733246e-05, "loss": 0.953, "step": 44690 }, { "epoch": 0.64, "grad_norm": 0.515625, "learning_rate": 6.872518561953178e-05, "loss": 0.9055, "step": 44695 }, { "epoch": 0.64, "grad_norm": 0.72265625, "learning_rate": 6.870140631207207e-05, "loss": 1.0469, "step": 44700 }, { "epoch": 0.64, "grad_norm": 0.5546875, "learning_rate": 6.86776289664439e-05, "loss": 0.885, "step": 44705 }, { "epoch": 0.64, "grad_norm": 0.59375, "learning_rate": 6.865385358413761e-05, "loss": 0.9648, "step": 44710 }, { "epoch": 0.64, "grad_norm": 0.66015625, "learning_rate": 6.863008016664344e-05, "loss": 0.9232, "step": 44715 }, { "epoch": 0.64, "grad_norm": 0.5703125, "learning_rate": 6.86063087154516e-05, "loss": 0.974, "step": 44720 }, { "epoch": 0.64, "grad_norm": 0.5625, "learning_rate": 6.85825392320521e-05, "loss": 1.0025, "step": 44725 }, { "epoch": 0.64, "grad_norm": 0.55078125, "learning_rate": 6.855877171793484e-05, "loss": 0.9794, "step": 44730 }, { "epoch": 0.64, "grad_norm": 0.546875, "learning_rate": 6.853500617458955e-05, "loss": 1.0223, "step": 44735 }, { "epoch": 0.64, "grad_norm": 0.54296875, "learning_rate": 6.851124260350588e-05, "loss": 1.0199, "step": 44740 }, { "epoch": 0.64, "grad_norm": 0.62109375, "learning_rate": 6.848748100617342e-05, "loss": 0.9964, "step": 44745 }, { "epoch": 0.64, "grad_norm": 0.494140625, "learning_rate": 6.846372138408152e-05, "loss": 0.8822, "step": 44750 }, { "epoch": 0.64, "grad_norm": 0.51953125, "learning_rate": 6.843996373871948e-05, "loss": 0.8908, "step": 44755 }, { "epoch": 0.64, "grad_norm": 0.57421875, "learning_rate": 6.841620807157647e-05, "loss": 0.9091, "step": 44760 }, { "epoch": 0.64, "grad_norm": 0.53515625, "learning_rate": 6.839245438414152e-05, "loss": 0.9188, "step": 44765 }, { "epoch": 0.64, "grad_norm": 0.609375, "learning_rate": 6.83687026779035e-05, "loss": 1.1365, "step": 44770 }, { "epoch": 0.64, "grad_norm": 0.58984375, "learning_rate": 6.834495295435123e-05, "loss": 1.0363, "step": 44775 }, { "epoch": 0.64, "grad_norm": 0.515625, "learning_rate": 6.832120521497339e-05, "loss": 0.9475, "step": 44780 }, { "epoch": 0.64, "grad_norm": 0.515625, "learning_rate": 6.829745946125847e-05, "loss": 0.9322, "step": 44785 }, { "epoch": 0.64, "grad_norm": 0.63671875, "learning_rate": 6.827371569469489e-05, "loss": 1.1476, "step": 44790 }, { "epoch": 0.64, "grad_norm": 0.53125, "learning_rate": 6.824997391677092e-05, "loss": 0.9128, "step": 44795 }, { "epoch": 0.64, "grad_norm": 0.640625, "learning_rate": 6.822623412897479e-05, "loss": 0.9565, "step": 44800 }, { "epoch": 0.64, "grad_norm": 0.5390625, "learning_rate": 6.820249633279448e-05, "loss": 0.9199, "step": 44805 }, { "epoch": 0.64, "grad_norm": 0.5546875, "learning_rate": 6.817876052971788e-05, "loss": 1.0906, "step": 44810 }, { "epoch": 0.64, "grad_norm": 0.48046875, "learning_rate": 6.815502672123284e-05, "loss": 0.8355, "step": 44815 }, { "epoch": 0.64, "grad_norm": 0.578125, "learning_rate": 6.813129490882699e-05, "loss": 0.9748, "step": 44820 }, { "epoch": 0.64, "grad_norm": 0.52734375, "learning_rate": 6.810756509398786e-05, "loss": 0.9507, "step": 44825 }, { "epoch": 0.64, "grad_norm": 0.52734375, "learning_rate": 6.808383727820292e-05, "loss": 0.9011, "step": 44830 }, { "epoch": 0.64, "grad_norm": 0.57421875, "learning_rate": 6.806011146295937e-05, "loss": 1.0287, "step": 44835 }, { "epoch": 0.64, "grad_norm": 0.5390625, "learning_rate": 6.80363876497444e-05, "loss": 1.0217, "step": 44840 }, { "epoch": 0.64, "grad_norm": 0.55078125, "learning_rate": 6.801266584004507e-05, "loss": 1.0007, "step": 44845 }, { "epoch": 0.64, "grad_norm": 0.6953125, "learning_rate": 6.798894603534827e-05, "loss": 0.9481, "step": 44850 }, { "epoch": 0.64, "grad_norm": 0.546875, "learning_rate": 6.796522823714079e-05, "loss": 1.0604, "step": 44855 }, { "epoch": 0.64, "grad_norm": 0.59765625, "learning_rate": 6.79415124469093e-05, "loss": 0.9995, "step": 44860 }, { "epoch": 0.64, "grad_norm": 0.53515625, "learning_rate": 6.791779866614028e-05, "loss": 0.951, "step": 44865 }, { "epoch": 0.64, "grad_norm": 0.6015625, "learning_rate": 6.789408689632021e-05, "loss": 0.898, "step": 44870 }, { "epoch": 0.64, "grad_norm": 0.51171875, "learning_rate": 6.787037713893536e-05, "loss": 1.0025, "step": 44875 }, { "epoch": 0.64, "grad_norm": 0.56640625, "learning_rate": 6.784666939547182e-05, "loss": 0.974, "step": 44880 }, { "epoch": 0.64, "grad_norm": 0.55859375, "learning_rate": 6.782296366741574e-05, "loss": 0.9054, "step": 44885 }, { "epoch": 0.64, "grad_norm": 0.5703125, "learning_rate": 6.779925995625287e-05, "loss": 1.0215, "step": 44890 }, { "epoch": 0.64, "grad_norm": 0.490234375, "learning_rate": 6.777555826346907e-05, "loss": 0.9463, "step": 44895 }, { "epoch": 0.64, "grad_norm": 0.6328125, "learning_rate": 6.775185859055e-05, "loss": 1.0455, "step": 44900 }, { "epoch": 0.64, "grad_norm": 0.51171875, "learning_rate": 6.772816093898114e-05, "loss": 0.8271, "step": 44905 }, { "epoch": 0.64, "grad_norm": 0.490234375, "learning_rate": 6.77044653102479e-05, "loss": 0.9981, "step": 44910 }, { "epoch": 0.64, "grad_norm": 0.53125, "learning_rate": 6.768077170583558e-05, "loss": 0.9118, "step": 44915 }, { "epoch": 0.64, "grad_norm": 0.58984375, "learning_rate": 6.765708012722927e-05, "loss": 1.0437, "step": 44920 }, { "epoch": 0.64, "grad_norm": 0.5390625, "learning_rate": 6.763339057591404e-05, "loss": 0.8474, "step": 44925 }, { "epoch": 0.64, "grad_norm": 0.46875, "learning_rate": 6.760970305337475e-05, "loss": 0.8097, "step": 44930 }, { "epoch": 0.64, "grad_norm": 0.52734375, "learning_rate": 6.758601756109617e-05, "loss": 0.8699, "step": 44935 }, { "epoch": 0.64, "grad_norm": 0.546875, "learning_rate": 6.756233410056292e-05, "loss": 0.9699, "step": 44940 }, { "epoch": 0.64, "grad_norm": 0.515625, "learning_rate": 6.753865267325949e-05, "loss": 0.9083, "step": 44945 }, { "epoch": 0.64, "grad_norm": 0.57421875, "learning_rate": 6.75149732806703e-05, "loss": 0.8572, "step": 44950 }, { "epoch": 0.64, "grad_norm": 0.5546875, "learning_rate": 6.749129592427958e-05, "loss": 1.0053, "step": 44955 }, { "epoch": 0.64, "grad_norm": 0.453125, "learning_rate": 6.746762060557143e-05, "loss": 0.7956, "step": 44960 }, { "epoch": 0.65, "grad_norm": 0.6171875, "learning_rate": 6.74439473260299e-05, "loss": 1.0633, "step": 44965 }, { "epoch": 0.65, "grad_norm": 0.57421875, "learning_rate": 6.742027608713883e-05, "loss": 0.8319, "step": 44970 }, { "epoch": 0.65, "grad_norm": 0.53125, "learning_rate": 6.739660689038193e-05, "loss": 0.9019, "step": 44975 }, { "epoch": 0.65, "grad_norm": 0.6484375, "learning_rate": 6.737293973724287e-05, "loss": 0.9892, "step": 44980 }, { "epoch": 0.65, "grad_norm": 0.55078125, "learning_rate": 6.73492746292051e-05, "loss": 0.9203, "step": 44985 }, { "epoch": 0.65, "grad_norm": 0.5390625, "learning_rate": 6.732561156775202e-05, "loss": 1.0347, "step": 44990 }, { "epoch": 0.65, "grad_norm": 0.6171875, "learning_rate": 6.730195055436677e-05, "loss": 0.9512, "step": 44995 }, { "epoch": 0.65, "grad_norm": 0.5859375, "learning_rate": 6.727829159053251e-05, "loss": 1.0785, "step": 45000 }, { "epoch": 0.65, "grad_norm": 0.54296875, "learning_rate": 6.725463467773221e-05, "loss": 0.9478, "step": 45005 }, { "epoch": 0.65, "grad_norm": 0.5859375, "learning_rate": 6.72309798174487e-05, "loss": 0.943, "step": 45010 }, { "epoch": 0.65, "grad_norm": 0.546875, "learning_rate": 6.720732701116468e-05, "loss": 0.9247, "step": 45015 }, { "epoch": 0.65, "grad_norm": 0.58203125, "learning_rate": 6.718367626036276e-05, "loss": 0.8705, "step": 45020 }, { "epoch": 0.65, "grad_norm": 0.478515625, "learning_rate": 6.71600275665254e-05, "loss": 0.9431, "step": 45025 }, { "epoch": 0.65, "grad_norm": 0.5078125, "learning_rate": 6.713638093113488e-05, "loss": 0.932, "step": 45030 }, { "epoch": 0.65, "grad_norm": 0.51953125, "learning_rate": 6.711273635567346e-05, "loss": 1.0349, "step": 45035 }, { "epoch": 0.65, "grad_norm": 0.578125, "learning_rate": 6.70890938416232e-05, "loss": 0.988, "step": 45040 }, { "epoch": 0.65, "grad_norm": 0.53515625, "learning_rate": 6.7065453390466e-05, "loss": 0.9581, "step": 45045 }, { "epoch": 0.65, "grad_norm": 0.51953125, "learning_rate": 6.704181500368368e-05, "loss": 0.93, "step": 45050 }, { "epoch": 0.65, "grad_norm": 0.51171875, "learning_rate": 6.701817868275792e-05, "loss": 1.0163, "step": 45055 }, { "epoch": 0.65, "grad_norm": 0.61328125, "learning_rate": 6.699454442917031e-05, "loss": 0.9731, "step": 45060 }, { "epoch": 0.65, "grad_norm": 0.5625, "learning_rate": 6.697091224440221e-05, "loss": 0.8179, "step": 45065 }, { "epoch": 0.65, "grad_norm": 0.5703125, "learning_rate": 6.6947282129935e-05, "loss": 0.9169, "step": 45070 }, { "epoch": 0.65, "grad_norm": 0.55078125, "learning_rate": 6.692365408724976e-05, "loss": 0.888, "step": 45075 }, { "epoch": 0.65, "grad_norm": 0.515625, "learning_rate": 6.690002811782754e-05, "loss": 0.9491, "step": 45080 }, { "epoch": 0.65, "grad_norm": 0.55859375, "learning_rate": 6.687640422314927e-05, "loss": 0.8988, "step": 45085 }, { "epoch": 0.65, "grad_norm": 0.5703125, "learning_rate": 6.685278240469572e-05, "loss": 1.0214, "step": 45090 }, { "epoch": 0.65, "grad_norm": 0.5546875, "learning_rate": 6.682916266394753e-05, "loss": 1.192, "step": 45095 }, { "epoch": 0.65, "grad_norm": 0.5390625, "learning_rate": 6.680554500238519e-05, "loss": 0.9118, "step": 45100 }, { "epoch": 0.65, "grad_norm": 0.62890625, "learning_rate": 6.678192942148907e-05, "loss": 1.0664, "step": 45105 }, { "epoch": 0.65, "grad_norm": 0.640625, "learning_rate": 6.675831592273947e-05, "loss": 0.8504, "step": 45110 }, { "epoch": 0.65, "grad_norm": 0.546875, "learning_rate": 6.673470450761647e-05, "loss": 0.974, "step": 45115 }, { "epoch": 0.65, "grad_norm": 0.5390625, "learning_rate": 6.671109517760009e-05, "loss": 0.9236, "step": 45120 }, { "epoch": 0.65, "grad_norm": 0.6953125, "learning_rate": 6.668748793417017e-05, "loss": 0.9077, "step": 45125 }, { "epoch": 0.65, "grad_norm": 0.59375, "learning_rate": 6.666388277880646e-05, "loss": 0.9977, "step": 45130 }, { "epoch": 0.65, "grad_norm": 0.56640625, "learning_rate": 6.664027971298852e-05, "loss": 1.0262, "step": 45135 }, { "epoch": 0.65, "grad_norm": 0.6640625, "learning_rate": 6.661667873819586e-05, "loss": 1.0116, "step": 45140 }, { "epoch": 0.65, "grad_norm": 0.66015625, "learning_rate": 6.659307985590779e-05, "loss": 0.8108, "step": 45145 }, { "epoch": 0.65, "grad_norm": 0.53125, "learning_rate": 6.656948306760356e-05, "loss": 0.9178, "step": 45150 }, { "epoch": 0.65, "grad_norm": 0.5859375, "learning_rate": 6.654588837476216e-05, "loss": 0.8277, "step": 45155 }, { "epoch": 0.65, "grad_norm": 0.50390625, "learning_rate": 6.652229577886258e-05, "loss": 1.0075, "step": 45160 }, { "epoch": 0.65, "grad_norm": 0.5546875, "learning_rate": 6.649870528138364e-05, "loss": 1.0151, "step": 45165 }, { "epoch": 0.65, "grad_norm": 0.52734375, "learning_rate": 6.647511688380402e-05, "loss": 0.8129, "step": 45170 }, { "epoch": 0.65, "grad_norm": 0.5546875, "learning_rate": 6.64515305876022e-05, "loss": 1.0257, "step": 45175 }, { "epoch": 0.65, "grad_norm": 0.51171875, "learning_rate": 6.642794639425671e-05, "loss": 0.8383, "step": 45180 }, { "epoch": 0.65, "grad_norm": 0.5546875, "learning_rate": 6.640436430524576e-05, "loss": 0.9116, "step": 45185 }, { "epoch": 0.65, "grad_norm": 0.68359375, "learning_rate": 6.638078432204749e-05, "loss": 0.8931, "step": 45190 }, { "epoch": 0.65, "grad_norm": 0.53125, "learning_rate": 6.635720644613998e-05, "loss": 0.8874, "step": 45195 }, { "epoch": 0.65, "grad_norm": 0.546875, "learning_rate": 6.63336306790011e-05, "loss": 0.8541, "step": 45200 }, { "epoch": 0.65, "grad_norm": 0.50390625, "learning_rate": 6.631005702210857e-05, "loss": 0.9455, "step": 45205 }, { "epoch": 0.65, "grad_norm": 0.55859375, "learning_rate": 6.628648547694006e-05, "loss": 0.8394, "step": 45210 }, { "epoch": 0.65, "grad_norm": 0.5859375, "learning_rate": 6.626291604497299e-05, "loss": 1.0215, "step": 45215 }, { "epoch": 0.65, "grad_norm": 0.53515625, "learning_rate": 6.623934872768478e-05, "loss": 0.9224, "step": 45220 }, { "epoch": 0.65, "grad_norm": 0.609375, "learning_rate": 6.621578352655267e-05, "loss": 0.9382, "step": 45225 }, { "epoch": 0.65, "grad_norm": 0.6328125, "learning_rate": 6.619222044305368e-05, "loss": 1.0595, "step": 45230 }, { "epoch": 0.65, "grad_norm": 0.55078125, "learning_rate": 6.616865947866484e-05, "loss": 0.8566, "step": 45235 }, { "epoch": 0.65, "grad_norm": 0.53515625, "learning_rate": 6.614510063486296e-05, "loss": 0.8293, "step": 45240 }, { "epoch": 0.65, "grad_norm": 0.54296875, "learning_rate": 6.61215439131247e-05, "loss": 0.8726, "step": 45245 }, { "epoch": 0.65, "grad_norm": 0.578125, "learning_rate": 6.609798931492671e-05, "loss": 1.0158, "step": 45250 }, { "epoch": 0.65, "grad_norm": 0.53515625, "learning_rate": 6.607443684174533e-05, "loss": 0.9727, "step": 45255 }, { "epoch": 0.65, "grad_norm": 0.54296875, "learning_rate": 6.605088649505689e-05, "loss": 0.9833, "step": 45260 }, { "epoch": 0.65, "grad_norm": 0.58203125, "learning_rate": 6.602733827633756e-05, "loss": 0.9667, "step": 45265 }, { "epoch": 0.65, "grad_norm": 0.61328125, "learning_rate": 6.600379218706331e-05, "loss": 1.0605, "step": 45270 }, { "epoch": 0.65, "grad_norm": 0.625, "learning_rate": 6.598024822871014e-05, "loss": 0.8976, "step": 45275 }, { "epoch": 0.65, "grad_norm": 0.578125, "learning_rate": 6.595670640275373e-05, "loss": 0.9378, "step": 45280 }, { "epoch": 0.65, "grad_norm": 0.64453125, "learning_rate": 6.593316671066972e-05, "loss": 0.9807, "step": 45285 }, { "epoch": 0.65, "grad_norm": 0.470703125, "learning_rate": 6.590962915393364e-05, "loss": 0.9207, "step": 45290 }, { "epoch": 0.65, "grad_norm": 0.625, "learning_rate": 6.588609373402084e-05, "loss": 1.0662, "step": 45295 }, { "epoch": 0.65, "grad_norm": 0.55078125, "learning_rate": 6.58625604524065e-05, "loss": 0.9258, "step": 45300 }, { "epoch": 0.65, "grad_norm": 0.5234375, "learning_rate": 6.583902931056582e-05, "loss": 0.9294, "step": 45305 }, { "epoch": 0.65, "grad_norm": 0.5546875, "learning_rate": 6.581550030997363e-05, "loss": 0.9434, "step": 45310 }, { "epoch": 0.65, "grad_norm": 0.56640625, "learning_rate": 6.579197345210483e-05, "loss": 1.028, "step": 45315 }, { "epoch": 0.65, "grad_norm": 0.5390625, "learning_rate": 6.576844873843409e-05, "loss": 0.8776, "step": 45320 }, { "epoch": 0.65, "grad_norm": 0.58984375, "learning_rate": 6.574492617043596e-05, "loss": 1.091, "step": 45325 }, { "epoch": 0.65, "grad_norm": 0.53515625, "learning_rate": 6.572140574958488e-05, "loss": 0.8153, "step": 45330 }, { "epoch": 0.65, "grad_norm": 0.71484375, "learning_rate": 6.569788747735515e-05, "loss": 1.0016, "step": 45335 }, { "epoch": 0.65, "grad_norm": 0.546875, "learning_rate": 6.567437135522085e-05, "loss": 0.9558, "step": 45340 }, { "epoch": 0.65, "grad_norm": 0.60546875, "learning_rate": 6.565085738465608e-05, "loss": 1.0025, "step": 45345 }, { "epoch": 0.65, "grad_norm": 0.51171875, "learning_rate": 6.56273455671347e-05, "loss": 0.8308, "step": 45350 }, { "epoch": 0.65, "grad_norm": 0.578125, "learning_rate": 6.560383590413042e-05, "loss": 1.0665, "step": 45355 }, { "epoch": 0.65, "grad_norm": 0.6171875, "learning_rate": 6.558032839711693e-05, "loss": 0.9764, "step": 45360 }, { "epoch": 0.65, "grad_norm": 0.55078125, "learning_rate": 6.555682304756761e-05, "loss": 0.8749, "step": 45365 }, { "epoch": 0.65, "grad_norm": 0.54296875, "learning_rate": 6.553331985695586e-05, "loss": 1.0905, "step": 45370 }, { "epoch": 0.65, "grad_norm": 0.578125, "learning_rate": 6.550981882675487e-05, "loss": 1.0649, "step": 45375 }, { "epoch": 0.65, "grad_norm": 0.54296875, "learning_rate": 6.54863199584377e-05, "loss": 0.9374, "step": 45380 }, { "epoch": 0.65, "grad_norm": 0.56640625, "learning_rate": 6.546282325347733e-05, "loss": 1.1363, "step": 45385 }, { "epoch": 0.65, "grad_norm": 0.60546875, "learning_rate": 6.543932871334652e-05, "loss": 0.8631, "step": 45390 }, { "epoch": 0.65, "grad_norm": 0.54296875, "learning_rate": 6.541583633951795e-05, "loss": 0.7846, "step": 45395 }, { "epoch": 0.65, "grad_norm": 0.55078125, "learning_rate": 6.539234613346415e-05, "loss": 0.9328, "step": 45400 }, { "epoch": 0.65, "grad_norm": 0.58203125, "learning_rate": 6.536885809665752e-05, "loss": 1.0687, "step": 45405 }, { "epoch": 0.65, "grad_norm": 0.51953125, "learning_rate": 6.53453722305703e-05, "loss": 0.7834, "step": 45410 }, { "epoch": 0.65, "grad_norm": 0.546875, "learning_rate": 6.532188853667462e-05, "loss": 0.9781, "step": 45415 }, { "epoch": 0.65, "grad_norm": 0.4921875, "learning_rate": 6.529840701644245e-05, "loss": 0.9171, "step": 45420 }, { "epoch": 0.65, "grad_norm": 0.5546875, "learning_rate": 6.527492767134566e-05, "loss": 0.9773, "step": 45425 }, { "epoch": 0.65, "grad_norm": 0.48046875, "learning_rate": 6.525145050285594e-05, "loss": 0.8882, "step": 45430 }, { "epoch": 0.65, "grad_norm": 0.51171875, "learning_rate": 6.522797551244487e-05, "loss": 0.944, "step": 45435 }, { "epoch": 0.65, "grad_norm": 0.53125, "learning_rate": 6.520450270158391e-05, "loss": 0.9338, "step": 45440 }, { "epoch": 0.65, "grad_norm": 0.60546875, "learning_rate": 6.518103207174436e-05, "loss": 0.9582, "step": 45445 }, { "epoch": 0.65, "grad_norm": 0.55078125, "learning_rate": 6.515756362439736e-05, "loss": 0.9155, "step": 45450 }, { "epoch": 0.65, "grad_norm": 0.53125, "learning_rate": 6.513409736101396e-05, "loss": 0.9894, "step": 45455 }, { "epoch": 0.65, "grad_norm": 0.6171875, "learning_rate": 6.511063328306504e-05, "loss": 1.0464, "step": 45460 }, { "epoch": 0.65, "grad_norm": 0.56640625, "learning_rate": 6.508717139202139e-05, "loss": 0.9744, "step": 45465 }, { "epoch": 0.65, "grad_norm": 0.6015625, "learning_rate": 6.506371168935359e-05, "loss": 0.996, "step": 45470 }, { "epoch": 0.65, "grad_norm": 0.5234375, "learning_rate": 6.50402541765321e-05, "loss": 0.8536, "step": 45475 }, { "epoch": 0.65, "grad_norm": 0.63671875, "learning_rate": 6.501679885502731e-05, "loss": 1.0797, "step": 45480 }, { "epoch": 0.65, "grad_norm": 0.62890625, "learning_rate": 6.499334572630942e-05, "loss": 0.9744, "step": 45485 }, { "epoch": 0.65, "grad_norm": 0.5390625, "learning_rate": 6.496989479184847e-05, "loss": 0.921, "step": 45490 }, { "epoch": 0.65, "grad_norm": 0.6171875, "learning_rate": 6.494644605311442e-05, "loss": 1.087, "step": 45495 }, { "epoch": 0.65, "grad_norm": 0.5625, "learning_rate": 6.492299951157706e-05, "loss": 1.0464, "step": 45500 }, { "epoch": 0.65, "grad_norm": 0.61328125, "learning_rate": 6.489955516870601e-05, "loss": 0.8365, "step": 45505 }, { "epoch": 0.65, "grad_norm": 0.609375, "learning_rate": 6.487611302597085e-05, "loss": 0.9386, "step": 45510 }, { "epoch": 0.65, "grad_norm": 0.55859375, "learning_rate": 6.485267308484095e-05, "loss": 0.9561, "step": 45515 }, { "epoch": 0.65, "grad_norm": 0.5703125, "learning_rate": 6.482923534678552e-05, "loss": 0.8684, "step": 45520 }, { "epoch": 0.65, "grad_norm": 0.6171875, "learning_rate": 6.480579981327365e-05, "loss": 0.9156, "step": 45525 }, { "epoch": 0.65, "grad_norm": 0.5859375, "learning_rate": 6.478236648577431e-05, "loss": 0.9276, "step": 45530 }, { "epoch": 0.65, "grad_norm": 0.625, "learning_rate": 6.475893536575639e-05, "loss": 0.8871, "step": 45535 }, { "epoch": 0.65, "grad_norm": 0.546875, "learning_rate": 6.473550645468853e-05, "loss": 0.9564, "step": 45540 }, { "epoch": 0.65, "grad_norm": 0.5078125, "learning_rate": 6.471207975403926e-05, "loss": 0.7958, "step": 45545 }, { "epoch": 0.65, "grad_norm": 0.486328125, "learning_rate": 6.468865526527704e-05, "loss": 0.8408, "step": 45550 }, { "epoch": 0.65, "grad_norm": 0.5078125, "learning_rate": 6.466523298987013e-05, "loss": 0.8427, "step": 45555 }, { "epoch": 0.65, "grad_norm": 0.53515625, "learning_rate": 6.464181292928664e-05, "loss": 1.0888, "step": 45560 }, { "epoch": 0.65, "grad_norm": 0.5546875, "learning_rate": 6.461839508499461e-05, "loss": 0.9834, "step": 45565 }, { "epoch": 0.65, "grad_norm": 0.6953125, "learning_rate": 6.459497945846189e-05, "loss": 1.12, "step": 45570 }, { "epoch": 0.65, "grad_norm": 0.640625, "learning_rate": 6.457156605115615e-05, "loss": 1.0514, "step": 45575 }, { "epoch": 0.65, "grad_norm": 0.57421875, "learning_rate": 6.454815486454501e-05, "loss": 0.982, "step": 45580 }, { "epoch": 0.65, "grad_norm": 0.51171875, "learning_rate": 6.452474590009587e-05, "loss": 0.9026, "step": 45585 }, { "epoch": 0.65, "grad_norm": 0.578125, "learning_rate": 6.450133915927609e-05, "loss": 0.9451, "step": 45590 }, { "epoch": 0.65, "grad_norm": 0.490234375, "learning_rate": 6.447793464355279e-05, "loss": 0.986, "step": 45595 }, { "epoch": 0.65, "grad_norm": 0.58203125, "learning_rate": 6.445453235439299e-05, "loss": 0.9617, "step": 45600 }, { "epoch": 0.65, "grad_norm": 0.6015625, "learning_rate": 6.44311322932636e-05, "loss": 0.7838, "step": 45605 }, { "epoch": 0.65, "grad_norm": 0.6953125, "learning_rate": 6.440773446163135e-05, "loss": 1.0951, "step": 45610 }, { "epoch": 0.65, "grad_norm": 0.56640625, "learning_rate": 6.438433886096283e-05, "loss": 0.9337, "step": 45615 }, { "epoch": 0.65, "grad_norm": 0.61328125, "learning_rate": 6.436094549272457e-05, "loss": 1.0079, "step": 45620 }, { "epoch": 0.65, "grad_norm": 0.51171875, "learning_rate": 6.433755435838277e-05, "loss": 0.937, "step": 45625 }, { "epoch": 0.65, "grad_norm": 0.65234375, "learning_rate": 6.43141654594037e-05, "loss": 0.85, "step": 45630 }, { "epoch": 0.65, "grad_norm": 0.5859375, "learning_rate": 6.429077879725338e-05, "loss": 1.2354, "step": 45635 }, { "epoch": 0.65, "grad_norm": 0.5625, "learning_rate": 6.42673943733977e-05, "loss": 1.0328, "step": 45640 }, { "epoch": 0.65, "grad_norm": 0.4765625, "learning_rate": 6.424401218930245e-05, "loss": 0.9628, "step": 45645 }, { "epoch": 0.65, "grad_norm": 0.625, "learning_rate": 6.422063224643325e-05, "loss": 1.0466, "step": 45650 }, { "epoch": 0.65, "grad_norm": 0.58984375, "learning_rate": 6.419725454625554e-05, "loss": 1.0123, "step": 45655 }, { "epoch": 0.65, "grad_norm": 0.515625, "learning_rate": 6.417387909023471e-05, "loss": 0.8303, "step": 45660 }, { "epoch": 0.66, "grad_norm": 0.4765625, "learning_rate": 6.415050587983593e-05, "loss": 0.9342, "step": 45665 }, { "epoch": 0.66, "grad_norm": 0.5078125, "learning_rate": 6.412713491652427e-05, "loss": 0.8515, "step": 45670 }, { "epoch": 0.66, "grad_norm": 0.5703125, "learning_rate": 6.410376620176468e-05, "loss": 0.9153, "step": 45675 }, { "epoch": 0.66, "grad_norm": 0.62890625, "learning_rate": 6.40803997370219e-05, "loss": 0.895, "step": 45680 }, { "epoch": 0.66, "grad_norm": 0.55078125, "learning_rate": 6.405703552376057e-05, "loss": 0.9686, "step": 45685 }, { "epoch": 0.66, "grad_norm": 0.486328125, "learning_rate": 6.403367356344517e-05, "loss": 0.9334, "step": 45690 }, { "epoch": 0.66, "grad_norm": 0.5703125, "learning_rate": 6.401031385754006e-05, "loss": 0.9136, "step": 45695 }, { "epoch": 0.66, "grad_norm": 0.5546875, "learning_rate": 6.39869564075095e-05, "loss": 1.0172, "step": 45700 }, { "epoch": 0.66, "grad_norm": 0.61328125, "learning_rate": 6.396360121481752e-05, "loss": 1.1214, "step": 45705 }, { "epoch": 0.66, "grad_norm": 0.53515625, "learning_rate": 6.394024828092804e-05, "loss": 0.891, "step": 45710 }, { "epoch": 0.66, "grad_norm": 0.486328125, "learning_rate": 6.391689760730488e-05, "loss": 0.8239, "step": 45715 }, { "epoch": 0.66, "grad_norm": 0.578125, "learning_rate": 6.389354919541169e-05, "loss": 0.9533, "step": 45720 }, { "epoch": 0.66, "grad_norm": 0.55078125, "learning_rate": 6.387020304671197e-05, "loss": 0.9281, "step": 45725 }, { "epoch": 0.66, "grad_norm": 0.671875, "learning_rate": 6.384685916266901e-05, "loss": 0.9008, "step": 45730 }, { "epoch": 0.66, "grad_norm": 0.6484375, "learning_rate": 6.382351754474614e-05, "loss": 0.8932, "step": 45735 }, { "epoch": 0.66, "grad_norm": 0.51171875, "learning_rate": 6.380017819440638e-05, "loss": 0.8201, "step": 45740 }, { "epoch": 0.66, "grad_norm": 0.5546875, "learning_rate": 6.377684111311267e-05, "loss": 0.9648, "step": 45745 }, { "epoch": 0.66, "grad_norm": 0.5234375, "learning_rate": 6.375350630232782e-05, "loss": 0.916, "step": 45750 }, { "epoch": 0.66, "grad_norm": 0.482421875, "learning_rate": 6.373017376351447e-05, "loss": 0.9437, "step": 45755 }, { "epoch": 0.66, "grad_norm": 0.52734375, "learning_rate": 6.370684349813515e-05, "loss": 0.894, "step": 45760 }, { "epoch": 0.66, "grad_norm": 0.5390625, "learning_rate": 6.36835155076522e-05, "loss": 1.1337, "step": 45765 }, { "epoch": 0.66, "grad_norm": 0.5703125, "learning_rate": 6.366018979352786e-05, "loss": 1.0193, "step": 45770 }, { "epoch": 0.66, "grad_norm": 0.6328125, "learning_rate": 6.363686635722421e-05, "loss": 1.1689, "step": 45775 }, { "epoch": 0.66, "grad_norm": 0.52734375, "learning_rate": 6.361354520020324e-05, "loss": 1.0801, "step": 45780 }, { "epoch": 0.66, "grad_norm": 0.53515625, "learning_rate": 6.359022632392663e-05, "loss": 0.8107, "step": 45785 }, { "epoch": 0.66, "grad_norm": 0.51953125, "learning_rate": 6.356690972985612e-05, "loss": 0.9673, "step": 45790 }, { "epoch": 0.66, "grad_norm": 0.52734375, "learning_rate": 6.354359541945323e-05, "loss": 0.963, "step": 45795 }, { "epoch": 0.66, "grad_norm": 0.671875, "learning_rate": 6.352028339417926e-05, "loss": 0.7841, "step": 45800 }, { "epoch": 0.66, "grad_norm": 0.64453125, "learning_rate": 6.349697365549549e-05, "loss": 1.0819, "step": 45805 }, { "epoch": 0.66, "grad_norm": 0.59765625, "learning_rate": 6.3473666204863e-05, "loss": 1.0213, "step": 45810 }, { "epoch": 0.66, "grad_norm": 0.60546875, "learning_rate": 6.345036104374267e-05, "loss": 0.9277, "step": 45815 }, { "epoch": 0.66, "grad_norm": 0.52734375, "learning_rate": 6.342705817359538e-05, "loss": 1.005, "step": 45820 }, { "epoch": 0.66, "grad_norm": 0.578125, "learning_rate": 6.340375759588173e-05, "loss": 1.0071, "step": 45825 }, { "epoch": 0.66, "grad_norm": 0.52734375, "learning_rate": 6.338045931206224e-05, "loss": 0.9861, "step": 45830 }, { "epoch": 0.66, "grad_norm": 0.5390625, "learning_rate": 6.335716332359725e-05, "loss": 0.9195, "step": 45835 }, { "epoch": 0.66, "grad_norm": 0.58203125, "learning_rate": 6.333386963194699e-05, "loss": 1.1381, "step": 45840 }, { "epoch": 0.66, "grad_norm": 0.58203125, "learning_rate": 6.331057823857156e-05, "loss": 0.8934, "step": 45845 }, { "epoch": 0.66, "grad_norm": 0.5390625, "learning_rate": 6.328728914493085e-05, "loss": 0.8748, "step": 45850 }, { "epoch": 0.66, "grad_norm": 0.51953125, "learning_rate": 6.326400235248466e-05, "loss": 0.8566, "step": 45855 }, { "epoch": 0.66, "grad_norm": 0.50390625, "learning_rate": 6.324071786269268e-05, "loss": 0.8186, "step": 45860 }, { "epoch": 0.66, "grad_norm": 0.57421875, "learning_rate": 6.321743567701435e-05, "loss": 1.1196, "step": 45865 }, { "epoch": 0.66, "grad_norm": 0.71875, "learning_rate": 6.319415579690902e-05, "loss": 0.9681, "step": 45870 }, { "epoch": 0.66, "grad_norm": 0.51953125, "learning_rate": 6.317087822383596e-05, "loss": 0.8658, "step": 45875 }, { "epoch": 0.66, "grad_norm": 0.5234375, "learning_rate": 6.314760295925418e-05, "loss": 1.0102, "step": 45880 }, { "epoch": 0.66, "grad_norm": 0.5390625, "learning_rate": 6.312433000462266e-05, "loss": 1.0275, "step": 45885 }, { "epoch": 0.66, "grad_norm": 0.56640625, "learning_rate": 6.310105936140009e-05, "loss": 1.1024, "step": 45890 }, { "epoch": 0.66, "grad_norm": 0.546875, "learning_rate": 6.307779103104513e-05, "loss": 0.876, "step": 45895 }, { "epoch": 0.66, "grad_norm": 0.498046875, "learning_rate": 6.30545250150163e-05, "loss": 0.8698, "step": 45900 }, { "epoch": 0.66, "grad_norm": 0.53515625, "learning_rate": 6.303126131477193e-05, "loss": 0.8891, "step": 45905 }, { "epoch": 0.66, "grad_norm": 0.65234375, "learning_rate": 6.300799993177017e-05, "loss": 0.8649, "step": 45910 }, { "epoch": 0.66, "grad_norm": 0.65234375, "learning_rate": 6.298474086746913e-05, "loss": 0.9766, "step": 45915 }, { "epoch": 0.66, "grad_norm": 0.53125, "learning_rate": 6.29614841233267e-05, "loss": 0.9994, "step": 45920 }, { "epoch": 0.66, "grad_norm": 0.58203125, "learning_rate": 6.293822970080059e-05, "loss": 0.9491, "step": 45925 }, { "epoch": 0.66, "grad_norm": 0.59375, "learning_rate": 6.291497760134848e-05, "loss": 0.8919, "step": 45930 }, { "epoch": 0.66, "grad_norm": 0.49609375, "learning_rate": 6.289172782642782e-05, "loss": 1.018, "step": 45935 }, { "epoch": 0.66, "grad_norm": 0.5078125, "learning_rate": 6.286848037749593e-05, "loss": 0.9458, "step": 45940 }, { "epoch": 0.66, "grad_norm": 0.51953125, "learning_rate": 6.284523525600996e-05, "loss": 0.8374, "step": 45945 }, { "epoch": 0.66, "grad_norm": 0.58203125, "learning_rate": 6.282199246342694e-05, "loss": 0.9878, "step": 45950 }, { "epoch": 0.66, "grad_norm": 0.578125, "learning_rate": 6.27987520012038e-05, "loss": 0.874, "step": 45955 }, { "epoch": 0.66, "grad_norm": 0.5546875, "learning_rate": 6.277551387079725e-05, "loss": 0.9338, "step": 45960 }, { "epoch": 0.66, "grad_norm": 0.55078125, "learning_rate": 6.275227807366387e-05, "loss": 0.95, "step": 45965 }, { "epoch": 0.66, "grad_norm": 0.625, "learning_rate": 6.272904461126012e-05, "loss": 0.9569, "step": 45970 }, { "epoch": 0.66, "grad_norm": 0.55859375, "learning_rate": 6.270581348504233e-05, "loss": 0.9139, "step": 45975 }, { "epoch": 0.66, "grad_norm": 0.494140625, "learning_rate": 6.26825846964666e-05, "loss": 1.0085, "step": 45980 }, { "epoch": 0.66, "grad_norm": 0.5625, "learning_rate": 6.265935824698897e-05, "loss": 1.03, "step": 45985 }, { "epoch": 0.66, "grad_norm": 0.59375, "learning_rate": 6.263613413806532e-05, "loss": 0.9165, "step": 45990 }, { "epoch": 0.66, "grad_norm": 0.609375, "learning_rate": 6.261291237115132e-05, "loss": 1.1229, "step": 45995 }, { "epoch": 0.66, "grad_norm": 0.54296875, "learning_rate": 6.258969294770255e-05, "loss": 0.9052, "step": 46000 }, { "epoch": 0.66, "grad_norm": 0.61328125, "learning_rate": 6.256647586917441e-05, "loss": 0.9974, "step": 46005 }, { "epoch": 0.66, "grad_norm": 0.50390625, "learning_rate": 6.254326113702222e-05, "loss": 0.8159, "step": 46010 }, { "epoch": 0.66, "grad_norm": 0.5625, "learning_rate": 6.252004875270107e-05, "loss": 0.9784, "step": 46015 }, { "epoch": 0.66, "grad_norm": 0.50390625, "learning_rate": 6.249683871766594e-05, "loss": 0.8097, "step": 46020 }, { "epoch": 0.66, "grad_norm": 0.5546875, "learning_rate": 6.24736310333717e-05, "loss": 1.0112, "step": 46025 }, { "epoch": 0.66, "grad_norm": 0.5859375, "learning_rate": 6.245042570127299e-05, "loss": 0.9121, "step": 46030 }, { "epoch": 0.66, "grad_norm": 0.51953125, "learning_rate": 6.242722272282436e-05, "loss": 0.9148, "step": 46035 }, { "epoch": 0.66, "grad_norm": 0.60546875, "learning_rate": 6.240402209948024e-05, "loss": 1.0444, "step": 46040 }, { "epoch": 0.66, "grad_norm": 0.5078125, "learning_rate": 6.23808238326948e-05, "loss": 0.8895, "step": 46045 }, { "epoch": 0.66, "grad_norm": 0.5390625, "learning_rate": 6.235762792392215e-05, "loss": 0.839, "step": 46050 }, { "epoch": 0.66, "grad_norm": 0.5625, "learning_rate": 6.233443437461628e-05, "loss": 1.0243, "step": 46055 }, { "epoch": 0.66, "grad_norm": 0.52734375, "learning_rate": 6.231124318623094e-05, "loss": 0.81, "step": 46060 }, { "epoch": 0.66, "grad_norm": 0.6484375, "learning_rate": 6.22880543602198e-05, "loss": 1.1031, "step": 46065 }, { "epoch": 0.66, "grad_norm": 0.62890625, "learning_rate": 6.226486789803638e-05, "loss": 0.9281, "step": 46070 }, { "epoch": 0.66, "grad_norm": 0.54296875, "learning_rate": 6.2241683801134e-05, "loss": 1.0277, "step": 46075 }, { "epoch": 0.66, "grad_norm": 0.51171875, "learning_rate": 6.221850207096589e-05, "loss": 0.9444, "step": 46080 }, { "epoch": 0.66, "grad_norm": 0.5703125, "learning_rate": 6.219532270898511e-05, "loss": 0.9583, "step": 46085 }, { "epoch": 0.66, "grad_norm": 0.439453125, "learning_rate": 6.217214571664453e-05, "loss": 0.7989, "step": 46090 }, { "epoch": 0.66, "grad_norm": 0.65234375, "learning_rate": 6.214897109539701e-05, "loss": 1.0164, "step": 46095 }, { "epoch": 0.66, "grad_norm": 0.58984375, "learning_rate": 6.212579884669503e-05, "loss": 0.9877, "step": 46100 }, { "epoch": 0.66, "grad_norm": 0.52734375, "learning_rate": 6.210262897199113e-05, "loss": 0.8748, "step": 46105 }, { "epoch": 0.66, "grad_norm": 0.5859375, "learning_rate": 6.20794614727376e-05, "loss": 1.0434, "step": 46110 }, { "epoch": 0.66, "grad_norm": 0.66796875, "learning_rate": 6.20562963503866e-05, "loss": 1.0184, "step": 46115 }, { "epoch": 0.66, "grad_norm": 0.65234375, "learning_rate": 6.203313360639019e-05, "loss": 0.9778, "step": 46120 }, { "epoch": 0.66, "grad_norm": 0.62890625, "learning_rate": 6.200997324220018e-05, "loss": 1.0332, "step": 46125 }, { "epoch": 0.66, "grad_norm": 0.5546875, "learning_rate": 6.198681525926831e-05, "loss": 0.9329, "step": 46130 }, { "epoch": 0.66, "grad_norm": 0.515625, "learning_rate": 6.196365965904617e-05, "loss": 0.8952, "step": 46135 }, { "epoch": 0.66, "grad_norm": 0.625, "learning_rate": 6.194050644298517e-05, "loss": 0.8317, "step": 46140 }, { "epoch": 0.66, "grad_norm": 0.52734375, "learning_rate": 6.191735561253658e-05, "loss": 1.0471, "step": 46145 }, { "epoch": 0.66, "grad_norm": 0.51171875, "learning_rate": 6.189420716915149e-05, "loss": 1.0214, "step": 46150 }, { "epoch": 0.66, "grad_norm": 0.56640625, "learning_rate": 6.187106111428089e-05, "loss": 1.022, "step": 46155 }, { "epoch": 0.66, "grad_norm": 0.5546875, "learning_rate": 6.184791744937562e-05, "loss": 0.9737, "step": 46160 }, { "epoch": 0.66, "grad_norm": 0.765625, "learning_rate": 6.182477617588634e-05, "loss": 0.9934, "step": 46165 }, { "epoch": 0.66, "grad_norm": 0.58984375, "learning_rate": 6.180163729526353e-05, "loss": 0.9681, "step": 46170 }, { "epoch": 0.66, "grad_norm": 0.578125, "learning_rate": 6.177850080895764e-05, "loss": 0.9406, "step": 46175 }, { "epoch": 0.66, "grad_norm": 0.58984375, "learning_rate": 6.175536671841882e-05, "loss": 0.8751, "step": 46180 }, { "epoch": 0.66, "grad_norm": 0.498046875, "learning_rate": 6.173223502509717e-05, "loss": 0.8101, "step": 46185 }, { "epoch": 0.66, "grad_norm": 0.53515625, "learning_rate": 6.170910573044265e-05, "loss": 0.8607, "step": 46190 }, { "epoch": 0.66, "grad_norm": 0.578125, "learning_rate": 6.168597883590495e-05, "loss": 0.954, "step": 46195 }, { "epoch": 0.66, "grad_norm": 0.53515625, "learning_rate": 6.166285434293378e-05, "loss": 0.9483, "step": 46200 }, { "epoch": 0.66, "grad_norm": 0.494140625, "learning_rate": 6.163973225297855e-05, "loss": 1.0113, "step": 46205 }, { "epoch": 0.66, "grad_norm": 0.486328125, "learning_rate": 6.161661256748856e-05, "loss": 0.9647, "step": 46210 }, { "epoch": 0.66, "grad_norm": 0.53125, "learning_rate": 6.159349528791302e-05, "loss": 1.0275, "step": 46215 }, { "epoch": 0.66, "grad_norm": 0.5390625, "learning_rate": 6.157038041570094e-05, "loss": 0.9002, "step": 46220 }, { "epoch": 0.66, "grad_norm": 0.60546875, "learning_rate": 6.154726795230117e-05, "loss": 1.0381, "step": 46225 }, { "epoch": 0.66, "grad_norm": 0.57421875, "learning_rate": 6.152415789916246e-05, "loss": 0.9387, "step": 46230 }, { "epoch": 0.66, "grad_norm": 0.5859375, "learning_rate": 6.150105025773335e-05, "loss": 0.9686, "step": 46235 }, { "epoch": 0.66, "grad_norm": 0.60546875, "learning_rate": 6.147794502946223e-05, "loss": 1.0323, "step": 46240 }, { "epoch": 0.66, "grad_norm": 0.60546875, "learning_rate": 6.145484221579743e-05, "loss": 0.8602, "step": 46245 }, { "epoch": 0.66, "grad_norm": 0.5625, "learning_rate": 6.143174181818701e-05, "loss": 0.866, "step": 46250 }, { "epoch": 0.66, "grad_norm": 0.61328125, "learning_rate": 6.140864383807894e-05, "loss": 1.0404, "step": 46255 }, { "epoch": 0.66, "grad_norm": 0.5703125, "learning_rate": 6.138554827692103e-05, "loss": 1.0325, "step": 46260 }, { "epoch": 0.66, "grad_norm": 0.65234375, "learning_rate": 6.13624551361609e-05, "loss": 1.0028, "step": 46265 }, { "epoch": 0.66, "grad_norm": 0.52734375, "learning_rate": 6.13393644172461e-05, "loss": 0.7928, "step": 46270 }, { "epoch": 0.66, "grad_norm": 0.5234375, "learning_rate": 6.131627612162397e-05, "loss": 1.0011, "step": 46275 }, { "epoch": 0.66, "grad_norm": 0.51171875, "learning_rate": 6.12931902507417e-05, "loss": 0.9153, "step": 46280 }, { "epoch": 0.66, "grad_norm": 0.56640625, "learning_rate": 6.127010680604636e-05, "loss": 0.9783, "step": 46285 }, { "epoch": 0.66, "grad_norm": 0.59765625, "learning_rate": 6.124702578898484e-05, "loss": 1.0021, "step": 46290 }, { "epoch": 0.66, "grad_norm": 0.65234375, "learning_rate": 6.122394720100386e-05, "loss": 0.9419, "step": 46295 }, { "epoch": 0.66, "grad_norm": 0.6796875, "learning_rate": 6.120087104355006e-05, "loss": 0.9133, "step": 46300 }, { "epoch": 0.66, "grad_norm": 0.482421875, "learning_rate": 6.117779731806986e-05, "loss": 0.9986, "step": 46305 }, { "epoch": 0.66, "grad_norm": 0.5859375, "learning_rate": 6.115472602600951e-05, "loss": 0.9771, "step": 46310 }, { "epoch": 0.66, "grad_norm": 0.67578125, "learning_rate": 6.11316571688152e-05, "loss": 1.0309, "step": 46315 }, { "epoch": 0.66, "grad_norm": 0.53515625, "learning_rate": 6.110859074793284e-05, "loss": 0.9542, "step": 46320 }, { "epoch": 0.66, "grad_norm": 0.5078125, "learning_rate": 6.108552676480834e-05, "loss": 0.8965, "step": 46325 }, { "epoch": 0.66, "grad_norm": 0.578125, "learning_rate": 6.106246522088735e-05, "loss": 0.9091, "step": 46330 }, { "epoch": 0.66, "grad_norm": 0.5859375, "learning_rate": 6.103940611761535e-05, "loss": 0.9457, "step": 46335 }, { "epoch": 0.66, "grad_norm": 0.5546875, "learning_rate": 6.101634945643777e-05, "loss": 1.0486, "step": 46340 }, { "epoch": 0.66, "grad_norm": 0.578125, "learning_rate": 6.099329523879981e-05, "loss": 1.0533, "step": 46345 }, { "epoch": 0.66, "grad_norm": 0.53125, "learning_rate": 6.09702434661465e-05, "loss": 1.1038, "step": 46350 }, { "epoch": 0.66, "grad_norm": 0.55859375, "learning_rate": 6.0947194139922824e-05, "loss": 0.8647, "step": 46355 }, { "epoch": 0.67, "grad_norm": 0.546875, "learning_rate": 6.092414726157348e-05, "loss": 0.9562, "step": 46360 }, { "epoch": 0.67, "grad_norm": 0.478515625, "learning_rate": 6.090110283254309e-05, "loss": 0.9561, "step": 46365 }, { "epoch": 0.67, "grad_norm": 0.53515625, "learning_rate": 6.087806085427611e-05, "loss": 0.8962, "step": 46370 }, { "epoch": 0.67, "grad_norm": 0.486328125, "learning_rate": 6.08550213282168e-05, "loss": 0.8846, "step": 46375 }, { "epoch": 0.67, "grad_norm": 0.5859375, "learning_rate": 6.083198425580936e-05, "loss": 0.8637, "step": 46380 }, { "epoch": 0.67, "grad_norm": 0.52734375, "learning_rate": 6.080894963849776e-05, "loss": 0.907, "step": 46385 }, { "epoch": 0.67, "grad_norm": 0.70703125, "learning_rate": 6.0785917477725806e-05, "loss": 1.0066, "step": 46390 }, { "epoch": 0.67, "grad_norm": 0.57421875, "learning_rate": 6.076288777493723e-05, "loss": 0.925, "step": 46395 }, { "epoch": 0.67, "grad_norm": 0.48046875, "learning_rate": 6.073986053157553e-05, "loss": 0.9988, "step": 46400 }, { "epoch": 0.67, "grad_norm": 0.59375, "learning_rate": 6.071683574908407e-05, "loss": 1.0245, "step": 46405 }, { "epoch": 0.67, "grad_norm": 0.5390625, "learning_rate": 6.0693813428906124e-05, "loss": 1.0269, "step": 46410 }, { "epoch": 0.67, "grad_norm": 0.546875, "learning_rate": 6.0670793572484696e-05, "loss": 0.8969, "step": 46415 }, { "epoch": 0.67, "grad_norm": 0.58984375, "learning_rate": 6.064777618126272e-05, "loss": 0.8421, "step": 46420 }, { "epoch": 0.67, "grad_norm": 0.53125, "learning_rate": 6.062476125668293e-05, "loss": 1.0216, "step": 46425 }, { "epoch": 0.67, "grad_norm": 0.58984375, "learning_rate": 6.060174880018798e-05, "loss": 1.041, "step": 46430 }, { "epoch": 0.67, "grad_norm": 0.478515625, "learning_rate": 6.0578738813220273e-05, "loss": 0.9485, "step": 46435 }, { "epoch": 0.67, "grad_norm": 0.59375, "learning_rate": 6.055573129722212e-05, "loss": 1.0267, "step": 46440 }, { "epoch": 0.67, "grad_norm": 0.5625, "learning_rate": 6.053272625363562e-05, "loss": 0.98, "step": 46445 }, { "epoch": 0.67, "grad_norm": 0.546875, "learning_rate": 6.050972368390282e-05, "loss": 0.994, "step": 46450 }, { "epoch": 0.67, "grad_norm": 0.62109375, "learning_rate": 6.048672358946552e-05, "loss": 1.0051, "step": 46455 }, { "epoch": 0.67, "grad_norm": 0.59765625, "learning_rate": 6.0463725971765396e-05, "loss": 1.0058, "step": 46460 }, { "epoch": 0.67, "grad_norm": 0.5703125, "learning_rate": 6.044073083224393e-05, "loss": 0.9911, "step": 46465 }, { "epoch": 0.67, "grad_norm": 0.5078125, "learning_rate": 6.041773817234251e-05, "loss": 0.908, "step": 46470 }, { "epoch": 0.67, "grad_norm": 0.60546875, "learning_rate": 6.0394747993502354e-05, "loss": 1.0566, "step": 46475 }, { "epoch": 0.67, "grad_norm": 0.515625, "learning_rate": 6.037176029716448e-05, "loss": 0.9758, "step": 46480 }, { "epoch": 0.67, "grad_norm": 0.62890625, "learning_rate": 6.034877508476982e-05, "loss": 1.0319, "step": 46485 }, { "epoch": 0.67, "grad_norm": 0.64453125, "learning_rate": 6.0325792357759106e-05, "loss": 0.855, "step": 46490 }, { "epoch": 0.67, "grad_norm": 0.5859375, "learning_rate": 6.0302812117572883e-05, "loss": 0.963, "step": 46495 }, { "epoch": 0.67, "grad_norm": 0.5234375, "learning_rate": 6.027983436565163e-05, "loss": 0.9689, "step": 46500 }, { "epoch": 0.67, "grad_norm": 0.53515625, "learning_rate": 6.0256859103435595e-05, "loss": 1.0157, "step": 46505 }, { "epoch": 0.67, "grad_norm": 0.59375, "learning_rate": 6.0233886332364894e-05, "loss": 1.0156, "step": 46510 }, { "epoch": 0.67, "grad_norm": 0.54296875, "learning_rate": 6.0210916053879515e-05, "loss": 1.0249, "step": 46515 }, { "epoch": 0.67, "grad_norm": 0.5078125, "learning_rate": 6.018794826941918e-05, "loss": 0.9484, "step": 46520 }, { "epoch": 0.67, "grad_norm": 0.546875, "learning_rate": 6.0164982980423636e-05, "loss": 0.9495, "step": 46525 }, { "epoch": 0.67, "grad_norm": 0.609375, "learning_rate": 6.01420201883323e-05, "loss": 1.0027, "step": 46530 }, { "epoch": 0.67, "grad_norm": 0.60546875, "learning_rate": 6.011905989458453e-05, "loss": 0.967, "step": 46535 }, { "epoch": 0.67, "grad_norm": 0.6640625, "learning_rate": 6.009610210061951e-05, "loss": 1.1281, "step": 46540 }, { "epoch": 0.67, "grad_norm": 0.5, "learning_rate": 6.0073146807876266e-05, "loss": 0.8276, "step": 46545 }, { "epoch": 0.67, "grad_norm": 0.546875, "learning_rate": 6.0050194017793635e-05, "loss": 1.025, "step": 46550 }, { "epoch": 0.67, "grad_norm": 0.5859375, "learning_rate": 6.0027243731810355e-05, "loss": 0.9618, "step": 46555 }, { "epoch": 0.67, "grad_norm": 0.53125, "learning_rate": 6.000429595136497e-05, "loss": 1.0105, "step": 46560 }, { "epoch": 0.67, "grad_norm": 0.5, "learning_rate": 5.998135067789589e-05, "loss": 0.8233, "step": 46565 }, { "epoch": 0.67, "grad_norm": 0.53515625, "learning_rate": 5.99584079128413e-05, "loss": 0.9368, "step": 46570 }, { "epoch": 0.67, "grad_norm": 0.58203125, "learning_rate": 5.9935467657639286e-05, "loss": 0.9314, "step": 46575 }, { "epoch": 0.67, "grad_norm": 0.60546875, "learning_rate": 5.99125299137278e-05, "loss": 0.921, "step": 46580 }, { "epoch": 0.67, "grad_norm": 0.61328125, "learning_rate": 5.9889594682544604e-05, "loss": 0.9308, "step": 46585 }, { "epoch": 0.67, "grad_norm": 0.640625, "learning_rate": 5.986666196552728e-05, "loss": 1.0665, "step": 46590 }, { "epoch": 0.67, "grad_norm": 0.5625, "learning_rate": 5.984373176411331e-05, "loss": 0.8952, "step": 46595 }, { "epoch": 0.67, "grad_norm": 0.609375, "learning_rate": 5.982080407973996e-05, "loss": 0.8521, "step": 46600 }, { "epoch": 0.67, "grad_norm": 0.55859375, "learning_rate": 5.9797878913844364e-05, "loss": 1.0417, "step": 46605 }, { "epoch": 0.67, "grad_norm": 0.6171875, "learning_rate": 5.977495626786351e-05, "loss": 1.0472, "step": 46610 }, { "epoch": 0.67, "grad_norm": 0.423828125, "learning_rate": 5.975203614323421e-05, "loss": 0.9462, "step": 46615 }, { "epoch": 0.67, "grad_norm": 0.62109375, "learning_rate": 5.9729118541393156e-05, "loss": 1.0091, "step": 46620 }, { "epoch": 0.67, "grad_norm": 0.53125, "learning_rate": 5.970620346377678e-05, "loss": 1.0154, "step": 46625 }, { "epoch": 0.67, "grad_norm": 0.546875, "learning_rate": 5.968329091182145e-05, "loss": 0.9673, "step": 46630 }, { "epoch": 0.67, "grad_norm": 0.6484375, "learning_rate": 5.966038088696338e-05, "loss": 1.0245, "step": 46635 }, { "epoch": 0.67, "grad_norm": 0.55078125, "learning_rate": 5.963747339063859e-05, "loss": 0.925, "step": 46640 }, { "epoch": 0.67, "grad_norm": 0.50390625, "learning_rate": 5.96145684242829e-05, "loss": 0.8854, "step": 46645 }, { "epoch": 0.67, "grad_norm": 0.578125, "learning_rate": 5.959166598933209e-05, "loss": 0.9141, "step": 46650 }, { "epoch": 0.67, "grad_norm": 0.55859375, "learning_rate": 5.956876608722167e-05, "loss": 0.8664, "step": 46655 }, { "epoch": 0.67, "grad_norm": 0.59765625, "learning_rate": 5.954586871938702e-05, "loss": 0.9962, "step": 46660 }, { "epoch": 0.67, "grad_norm": 0.494140625, "learning_rate": 5.952297388726342e-05, "loss": 0.9225, "step": 46665 }, { "epoch": 0.67, "grad_norm": 0.60546875, "learning_rate": 5.950008159228593e-05, "loss": 0.8407, "step": 46670 }, { "epoch": 0.67, "grad_norm": 0.5703125, "learning_rate": 5.9477191835889425e-05, "loss": 1.0356, "step": 46675 }, { "epoch": 0.67, "grad_norm": 0.5625, "learning_rate": 5.945430461950869e-05, "loss": 1.0125, "step": 46680 }, { "epoch": 0.67, "grad_norm": 0.64453125, "learning_rate": 5.9431419944578305e-05, "loss": 1.1157, "step": 46685 }, { "epoch": 0.67, "grad_norm": 0.56640625, "learning_rate": 5.940853781253274e-05, "loss": 1.0508, "step": 46690 }, { "epoch": 0.67, "grad_norm": 0.71484375, "learning_rate": 5.938565822480625e-05, "loss": 1.0621, "step": 46695 }, { "epoch": 0.67, "grad_norm": 0.61328125, "learning_rate": 5.936278118283294e-05, "loss": 1.008, "step": 46700 }, { "epoch": 0.67, "grad_norm": 0.53515625, "learning_rate": 5.933990668804681e-05, "loss": 0.9433, "step": 46705 }, { "epoch": 0.67, "grad_norm": 0.546875, "learning_rate": 5.931703474188164e-05, "loss": 0.9275, "step": 46710 }, { "epoch": 0.67, "grad_norm": 0.51171875, "learning_rate": 5.9294165345771036e-05, "loss": 0.9586, "step": 46715 }, { "epoch": 0.67, "grad_norm": 0.515625, "learning_rate": 5.9271298501148545e-05, "loss": 0.8778, "step": 46720 }, { "epoch": 0.67, "grad_norm": 0.55078125, "learning_rate": 5.9248434209447456e-05, "loss": 0.9347, "step": 46725 }, { "epoch": 0.67, "grad_norm": 0.546875, "learning_rate": 5.9225572472100895e-05, "loss": 1.152, "step": 46730 }, { "epoch": 0.67, "grad_norm": 0.63671875, "learning_rate": 5.92027132905419e-05, "loss": 0.9424, "step": 46735 }, { "epoch": 0.67, "grad_norm": 0.69140625, "learning_rate": 5.917985666620329e-05, "loss": 1.0837, "step": 46740 }, { "epoch": 0.67, "grad_norm": 0.5625, "learning_rate": 5.9157002600517766e-05, "loss": 0.9528, "step": 46745 }, { "epoch": 0.67, "grad_norm": 0.62109375, "learning_rate": 5.9134151094917834e-05, "loss": 0.9801, "step": 46750 }, { "epoch": 0.67, "grad_norm": 0.5390625, "learning_rate": 5.9111302150835836e-05, "loss": 0.9547, "step": 46755 }, { "epoch": 0.67, "grad_norm": 0.53515625, "learning_rate": 5.908845576970401e-05, "loss": 0.96, "step": 46760 }, { "epoch": 0.67, "grad_norm": 0.46875, "learning_rate": 5.906561195295436e-05, "loss": 0.7969, "step": 46765 }, { "epoch": 0.67, "grad_norm": 0.51953125, "learning_rate": 5.904277070201876e-05, "loss": 0.9844, "step": 46770 }, { "epoch": 0.67, "grad_norm": 0.484375, "learning_rate": 5.901993201832901e-05, "loss": 0.9173, "step": 46775 }, { "epoch": 0.67, "grad_norm": 0.53125, "learning_rate": 5.89970959033165e-05, "loss": 0.8882, "step": 46780 }, { "epoch": 0.67, "grad_norm": 0.546875, "learning_rate": 5.897426235841277e-05, "loss": 0.9216, "step": 46785 }, { "epoch": 0.67, "grad_norm": 0.5625, "learning_rate": 5.895143138504899e-05, "loss": 0.963, "step": 46790 }, { "epoch": 0.67, "grad_norm": 0.478515625, "learning_rate": 5.8928602984656213e-05, "loss": 0.9357, "step": 46795 }, { "epoch": 0.67, "grad_norm": 0.53515625, "learning_rate": 5.890577715866541e-05, "loss": 0.7911, "step": 46800 }, { "epoch": 0.67, "grad_norm": 0.46484375, "learning_rate": 5.888295390850729e-05, "loss": 0.9172, "step": 46805 }, { "epoch": 0.67, "grad_norm": 0.58203125, "learning_rate": 5.886013323561244e-05, "loss": 0.8876, "step": 46810 }, { "epoch": 0.67, "grad_norm": 0.55859375, "learning_rate": 5.8837315141411284e-05, "loss": 0.9875, "step": 46815 }, { "epoch": 0.67, "grad_norm": 0.52734375, "learning_rate": 5.881449962733412e-05, "loss": 0.8395, "step": 46820 }, { "epoch": 0.67, "grad_norm": 0.5625, "learning_rate": 5.8791686694811e-05, "loss": 1.0331, "step": 46825 }, { "epoch": 0.67, "grad_norm": 0.5546875, "learning_rate": 5.876887634527195e-05, "loss": 1.0062, "step": 46830 }, { "epoch": 0.67, "grad_norm": 0.5859375, "learning_rate": 5.874606858014662e-05, "loss": 1.0152, "step": 46835 }, { "epoch": 0.67, "grad_norm": 0.7109375, "learning_rate": 5.872326340086474e-05, "loss": 0.887, "step": 46840 }, { "epoch": 0.67, "grad_norm": 0.59375, "learning_rate": 5.87004608088557e-05, "loss": 0.7325, "step": 46845 }, { "epoch": 0.67, "grad_norm": 0.6015625, "learning_rate": 5.86776608055488e-05, "loss": 1.0844, "step": 46850 }, { "epoch": 0.67, "grad_norm": 0.54296875, "learning_rate": 5.8654863392373185e-05, "loss": 0.9183, "step": 46855 }, { "epoch": 0.67, "grad_norm": 0.671875, "learning_rate": 5.863206857075785e-05, "loss": 0.9098, "step": 46860 }, { "epoch": 0.67, "grad_norm": 0.60546875, "learning_rate": 5.860927634213154e-05, "loss": 0.9646, "step": 46865 }, { "epoch": 0.67, "grad_norm": 0.609375, "learning_rate": 5.8586486707922924e-05, "loss": 0.9475, "step": 46870 }, { "epoch": 0.67, "grad_norm": 0.578125, "learning_rate": 5.8563699669560524e-05, "loss": 0.94, "step": 46875 }, { "epoch": 0.67, "grad_norm": 0.5859375, "learning_rate": 5.8540915228472624e-05, "loss": 0.9383, "step": 46880 }, { "epoch": 0.67, "grad_norm": 0.52734375, "learning_rate": 5.8518133386087356e-05, "loss": 1.0302, "step": 46885 }, { "epoch": 0.67, "grad_norm": 0.6796875, "learning_rate": 5.8495354143832716e-05, "loss": 0.9661, "step": 46890 }, { "epoch": 0.67, "grad_norm": 0.53515625, "learning_rate": 5.847257750313656e-05, "loss": 1.0525, "step": 46895 }, { "epoch": 0.67, "grad_norm": 0.578125, "learning_rate": 5.8449803465426545e-05, "loss": 0.8351, "step": 46900 }, { "epoch": 0.67, "grad_norm": 0.5234375, "learning_rate": 5.842703203213016e-05, "loss": 0.992, "step": 46905 }, { "epoch": 0.67, "grad_norm": 0.5625, "learning_rate": 5.8404263204674756e-05, "loss": 1.0304, "step": 46910 }, { "epoch": 0.67, "grad_norm": 0.50390625, "learning_rate": 5.8381496984487495e-05, "loss": 0.7929, "step": 46915 }, { "epoch": 0.67, "grad_norm": 0.671875, "learning_rate": 5.835873337299544e-05, "loss": 1.0232, "step": 46920 }, { "epoch": 0.67, "grad_norm": 0.62890625, "learning_rate": 5.833597237162538e-05, "loss": 0.952, "step": 46925 }, { "epoch": 0.67, "grad_norm": 0.58203125, "learning_rate": 5.8313213981804005e-05, "loss": 0.9321, "step": 46930 }, { "epoch": 0.67, "grad_norm": 0.54296875, "learning_rate": 5.829045820495791e-05, "loss": 0.8802, "step": 46935 }, { "epoch": 0.67, "grad_norm": 0.55859375, "learning_rate": 5.826770504251339e-05, "loss": 1.1071, "step": 46940 }, { "epoch": 0.67, "grad_norm": 0.65234375, "learning_rate": 5.82449544958966e-05, "loss": 1.0217, "step": 46945 }, { "epoch": 0.67, "grad_norm": 0.48046875, "learning_rate": 5.822220656653363e-05, "loss": 0.891, "step": 46950 }, { "epoch": 0.67, "grad_norm": 0.546875, "learning_rate": 5.8199461255850365e-05, "loss": 0.9092, "step": 46955 }, { "epoch": 0.67, "grad_norm": 0.53515625, "learning_rate": 5.8176718565272436e-05, "loss": 1.0907, "step": 46960 }, { "epoch": 0.67, "grad_norm": 0.7109375, "learning_rate": 5.8153978496225415e-05, "loss": 0.9233, "step": 46965 }, { "epoch": 0.67, "grad_norm": 0.6171875, "learning_rate": 5.813124105013473e-05, "loss": 0.962, "step": 46970 }, { "epoch": 0.67, "grad_norm": 0.5390625, "learning_rate": 5.81085062284255e-05, "loss": 0.9517, "step": 46975 }, { "epoch": 0.67, "grad_norm": 0.66796875, "learning_rate": 5.8085774032522814e-05, "loss": 0.919, "step": 46980 }, { "epoch": 0.67, "grad_norm": 0.462890625, "learning_rate": 5.8063044463851623e-05, "loss": 0.8406, "step": 46985 }, { "epoch": 0.67, "grad_norm": 0.52734375, "learning_rate": 5.8040317523836475e-05, "loss": 0.9927, "step": 46990 }, { "epoch": 0.67, "grad_norm": 0.54296875, "learning_rate": 5.8017593213902036e-05, "loss": 0.8488, "step": 46995 }, { "epoch": 0.67, "grad_norm": 0.52734375, "learning_rate": 5.7994871535472684e-05, "loss": 0.9303, "step": 47000 }, { "epoch": 0.67, "grad_norm": 0.5703125, "learning_rate": 5.79721524899726e-05, "loss": 0.9435, "step": 47005 }, { "epoch": 0.67, "grad_norm": 0.5625, "learning_rate": 5.794943607882586e-05, "loss": 0.8904, "step": 47010 }, { "epoch": 0.67, "grad_norm": 0.5234375, "learning_rate": 5.792672230345639e-05, "loss": 1.0229, "step": 47015 }, { "epoch": 0.67, "grad_norm": 0.56640625, "learning_rate": 5.790401116528785e-05, "loss": 0.7795, "step": 47020 }, { "epoch": 0.67, "grad_norm": 0.60546875, "learning_rate": 5.788130266574383e-05, "loss": 0.9443, "step": 47025 }, { "epoch": 0.67, "grad_norm": 0.5, "learning_rate": 5.785859680624779e-05, "loss": 0.987, "step": 47030 }, { "epoch": 0.67, "grad_norm": 0.55859375, "learning_rate": 5.783589358822283e-05, "loss": 1.0296, "step": 47035 }, { "epoch": 0.67, "grad_norm": 0.4921875, "learning_rate": 5.781319301309214e-05, "loss": 1.0317, "step": 47040 }, { "epoch": 0.67, "grad_norm": 0.51953125, "learning_rate": 5.77904950822785e-05, "loss": 0.9373, "step": 47045 }, { "epoch": 0.67, "grad_norm": 0.546875, "learning_rate": 5.776779979720475e-05, "loss": 0.9986, "step": 47050 }, { "epoch": 0.67, "grad_norm": 0.63671875, "learning_rate": 5.7745107159293365e-05, "loss": 1.0654, "step": 47055 }, { "epoch": 0.68, "grad_norm": 0.5078125, "learning_rate": 5.772241716996679e-05, "loss": 0.9629, "step": 47060 }, { "epoch": 0.68, "grad_norm": 0.5390625, "learning_rate": 5.76997298306473e-05, "loss": 0.8715, "step": 47065 }, { "epoch": 0.68, "grad_norm": 0.482421875, "learning_rate": 5.7677045142756866e-05, "loss": 0.9028, "step": 47070 }, { "epoch": 0.68, "grad_norm": 0.5703125, "learning_rate": 5.765436310771746e-05, "loss": 0.9967, "step": 47075 }, { "epoch": 0.68, "grad_norm": 0.57421875, "learning_rate": 5.763168372695085e-05, "loss": 0.8681, "step": 47080 }, { "epoch": 0.68, "grad_norm": 0.64453125, "learning_rate": 5.76090070018785e-05, "loss": 1.0474, "step": 47085 }, { "epoch": 0.68, "grad_norm": 0.61328125, "learning_rate": 5.758633293392191e-05, "loss": 0.927, "step": 47090 }, { "epoch": 0.68, "grad_norm": 0.6015625, "learning_rate": 5.7563661524502275e-05, "loss": 0.9565, "step": 47095 }, { "epoch": 0.68, "grad_norm": 0.5234375, "learning_rate": 5.754099277504064e-05, "loss": 0.8303, "step": 47100 }, { "epoch": 0.68, "grad_norm": 0.57421875, "learning_rate": 5.751832668695792e-05, "loss": 1.017, "step": 47105 }, { "epoch": 0.68, "grad_norm": 0.515625, "learning_rate": 5.749566326167492e-05, "loss": 0.99, "step": 47110 }, { "epoch": 0.68, "grad_norm": 0.59375, "learning_rate": 5.7473002500612114e-05, "loss": 1.0008, "step": 47115 }, { "epoch": 0.68, "grad_norm": 0.48828125, "learning_rate": 5.745034440518993e-05, "loss": 0.9646, "step": 47120 }, { "epoch": 0.68, "grad_norm": 0.5625, "learning_rate": 5.742768897682868e-05, "loss": 0.9078, "step": 47125 }, { "epoch": 0.68, "grad_norm": 0.53515625, "learning_rate": 5.7405036216948315e-05, "loss": 0.9729, "step": 47130 }, { "epoch": 0.68, "grad_norm": 0.50390625, "learning_rate": 5.73823861269688e-05, "loss": 0.9689, "step": 47135 }, { "epoch": 0.68, "grad_norm": 0.53515625, "learning_rate": 5.7359738708309885e-05, "loss": 0.987, "step": 47140 }, { "epoch": 0.68, "grad_norm": 0.51953125, "learning_rate": 5.733709396239113e-05, "loss": 0.9306, "step": 47145 }, { "epoch": 0.68, "grad_norm": 0.60546875, "learning_rate": 5.731445189063187e-05, "loss": 0.8408, "step": 47150 }, { "epoch": 0.68, "grad_norm": 0.51171875, "learning_rate": 5.729181249445137e-05, "loss": 0.9015, "step": 47155 }, { "epoch": 0.68, "grad_norm": 0.5859375, "learning_rate": 5.726917577526876e-05, "loss": 0.9337, "step": 47160 }, { "epoch": 0.68, "grad_norm": 0.59375, "learning_rate": 5.724654173450281e-05, "loss": 0.6944, "step": 47165 }, { "epoch": 0.68, "grad_norm": 0.6875, "learning_rate": 5.722391037357234e-05, "loss": 1.0884, "step": 47170 }, { "epoch": 0.68, "grad_norm": 0.5859375, "learning_rate": 5.720128169389593e-05, "loss": 0.8736, "step": 47175 }, { "epoch": 0.68, "grad_norm": 0.55859375, "learning_rate": 5.717865569689187e-05, "loss": 0.9979, "step": 47180 }, { "epoch": 0.68, "grad_norm": 0.55859375, "learning_rate": 5.715603238397845e-05, "loss": 0.9299, "step": 47185 }, { "epoch": 0.68, "grad_norm": 0.54296875, "learning_rate": 5.713341175657376e-05, "loss": 0.9496, "step": 47190 }, { "epoch": 0.68, "grad_norm": 0.62109375, "learning_rate": 5.7110793816095634e-05, "loss": 0.8789, "step": 47195 }, { "epoch": 0.68, "grad_norm": 0.515625, "learning_rate": 5.708817856396176e-05, "loss": 0.9788, "step": 47200 }, { "epoch": 0.68, "grad_norm": 0.51953125, "learning_rate": 5.7065566001589764e-05, "loss": 0.837, "step": 47205 }, { "epoch": 0.68, "grad_norm": 0.5546875, "learning_rate": 5.704295613039694e-05, "loss": 0.8921, "step": 47210 }, { "epoch": 0.68, "grad_norm": 0.56640625, "learning_rate": 5.7020348951800574e-05, "loss": 1.0042, "step": 47215 }, { "epoch": 0.68, "grad_norm": 0.50390625, "learning_rate": 5.699774446721771e-05, "loss": 0.9389, "step": 47220 }, { "epoch": 0.68, "grad_norm": 0.515625, "learning_rate": 5.6975142678065166e-05, "loss": 0.8968, "step": 47225 }, { "epoch": 0.68, "grad_norm": 0.59375, "learning_rate": 5.695254358575966e-05, "loss": 0.9572, "step": 47230 }, { "epoch": 0.68, "grad_norm": 0.58984375, "learning_rate": 5.6929947191717804e-05, "loss": 0.9398, "step": 47235 }, { "epoch": 0.68, "grad_norm": 0.5, "learning_rate": 5.6907353497355874e-05, "loss": 0.8282, "step": 47240 }, { "epoch": 0.68, "grad_norm": 0.54296875, "learning_rate": 5.688476250409011e-05, "loss": 0.8719, "step": 47245 }, { "epoch": 0.68, "grad_norm": 0.5546875, "learning_rate": 5.686217421333656e-05, "loss": 0.9109, "step": 47250 }, { "epoch": 0.68, "grad_norm": 0.58203125, "learning_rate": 5.683958862651106e-05, "loss": 0.9396, "step": 47255 }, { "epoch": 0.68, "grad_norm": 0.48046875, "learning_rate": 5.6817005745029284e-05, "loss": 0.9259, "step": 47260 }, { "epoch": 0.68, "grad_norm": 0.60546875, "learning_rate": 5.679442557030674e-05, "loss": 0.9045, "step": 47265 }, { "epoch": 0.68, "grad_norm": 0.58203125, "learning_rate": 5.6771848103758875e-05, "loss": 0.9453, "step": 47270 }, { "epoch": 0.68, "grad_norm": 0.63671875, "learning_rate": 5.674927334680075e-05, "loss": 0.9764, "step": 47275 }, { "epoch": 0.68, "grad_norm": 0.515625, "learning_rate": 5.672670130084743e-05, "loss": 0.9651, "step": 47280 }, { "epoch": 0.68, "grad_norm": 0.5390625, "learning_rate": 5.670413196731379e-05, "loss": 0.8923, "step": 47285 }, { "epoch": 0.68, "grad_norm": 0.53125, "learning_rate": 5.668156534761444e-05, "loss": 0.8737, "step": 47290 }, { "epoch": 0.68, "grad_norm": 0.515625, "learning_rate": 5.665900144316391e-05, "loss": 0.9184, "step": 47295 }, { "epoch": 0.68, "grad_norm": 0.48828125, "learning_rate": 5.66364402553766e-05, "loss": 1.0944, "step": 47300 }, { "epoch": 0.68, "grad_norm": 0.64453125, "learning_rate": 5.661388178566653e-05, "loss": 0.9557, "step": 47305 }, { "epoch": 0.68, "grad_norm": 0.59375, "learning_rate": 5.659132603544776e-05, "loss": 1.2007, "step": 47310 }, { "epoch": 0.68, "grad_norm": 0.5703125, "learning_rate": 5.6568773006134144e-05, "loss": 0.7776, "step": 47315 }, { "epoch": 0.68, "grad_norm": 0.578125, "learning_rate": 5.654622269913927e-05, "loss": 1.1321, "step": 47320 }, { "epoch": 0.68, "grad_norm": 0.51953125, "learning_rate": 5.652367511587665e-05, "loss": 0.957, "step": 47325 }, { "epoch": 0.68, "grad_norm": 0.546875, "learning_rate": 5.650113025775963e-05, "loss": 0.9662, "step": 47330 }, { "epoch": 0.68, "grad_norm": 0.515625, "learning_rate": 5.647858812620126e-05, "loss": 0.8585, "step": 47335 }, { "epoch": 0.68, "grad_norm": 0.58984375, "learning_rate": 5.645604872261455e-05, "loss": 0.9303, "step": 47340 }, { "epoch": 0.68, "grad_norm": 0.66015625, "learning_rate": 5.643351204841234e-05, "loss": 0.9571, "step": 47345 }, { "epoch": 0.68, "grad_norm": 0.61328125, "learning_rate": 5.6410978105007175e-05, "loss": 0.9188, "step": 47350 }, { "epoch": 0.68, "grad_norm": 0.55859375, "learning_rate": 5.6388446893811585e-05, "loss": 0.9066, "step": 47355 }, { "epoch": 0.68, "grad_norm": 0.55859375, "learning_rate": 5.6365918416237775e-05, "loss": 0.9174, "step": 47360 }, { "epoch": 0.68, "grad_norm": 0.55078125, "learning_rate": 5.634339267369794e-05, "loss": 0.9251, "step": 47365 }, { "epoch": 0.68, "grad_norm": 0.59375, "learning_rate": 5.6320869667603925e-05, "loss": 0.9401, "step": 47370 }, { "epoch": 0.68, "grad_norm": 0.62109375, "learning_rate": 5.629834939936755e-05, "loss": 1.0269, "step": 47375 }, { "epoch": 0.68, "grad_norm": 0.515625, "learning_rate": 5.6275831870400444e-05, "loss": 0.9956, "step": 47380 }, { "epoch": 0.68, "grad_norm": 0.5703125, "learning_rate": 5.625331708211394e-05, "loss": 0.9787, "step": 47385 }, { "epoch": 0.68, "grad_norm": 0.53125, "learning_rate": 5.6230805035919365e-05, "loss": 0.9678, "step": 47390 }, { "epoch": 0.68, "grad_norm": 0.58203125, "learning_rate": 5.620829573322781e-05, "loss": 0.8578, "step": 47395 }, { "epoch": 0.68, "grad_norm": 0.59765625, "learning_rate": 5.618578917545012e-05, "loss": 0.8684, "step": 47400 }, { "epoch": 0.68, "grad_norm": 0.61328125, "learning_rate": 5.61632853639971e-05, "loss": 1.0012, "step": 47405 }, { "epoch": 0.68, "grad_norm": 0.55859375, "learning_rate": 5.614078430027924e-05, "loss": 1.0491, "step": 47410 }, { "epoch": 0.68, "grad_norm": 0.53125, "learning_rate": 5.6118285985707006e-05, "loss": 0.9195, "step": 47415 }, { "epoch": 0.68, "grad_norm": 0.53125, "learning_rate": 5.6095790421690554e-05, "loss": 0.8142, "step": 47420 }, { "epoch": 0.68, "grad_norm": 0.55859375, "learning_rate": 5.607329760964e-05, "loss": 0.9404, "step": 47425 }, { "epoch": 0.68, "grad_norm": 0.57421875, "learning_rate": 5.6050807550965125e-05, "loss": 0.8679, "step": 47430 }, { "epoch": 0.68, "grad_norm": 0.70703125, "learning_rate": 5.60283202470757e-05, "loss": 0.826, "step": 47435 }, { "epoch": 0.68, "grad_norm": 0.59765625, "learning_rate": 5.600583569938127e-05, "loss": 0.9905, "step": 47440 }, { "epoch": 0.68, "grad_norm": 0.58203125, "learning_rate": 5.598335390929114e-05, "loss": 0.9148, "step": 47445 }, { "epoch": 0.68, "grad_norm": 0.52734375, "learning_rate": 5.596087487821451e-05, "loss": 1.0001, "step": 47450 }, { "epoch": 0.68, "grad_norm": 0.5625, "learning_rate": 5.593839860756044e-05, "loss": 0.9885, "step": 47455 }, { "epoch": 0.68, "grad_norm": 0.640625, "learning_rate": 5.591592509873772e-05, "loss": 0.9997, "step": 47460 }, { "epoch": 0.68, "grad_norm": 0.52734375, "learning_rate": 5.589345435315498e-05, "loss": 0.9769, "step": 47465 }, { "epoch": 0.68, "grad_norm": 0.52734375, "learning_rate": 5.587098637222077e-05, "loss": 0.892, "step": 47470 }, { "epoch": 0.68, "grad_norm": 0.578125, "learning_rate": 5.584852115734343e-05, "loss": 0.9215, "step": 47475 }, { "epoch": 0.68, "grad_norm": 0.53515625, "learning_rate": 5.582605870993103e-05, "loss": 0.8581, "step": 47480 }, { "epoch": 0.68, "grad_norm": 0.55859375, "learning_rate": 5.580359903139156e-05, "loss": 0.8876, "step": 47485 }, { "epoch": 0.68, "grad_norm": 0.55859375, "learning_rate": 5.5781142123132904e-05, "loss": 1.1096, "step": 47490 }, { "epoch": 0.68, "grad_norm": 0.5546875, "learning_rate": 5.575868798656256e-05, "loss": 0.9166, "step": 47495 }, { "epoch": 0.68, "grad_norm": 0.6328125, "learning_rate": 5.573623662308804e-05, "loss": 0.9829, "step": 47500 }, { "epoch": 0.68, "grad_norm": 0.59375, "learning_rate": 5.5713788034116673e-05, "loss": 0.9592, "step": 47505 }, { "epoch": 0.68, "grad_norm": 0.48046875, "learning_rate": 5.569134222105551e-05, "loss": 0.9754, "step": 47510 }, { "epoch": 0.68, "grad_norm": 0.515625, "learning_rate": 5.566889918531144e-05, "loss": 0.9408, "step": 47515 }, { "epoch": 0.68, "grad_norm": 0.5078125, "learning_rate": 5.564645892829126e-05, "loss": 0.9398, "step": 47520 }, { "epoch": 0.68, "grad_norm": 0.515625, "learning_rate": 5.5624021451401576e-05, "loss": 0.9964, "step": 47525 }, { "epoch": 0.68, "grad_norm": 0.5859375, "learning_rate": 5.560158675604873e-05, "loss": 0.9931, "step": 47530 }, { "epoch": 0.68, "grad_norm": 0.52734375, "learning_rate": 5.5579154843639e-05, "loss": 0.9833, "step": 47535 }, { "epoch": 0.68, "grad_norm": 0.58203125, "learning_rate": 5.555672571557846e-05, "loss": 0.9731, "step": 47540 }, { "epoch": 0.68, "grad_norm": 0.54296875, "learning_rate": 5.5534299373272944e-05, "loss": 1.032, "step": 47545 }, { "epoch": 0.68, "grad_norm": 0.5, "learning_rate": 5.5511875818128176e-05, "loss": 0.9431, "step": 47550 }, { "epoch": 0.68, "grad_norm": 0.5234375, "learning_rate": 5.5489455051549756e-05, "loss": 0.9872, "step": 47555 }, { "epoch": 0.68, "grad_norm": 0.59375, "learning_rate": 5.546703707494293e-05, "loss": 0.9955, "step": 47560 }, { "epoch": 0.68, "grad_norm": 0.53515625, "learning_rate": 5.544462188971298e-05, "loss": 0.8273, "step": 47565 }, { "epoch": 0.68, "grad_norm": 0.765625, "learning_rate": 5.5422209497264885e-05, "loss": 1.0531, "step": 47570 }, { "epoch": 0.68, "grad_norm": 0.6953125, "learning_rate": 5.5399799899003414e-05, "loss": 0.9159, "step": 47575 }, { "epoch": 0.68, "grad_norm": 0.58984375, "learning_rate": 5.53773930963333e-05, "loss": 0.8641, "step": 47580 }, { "epoch": 0.68, "grad_norm": 0.55859375, "learning_rate": 5.535498909065904e-05, "loss": 0.83, "step": 47585 }, { "epoch": 0.68, "grad_norm": 0.5703125, "learning_rate": 5.5332587883384854e-05, "loss": 0.9063, "step": 47590 }, { "epoch": 0.68, "grad_norm": 0.53125, "learning_rate": 5.5310189475914956e-05, "loss": 1.099, "step": 47595 }, { "epoch": 0.68, "grad_norm": 0.57421875, "learning_rate": 5.5287793869653305e-05, "loss": 0.8519, "step": 47600 }, { "epoch": 0.68, "grad_norm": 0.51953125, "learning_rate": 5.5265401066003617e-05, "loss": 1.0752, "step": 47605 }, { "epoch": 0.68, "grad_norm": 0.5546875, "learning_rate": 5.5243011066369534e-05, "loss": 0.8892, "step": 47610 }, { "epoch": 0.68, "grad_norm": 0.546875, "learning_rate": 5.522062387215454e-05, "loss": 0.9662, "step": 47615 }, { "epoch": 0.68, "grad_norm": 0.54296875, "learning_rate": 5.519823948476184e-05, "loss": 0.9145, "step": 47620 }, { "epoch": 0.68, "grad_norm": 0.56640625, "learning_rate": 5.517585790559448e-05, "loss": 0.9575, "step": 47625 }, { "epoch": 0.68, "grad_norm": 0.515625, "learning_rate": 5.515347913605539e-05, "loss": 0.9744, "step": 47630 }, { "epoch": 0.68, "grad_norm": 0.55859375, "learning_rate": 5.513110317754734e-05, "loss": 0.8392, "step": 47635 }, { "epoch": 0.68, "grad_norm": 0.60546875, "learning_rate": 5.510873003147281e-05, "loss": 1.0129, "step": 47640 }, { "epoch": 0.68, "grad_norm": 0.60546875, "learning_rate": 5.5086359699234205e-05, "loss": 0.9679, "step": 47645 }, { "epoch": 0.68, "grad_norm": 0.5859375, "learning_rate": 5.506399218223377e-05, "loss": 1.0772, "step": 47650 }, { "epoch": 0.68, "grad_norm": 0.515625, "learning_rate": 5.504162748187344e-05, "loss": 0.9394, "step": 47655 }, { "epoch": 0.68, "grad_norm": 0.609375, "learning_rate": 5.50192655995551e-05, "loss": 0.9517, "step": 47660 }, { "epoch": 0.68, "grad_norm": 0.609375, "learning_rate": 5.499690653668046e-05, "loss": 0.9318, "step": 47665 }, { "epoch": 0.68, "grad_norm": 0.56640625, "learning_rate": 5.4974550294650985e-05, "loss": 0.9086, "step": 47670 }, { "epoch": 0.68, "grad_norm": 0.546875, "learning_rate": 5.495219687486792e-05, "loss": 0.9837, "step": 47675 }, { "epoch": 0.68, "grad_norm": 0.62890625, "learning_rate": 5.4929846278732525e-05, "loss": 0.9616, "step": 47680 }, { "epoch": 0.68, "grad_norm": 0.54296875, "learning_rate": 5.4907498507645626e-05, "loss": 0.9245, "step": 47685 }, { "epoch": 0.68, "grad_norm": 0.55859375, "learning_rate": 5.4885153563008095e-05, "loss": 1.0802, "step": 47690 }, { "epoch": 0.68, "grad_norm": 0.53125, "learning_rate": 5.4862811446220564e-05, "loss": 0.9033, "step": 47695 }, { "epoch": 0.68, "grad_norm": 0.52734375, "learning_rate": 5.484047215868336e-05, "loss": 0.9888, "step": 47700 }, { "epoch": 0.68, "grad_norm": 0.57421875, "learning_rate": 5.4818135701796814e-05, "loss": 0.9262, "step": 47705 }, { "epoch": 0.68, "grad_norm": 0.56640625, "learning_rate": 5.4795802076961e-05, "loss": 0.9719, "step": 47710 }, { "epoch": 0.68, "grad_norm": 0.58203125, "learning_rate": 5.477347128557577e-05, "loss": 0.8993, "step": 47715 }, { "epoch": 0.68, "grad_norm": 0.57421875, "learning_rate": 5.47511433290409e-05, "loss": 0.8943, "step": 47720 }, { "epoch": 0.68, "grad_norm": 0.53515625, "learning_rate": 5.4728818208755864e-05, "loss": 0.909, "step": 47725 }, { "epoch": 0.68, "grad_norm": 0.498046875, "learning_rate": 5.47064959261201e-05, "loss": 0.9259, "step": 47730 }, { "epoch": 0.68, "grad_norm": 0.625, "learning_rate": 5.4684176482532723e-05, "loss": 0.9023, "step": 47735 }, { "epoch": 0.68, "grad_norm": 0.58984375, "learning_rate": 5.4661859879392764e-05, "loss": 0.8699, "step": 47740 }, { "epoch": 0.68, "grad_norm": 0.56640625, "learning_rate": 5.463954611809912e-05, "loss": 0.9654, "step": 47745 }, { "epoch": 0.68, "grad_norm": 0.72265625, "learning_rate": 5.461723520005033e-05, "loss": 0.9852, "step": 47750 }, { "epoch": 0.69, "grad_norm": 0.515625, "learning_rate": 5.459492712664493e-05, "loss": 0.8854, "step": 47755 }, { "epoch": 0.69, "grad_norm": 0.51953125, "learning_rate": 5.457262189928125e-05, "loss": 1.0325, "step": 47760 }, { "epoch": 0.69, "grad_norm": 0.59765625, "learning_rate": 5.455031951935732e-05, "loss": 1.0155, "step": 47765 }, { "epoch": 0.69, "grad_norm": 0.58984375, "learning_rate": 5.452801998827114e-05, "loss": 0.9049, "step": 47770 }, { "epoch": 0.69, "grad_norm": 0.55078125, "learning_rate": 5.450572330742052e-05, "loss": 0.978, "step": 47775 }, { "epoch": 0.69, "grad_norm": 0.50390625, "learning_rate": 5.4483429478202906e-05, "loss": 0.9369, "step": 47780 }, { "epoch": 0.69, "grad_norm": 0.5546875, "learning_rate": 5.446113850201578e-05, "loss": 1.041, "step": 47785 }, { "epoch": 0.69, "grad_norm": 0.53125, "learning_rate": 5.443885038025638e-05, "loss": 0.9147, "step": 47790 }, { "epoch": 0.69, "grad_norm": 0.546875, "learning_rate": 5.441656511432169e-05, "loss": 0.854, "step": 47795 }, { "epoch": 0.69, "grad_norm": 0.466796875, "learning_rate": 5.4394282705608626e-05, "loss": 0.918, "step": 47800 }, { "epoch": 0.69, "grad_norm": 0.5078125, "learning_rate": 5.4372003155513896e-05, "loss": 0.8515, "step": 47805 }, { "epoch": 0.69, "grad_norm": 0.60546875, "learning_rate": 5.434972646543393e-05, "loss": 1.1099, "step": 47810 }, { "epoch": 0.69, "grad_norm": 0.5390625, "learning_rate": 5.432745263676511e-05, "loss": 0.8933, "step": 47815 }, { "epoch": 0.69, "grad_norm": 0.5, "learning_rate": 5.430518167090359e-05, "loss": 1.062, "step": 47820 }, { "epoch": 0.69, "grad_norm": 0.57421875, "learning_rate": 5.428291356924535e-05, "loss": 0.8631, "step": 47825 }, { "epoch": 0.69, "grad_norm": 0.55078125, "learning_rate": 5.4260648333186114e-05, "loss": 0.998, "step": 47830 }, { "epoch": 0.69, "grad_norm": 0.58203125, "learning_rate": 5.423838596412152e-05, "loss": 0.9869, "step": 47835 }, { "epoch": 0.69, "grad_norm": 0.59765625, "learning_rate": 5.421612646344707e-05, "loss": 1.0076, "step": 47840 }, { "epoch": 0.69, "grad_norm": 0.63671875, "learning_rate": 5.41938698325579e-05, "loss": 0.9925, "step": 47845 }, { "epoch": 0.69, "grad_norm": 0.49609375, "learning_rate": 5.417161607284915e-05, "loss": 0.8627, "step": 47850 }, { "epoch": 0.69, "grad_norm": 0.625, "learning_rate": 5.4149365185715726e-05, "loss": 1.0033, "step": 47855 }, { "epoch": 0.69, "grad_norm": 0.54296875, "learning_rate": 5.412711717255228e-05, "loss": 1.0183, "step": 47860 }, { "epoch": 0.69, "grad_norm": 0.6171875, "learning_rate": 5.410487203475338e-05, "loss": 0.9942, "step": 47865 }, { "epoch": 0.69, "grad_norm": 0.61328125, "learning_rate": 5.40826297737134e-05, "loss": 0.9463, "step": 47870 }, { "epoch": 0.69, "grad_norm": 0.56640625, "learning_rate": 5.4060390390826444e-05, "loss": 0.9178, "step": 47875 }, { "epoch": 0.69, "grad_norm": 0.58984375, "learning_rate": 5.403815388748659e-05, "loss": 1.0525, "step": 47880 }, { "epoch": 0.69, "grad_norm": 0.55078125, "learning_rate": 5.4015920265087574e-05, "loss": 0.8748, "step": 47885 }, { "epoch": 0.69, "grad_norm": 0.49609375, "learning_rate": 5.399368952502302e-05, "loss": 0.91, "step": 47890 }, { "epoch": 0.69, "grad_norm": 0.5078125, "learning_rate": 5.397146166868641e-05, "loss": 0.9965, "step": 47895 }, { "epoch": 0.69, "grad_norm": 0.6015625, "learning_rate": 5.3949236697471025e-05, "loss": 0.8512, "step": 47900 }, { "epoch": 0.69, "grad_norm": 0.58203125, "learning_rate": 5.3927014612769925e-05, "loss": 1.1435, "step": 47905 }, { "epoch": 0.69, "grad_norm": 0.55859375, "learning_rate": 5.3904795415975996e-05, "loss": 0.92, "step": 47910 }, { "epoch": 0.69, "grad_norm": 0.53125, "learning_rate": 5.3882579108482024e-05, "loss": 0.9276, "step": 47915 }, { "epoch": 0.69, "grad_norm": 0.6015625, "learning_rate": 5.386036569168049e-05, "loss": 1.0497, "step": 47920 }, { "epoch": 0.69, "grad_norm": 0.62109375, "learning_rate": 5.383815516696378e-05, "loss": 0.9812, "step": 47925 }, { "epoch": 0.69, "grad_norm": 0.66015625, "learning_rate": 5.3815947535724124e-05, "loss": 0.973, "step": 47930 }, { "epoch": 0.69, "grad_norm": 0.578125, "learning_rate": 5.3793742799353464e-05, "loss": 1.0856, "step": 47935 }, { "epoch": 0.69, "grad_norm": 0.515625, "learning_rate": 5.377154095924359e-05, "loss": 0.9601, "step": 47940 }, { "epoch": 0.69, "grad_norm": 0.546875, "learning_rate": 5.3749342016786186e-05, "loss": 0.9086, "step": 47945 }, { "epoch": 0.69, "grad_norm": 0.56640625, "learning_rate": 5.372714597337274e-05, "loss": 0.9846, "step": 47950 }, { "epoch": 0.69, "grad_norm": 0.58203125, "learning_rate": 5.370495283039445e-05, "loss": 1.0759, "step": 47955 }, { "epoch": 0.69, "grad_norm": 0.62890625, "learning_rate": 5.368276258924243e-05, "loss": 0.9919, "step": 47960 }, { "epoch": 0.69, "grad_norm": 0.53125, "learning_rate": 5.3660575251307646e-05, "loss": 0.9385, "step": 47965 }, { "epoch": 0.69, "grad_norm": 0.57421875, "learning_rate": 5.3638390817980744e-05, "loss": 0.8852, "step": 47970 }, { "epoch": 0.69, "grad_norm": 0.5546875, "learning_rate": 5.3616209290652296e-05, "loss": 0.8375, "step": 47975 }, { "epoch": 0.69, "grad_norm": 0.50390625, "learning_rate": 5.359403067071274e-05, "loss": 1.0317, "step": 47980 }, { "epoch": 0.69, "grad_norm": 0.625, "learning_rate": 5.357185495955217e-05, "loss": 0.9038, "step": 47985 }, { "epoch": 0.69, "grad_norm": 0.4921875, "learning_rate": 5.3549682158560574e-05, "loss": 0.9486, "step": 47990 }, { "epoch": 0.69, "grad_norm": 0.5625, "learning_rate": 5.3527512269127844e-05, "loss": 0.9461, "step": 47995 }, { "epoch": 0.69, "grad_norm": 0.48828125, "learning_rate": 5.350534529264353e-05, "loss": 0.9131, "step": 48000 }, { "epoch": 0.69, "grad_norm": 0.5234375, "learning_rate": 5.348318123049714e-05, "loss": 0.9683, "step": 48005 }, { "epoch": 0.69, "grad_norm": 0.53125, "learning_rate": 5.3461020084077964e-05, "loss": 0.9655, "step": 48010 }, { "epoch": 0.69, "grad_norm": 0.5078125, "learning_rate": 5.3438861854775e-05, "loss": 0.8866, "step": 48015 }, { "epoch": 0.69, "grad_norm": 0.62890625, "learning_rate": 5.341670654397721e-05, "loss": 0.924, "step": 48020 }, { "epoch": 0.69, "grad_norm": 0.546875, "learning_rate": 5.339455415307335e-05, "loss": 0.9218, "step": 48025 }, { "epoch": 0.69, "grad_norm": 0.53515625, "learning_rate": 5.337240468345187e-05, "loss": 0.8772, "step": 48030 }, { "epoch": 0.69, "grad_norm": 0.5703125, "learning_rate": 5.335025813650121e-05, "loss": 0.9987, "step": 48035 }, { "epoch": 0.69, "grad_norm": 0.52734375, "learning_rate": 5.332811451360946e-05, "loss": 0.9083, "step": 48040 }, { "epoch": 0.69, "grad_norm": 0.69140625, "learning_rate": 5.330597381616469e-05, "loss": 1.1062, "step": 48045 }, { "epoch": 0.69, "grad_norm": 0.73828125, "learning_rate": 5.328383604555462e-05, "loss": 0.9678, "step": 48050 }, { "epoch": 0.69, "grad_norm": 0.48046875, "learning_rate": 5.326170120316691e-05, "loss": 0.8508, "step": 48055 }, { "epoch": 0.69, "grad_norm": 0.52734375, "learning_rate": 5.3239569290389043e-05, "loss": 1.0304, "step": 48060 }, { "epoch": 0.69, "grad_norm": 0.5, "learning_rate": 5.321744030860819e-05, "loss": 0.9192, "step": 48065 }, { "epoch": 0.69, "grad_norm": 0.71484375, "learning_rate": 5.319531425921146e-05, "loss": 1.0754, "step": 48070 }, { "epoch": 0.69, "grad_norm": 0.5390625, "learning_rate": 5.317319114358576e-05, "loss": 1.098, "step": 48075 }, { "epoch": 0.69, "grad_norm": 0.9140625, "learning_rate": 5.3151070963117754e-05, "loss": 0.9126, "step": 48080 }, { "epoch": 0.69, "grad_norm": 0.57421875, "learning_rate": 5.3128953719193975e-05, "loss": 1.1703, "step": 48085 }, { "epoch": 0.69, "grad_norm": 0.55078125, "learning_rate": 5.310683941320082e-05, "loss": 1.0177, "step": 48090 }, { "epoch": 0.69, "grad_norm": 0.515625, "learning_rate": 5.3084728046524315e-05, "loss": 0.9678, "step": 48095 }, { "epoch": 0.69, "grad_norm": 0.6640625, "learning_rate": 5.306261962055048e-05, "loss": 0.9722, "step": 48100 }, { "epoch": 0.69, "grad_norm": 0.51953125, "learning_rate": 5.3040514136665154e-05, "loss": 1.0295, "step": 48105 }, { "epoch": 0.69, "grad_norm": 0.61328125, "learning_rate": 5.3018411596253824e-05, "loss": 1.0052, "step": 48110 }, { "epoch": 0.69, "grad_norm": 0.56640625, "learning_rate": 5.2996312000701986e-05, "loss": 0.9311, "step": 48115 }, { "epoch": 0.69, "grad_norm": 0.53125, "learning_rate": 5.2974215351394863e-05, "loss": 1.0246, "step": 48120 }, { "epoch": 0.69, "grad_norm": 0.474609375, "learning_rate": 5.295212164971746e-05, "loss": 0.8808, "step": 48125 }, { "epoch": 0.69, "grad_norm": 0.546875, "learning_rate": 5.293003089705464e-05, "loss": 0.9481, "step": 48130 }, { "epoch": 0.69, "grad_norm": 0.50390625, "learning_rate": 5.290794309479113e-05, "loss": 0.9324, "step": 48135 }, { "epoch": 0.69, "grad_norm": 0.5078125, "learning_rate": 5.288585824431139e-05, "loss": 0.8866, "step": 48140 }, { "epoch": 0.69, "grad_norm": 0.59765625, "learning_rate": 5.286377634699967e-05, "loss": 0.9753, "step": 48145 }, { "epoch": 0.69, "grad_norm": 0.5625, "learning_rate": 5.284169740424014e-05, "loss": 0.9452, "step": 48150 }, { "epoch": 0.69, "grad_norm": 0.5859375, "learning_rate": 5.2819621417416755e-05, "loss": 1.1019, "step": 48155 }, { "epoch": 0.69, "grad_norm": 0.55078125, "learning_rate": 5.27975483879132e-05, "loss": 0.8032, "step": 48160 }, { "epoch": 0.69, "grad_norm": 0.578125, "learning_rate": 5.277547831711307e-05, "loss": 0.9602, "step": 48165 }, { "epoch": 0.69, "grad_norm": 0.53515625, "learning_rate": 5.275341120639977e-05, "loss": 1.0003, "step": 48170 }, { "epoch": 0.69, "grad_norm": 0.5859375, "learning_rate": 5.273134705715643e-05, "loss": 1.1599, "step": 48175 }, { "epoch": 0.69, "grad_norm": 0.58984375, "learning_rate": 5.270928587076611e-05, "loss": 0.9654, "step": 48180 }, { "epoch": 0.69, "grad_norm": 0.51953125, "learning_rate": 5.268722764861164e-05, "loss": 0.8464, "step": 48185 }, { "epoch": 0.69, "grad_norm": 0.58203125, "learning_rate": 5.266517239207558e-05, "loss": 0.8963, "step": 48190 }, { "epoch": 0.69, "grad_norm": 0.59375, "learning_rate": 5.2643120102540466e-05, "loss": 1.0255, "step": 48195 }, { "epoch": 0.69, "grad_norm": 0.80078125, "learning_rate": 5.2621070781388485e-05, "loss": 1.0463, "step": 48200 }, { "epoch": 0.69, "grad_norm": 0.58984375, "learning_rate": 5.259902443000177e-05, "loss": 0.8804, "step": 48205 }, { "epoch": 0.69, "grad_norm": 0.5703125, "learning_rate": 5.257698104976217e-05, "loss": 0.8568, "step": 48210 }, { "epoch": 0.69, "grad_norm": 0.55078125, "learning_rate": 5.25549406420514e-05, "loss": 0.894, "step": 48215 }, { "epoch": 0.69, "grad_norm": 0.56640625, "learning_rate": 5.253290320825104e-05, "loss": 0.8669, "step": 48220 }, { "epoch": 0.69, "grad_norm": 0.5859375, "learning_rate": 5.25108687497423e-05, "loss": 0.8684, "step": 48225 }, { "epoch": 0.69, "grad_norm": 0.50390625, "learning_rate": 5.248883726790646e-05, "loss": 0.9638, "step": 48230 }, { "epoch": 0.69, "grad_norm": 0.734375, "learning_rate": 5.246680876412435e-05, "loss": 0.898, "step": 48235 }, { "epoch": 0.69, "grad_norm": 0.51171875, "learning_rate": 5.244478323977681e-05, "loss": 0.954, "step": 48240 }, { "epoch": 0.69, "grad_norm": 0.58203125, "learning_rate": 5.242276069624445e-05, "loss": 0.8637, "step": 48245 }, { "epoch": 0.69, "grad_norm": 0.62109375, "learning_rate": 5.240074113490765e-05, "loss": 0.8344, "step": 48250 }, { "epoch": 0.69, "grad_norm": 0.6640625, "learning_rate": 5.2378724557146566e-05, "loss": 0.9092, "step": 48255 }, { "epoch": 0.69, "grad_norm": 0.51953125, "learning_rate": 5.235671096434125e-05, "loss": 0.9158, "step": 48260 }, { "epoch": 0.69, "grad_norm": 0.60546875, "learning_rate": 5.233470035787159e-05, "loss": 0.8737, "step": 48265 }, { "epoch": 0.69, "grad_norm": 0.58203125, "learning_rate": 5.2312692739117165e-05, "loss": 0.9542, "step": 48270 }, { "epoch": 0.69, "grad_norm": 0.50390625, "learning_rate": 5.229068810945748e-05, "loss": 0.8575, "step": 48275 }, { "epoch": 0.69, "grad_norm": 0.498046875, "learning_rate": 5.226868647027183e-05, "loss": 0.931, "step": 48280 }, { "epoch": 0.69, "grad_norm": 0.50390625, "learning_rate": 5.224668782293923e-05, "loss": 0.954, "step": 48285 }, { "epoch": 0.69, "grad_norm": 0.58203125, "learning_rate": 5.222469216883863e-05, "loss": 0.8797, "step": 48290 }, { "epoch": 0.69, "grad_norm": 0.52734375, "learning_rate": 5.220269950934877e-05, "loss": 1.0477, "step": 48295 }, { "epoch": 0.69, "grad_norm": 0.54296875, "learning_rate": 5.218070984584815e-05, "loss": 0.912, "step": 48300 }, { "epoch": 0.69, "grad_norm": 0.609375, "learning_rate": 5.2158723179715064e-05, "loss": 1.0759, "step": 48305 }, { "epoch": 0.69, "grad_norm": 0.490234375, "learning_rate": 5.213673951232768e-05, "loss": 0.871, "step": 48310 }, { "epoch": 0.69, "grad_norm": 0.5703125, "learning_rate": 5.2114758845064026e-05, "loss": 0.8752, "step": 48315 }, { "epoch": 0.69, "grad_norm": 0.58984375, "learning_rate": 5.2092781179301776e-05, "loss": 0.9911, "step": 48320 }, { "epoch": 0.69, "grad_norm": 0.54296875, "learning_rate": 5.2070806516418565e-05, "loss": 1.1045, "step": 48325 }, { "epoch": 0.69, "grad_norm": 0.515625, "learning_rate": 5.204883485779184e-05, "loss": 0.8972, "step": 48330 }, { "epoch": 0.69, "grad_norm": 0.55078125, "learning_rate": 5.2026866204798704e-05, "loss": 0.9271, "step": 48335 }, { "epoch": 0.69, "grad_norm": 0.58984375, "learning_rate": 5.200490055881624e-05, "loss": 0.9302, "step": 48340 }, { "epoch": 0.69, "grad_norm": 0.53515625, "learning_rate": 5.198293792122131e-05, "loss": 0.9276, "step": 48345 }, { "epoch": 0.69, "grad_norm": 0.55078125, "learning_rate": 5.196097829339051e-05, "loss": 0.93, "step": 48350 }, { "epoch": 0.69, "grad_norm": 0.671875, "learning_rate": 5.193902167670026e-05, "loss": 1.0635, "step": 48355 }, { "epoch": 0.69, "grad_norm": 0.515625, "learning_rate": 5.191706807252692e-05, "loss": 0.888, "step": 48360 }, { "epoch": 0.69, "grad_norm": 0.57421875, "learning_rate": 5.189511748224647e-05, "loss": 0.9246, "step": 48365 }, { "epoch": 0.69, "grad_norm": 0.50390625, "learning_rate": 5.1873169907234843e-05, "loss": 1.0039, "step": 48370 }, { "epoch": 0.69, "grad_norm": 0.625, "learning_rate": 5.185122534886777e-05, "loss": 1.0164, "step": 48375 }, { "epoch": 0.69, "grad_norm": 0.546875, "learning_rate": 5.18292838085207e-05, "loss": 0.8643, "step": 48380 }, { "epoch": 0.69, "grad_norm": 0.54296875, "learning_rate": 5.1807345287568984e-05, "loss": 0.9587, "step": 48385 }, { "epoch": 0.69, "grad_norm": 0.53125, "learning_rate": 5.178540978738779e-05, "loss": 1.0061, "step": 48390 }, { "epoch": 0.69, "grad_norm": 0.6328125, "learning_rate": 5.176347730935198e-05, "loss": 1.1532, "step": 48395 }, { "epoch": 0.69, "grad_norm": 0.625, "learning_rate": 5.1741547854836344e-05, "loss": 0.7755, "step": 48400 }, { "epoch": 0.69, "grad_norm": 0.578125, "learning_rate": 5.17196214252155e-05, "loss": 0.9659, "step": 48405 }, { "epoch": 0.69, "grad_norm": 0.5234375, "learning_rate": 5.169769802186377e-05, "loss": 1.0091, "step": 48410 }, { "epoch": 0.69, "grad_norm": 0.71484375, "learning_rate": 5.16757776461553e-05, "loss": 0.9977, "step": 48415 }, { "epoch": 0.69, "grad_norm": 0.5390625, "learning_rate": 5.165386029946412e-05, "loss": 1.0268, "step": 48420 }, { "epoch": 0.69, "grad_norm": 0.52734375, "learning_rate": 5.163194598316408e-05, "loss": 0.8544, "step": 48425 }, { "epoch": 0.69, "grad_norm": 0.52734375, "learning_rate": 5.1610034698628704e-05, "loss": 0.9178, "step": 48430 }, { "epoch": 0.69, "grad_norm": 0.65234375, "learning_rate": 5.158812644723148e-05, "loss": 1.0783, "step": 48435 }, { "epoch": 0.69, "grad_norm": 0.53125, "learning_rate": 5.1566221230345655e-05, "loss": 0.9574, "step": 48440 }, { "epoch": 0.69, "grad_norm": 0.55859375, "learning_rate": 5.15443190493442e-05, "loss": 0.9738, "step": 48445 }, { "epoch": 0.69, "grad_norm": 0.5625, "learning_rate": 5.152241990560003e-05, "loss": 0.8551, "step": 48450 }, { "epoch": 0.7, "grad_norm": 0.5234375, "learning_rate": 5.150052380048587e-05, "loss": 1.049, "step": 48455 }, { "epoch": 0.7, "grad_norm": 0.625, "learning_rate": 5.147863073537402e-05, "loss": 0.8248, "step": 48460 }, { "epoch": 0.7, "grad_norm": 0.51953125, "learning_rate": 5.1456740711636866e-05, "loss": 1.0644, "step": 48465 }, { "epoch": 0.7, "grad_norm": 0.52734375, "learning_rate": 5.143485373064654e-05, "loss": 0.8591, "step": 48470 }, { "epoch": 0.7, "grad_norm": 0.62109375, "learning_rate": 5.1412969793774835e-05, "loss": 0.8676, "step": 48475 }, { "epoch": 0.7, "grad_norm": 0.515625, "learning_rate": 5.139108890239353e-05, "loss": 0.9285, "step": 48480 }, { "epoch": 0.7, "grad_norm": 0.5546875, "learning_rate": 5.136921105787417e-05, "loss": 1.0226, "step": 48485 }, { "epoch": 0.7, "grad_norm": 0.5703125, "learning_rate": 5.134733626158801e-05, "loss": 0.8493, "step": 48490 }, { "epoch": 0.7, "grad_norm": 0.6328125, "learning_rate": 5.1325464514906216e-05, "loss": 1.1499, "step": 48495 }, { "epoch": 0.7, "grad_norm": 0.53125, "learning_rate": 5.130359581919979e-05, "loss": 0.8768, "step": 48500 }, { "epoch": 0.7, "grad_norm": 0.5234375, "learning_rate": 5.1281730175839394e-05, "loss": 0.9771, "step": 48505 }, { "epoch": 0.7, "grad_norm": 0.5703125, "learning_rate": 5.125986758619567e-05, "loss": 1.0365, "step": 48510 }, { "epoch": 0.7, "grad_norm": 0.5703125, "learning_rate": 5.123800805163893e-05, "loss": 0.9222, "step": 48515 }, { "epoch": 0.7, "grad_norm": 0.5390625, "learning_rate": 5.1216151573539404e-05, "loss": 1.0843, "step": 48520 }, { "epoch": 0.7, "grad_norm": 0.49609375, "learning_rate": 5.119429815326704e-05, "loss": 1.0634, "step": 48525 }, { "epoch": 0.7, "grad_norm": 0.671875, "learning_rate": 5.1172447792191637e-05, "loss": 0.9098, "step": 48530 }, { "epoch": 0.7, "grad_norm": 0.59765625, "learning_rate": 5.115060049168286e-05, "loss": 0.9464, "step": 48535 }, { "epoch": 0.7, "grad_norm": 0.53515625, "learning_rate": 5.1128756253110044e-05, "loss": 0.853, "step": 48540 }, { "epoch": 0.7, "grad_norm": 0.55859375, "learning_rate": 5.110691507784244e-05, "loss": 0.95, "step": 48545 }, { "epoch": 0.7, "grad_norm": 0.58984375, "learning_rate": 5.108507696724911e-05, "loss": 0.8101, "step": 48550 }, { "epoch": 0.7, "grad_norm": 0.4609375, "learning_rate": 5.106324192269884e-05, "loss": 0.9499, "step": 48555 }, { "epoch": 0.7, "grad_norm": 0.52734375, "learning_rate": 5.104140994556035e-05, "loss": 1.067, "step": 48560 }, { "epoch": 0.7, "grad_norm": 0.61328125, "learning_rate": 5.1019581037202036e-05, "loss": 0.9056, "step": 48565 }, { "epoch": 0.7, "grad_norm": 0.5625, "learning_rate": 5.099775519899213e-05, "loss": 0.972, "step": 48570 }, { "epoch": 0.7, "grad_norm": 0.53125, "learning_rate": 5.097593243229872e-05, "loss": 0.8852, "step": 48575 }, { "epoch": 0.7, "grad_norm": 0.515625, "learning_rate": 5.0954112738489754e-05, "loss": 0.9366, "step": 48580 }, { "epoch": 0.7, "grad_norm": 0.5234375, "learning_rate": 5.093229611893281e-05, "loss": 1.0244, "step": 48585 }, { "epoch": 0.7, "grad_norm": 0.609375, "learning_rate": 5.0910482574995445e-05, "loss": 0.9055, "step": 48590 }, { "epoch": 0.7, "grad_norm": 0.52734375, "learning_rate": 5.088867210804496e-05, "loss": 0.828, "step": 48595 }, { "epoch": 0.7, "grad_norm": 0.53125, "learning_rate": 5.0866864719448414e-05, "loss": 1.0362, "step": 48600 }, { "epoch": 0.7, "grad_norm": 0.59375, "learning_rate": 5.084506041057274e-05, "loss": 1.0137, "step": 48605 }, { "epoch": 0.7, "grad_norm": 0.57421875, "learning_rate": 5.08232591827847e-05, "loss": 0.9519, "step": 48610 }, { "epoch": 0.7, "grad_norm": 0.498046875, "learning_rate": 5.0801461037450794e-05, "loss": 0.8714, "step": 48615 }, { "epoch": 0.7, "grad_norm": 0.58203125, "learning_rate": 5.077966597593728e-05, "loss": 0.8594, "step": 48620 }, { "epoch": 0.7, "grad_norm": 0.58984375, "learning_rate": 5.0757873999610385e-05, "loss": 0.9234, "step": 48625 }, { "epoch": 0.7, "grad_norm": 0.546875, "learning_rate": 5.073608510983606e-05, "loss": 0.9935, "step": 48630 }, { "epoch": 0.7, "grad_norm": 0.6484375, "learning_rate": 5.0714299307979994e-05, "loss": 1.1072, "step": 48635 }, { "epoch": 0.7, "grad_norm": 0.51171875, "learning_rate": 5.069251659540777e-05, "loss": 1.0978, "step": 48640 }, { "epoch": 0.7, "grad_norm": 0.578125, "learning_rate": 5.06707369734848e-05, "loss": 0.877, "step": 48645 }, { "epoch": 0.7, "grad_norm": 0.5625, "learning_rate": 5.0648960443576185e-05, "loss": 0.9152, "step": 48650 }, { "epoch": 0.7, "grad_norm": 0.515625, "learning_rate": 5.062718700704693e-05, "loss": 0.9402, "step": 48655 }, { "epoch": 0.7, "grad_norm": 0.56640625, "learning_rate": 5.0605416665261864e-05, "loss": 0.9516, "step": 48660 }, { "epoch": 0.7, "grad_norm": 0.66015625, "learning_rate": 5.058364941958553e-05, "loss": 0.85, "step": 48665 }, { "epoch": 0.7, "grad_norm": 0.50390625, "learning_rate": 5.0561885271382316e-05, "loss": 0.9248, "step": 48670 }, { "epoch": 0.7, "grad_norm": 0.79296875, "learning_rate": 5.054012422201646e-05, "loss": 1.043, "step": 48675 }, { "epoch": 0.7, "grad_norm": 0.5625, "learning_rate": 5.051836627285191e-05, "loss": 0.8783, "step": 48680 }, { "epoch": 0.7, "grad_norm": 0.53515625, "learning_rate": 5.0496611425252515e-05, "loss": 1.0747, "step": 48685 }, { "epoch": 0.7, "grad_norm": 0.640625, "learning_rate": 5.047485968058194e-05, "loss": 0.9795, "step": 48690 }, { "epoch": 0.7, "grad_norm": 0.5859375, "learning_rate": 5.045311104020353e-05, "loss": 1.0219, "step": 48695 }, { "epoch": 0.7, "grad_norm": 0.578125, "learning_rate": 5.0431365505480536e-05, "loss": 0.924, "step": 48700 }, { "epoch": 0.7, "grad_norm": 0.578125, "learning_rate": 5.040962307777606e-05, "loss": 0.8713, "step": 48705 }, { "epoch": 0.7, "grad_norm": 0.58203125, "learning_rate": 5.0387883758452836e-05, "loss": 1.0171, "step": 48710 }, { "epoch": 0.7, "grad_norm": 0.5859375, "learning_rate": 5.036614754887356e-05, "loss": 0.9331, "step": 48715 }, { "epoch": 0.7, "grad_norm": 0.6328125, "learning_rate": 5.0344414450400734e-05, "loss": 0.9752, "step": 48720 }, { "epoch": 0.7, "grad_norm": 0.58203125, "learning_rate": 5.032268446439656e-05, "loss": 0.909, "step": 48725 }, { "epoch": 0.7, "grad_norm": 0.61328125, "learning_rate": 5.030095759222306e-05, "loss": 0.9741, "step": 48730 }, { "epoch": 0.7, "grad_norm": 0.578125, "learning_rate": 5.0279233835242135e-05, "loss": 0.9015, "step": 48735 }, { "epoch": 0.7, "grad_norm": 0.53125, "learning_rate": 5.025751319481551e-05, "loss": 0.8833, "step": 48740 }, { "epoch": 0.7, "grad_norm": 0.5546875, "learning_rate": 5.023579567230456e-05, "loss": 0.8863, "step": 48745 }, { "epoch": 0.7, "grad_norm": 0.55859375, "learning_rate": 5.021408126907063e-05, "loss": 1.0758, "step": 48750 }, { "epoch": 0.7, "grad_norm": 0.640625, "learning_rate": 5.019236998647482e-05, "loss": 0.9996, "step": 48755 }, { "epoch": 0.7, "grad_norm": 0.515625, "learning_rate": 5.017066182587794e-05, "loss": 0.905, "step": 48760 }, { "epoch": 0.7, "grad_norm": 0.53125, "learning_rate": 5.0148956788640736e-05, "loss": 1.0319, "step": 48765 }, { "epoch": 0.7, "grad_norm": 0.55078125, "learning_rate": 5.012725487612376e-05, "loss": 0.8233, "step": 48770 }, { "epoch": 0.7, "grad_norm": 0.50390625, "learning_rate": 5.0105556089687186e-05, "loss": 0.9083, "step": 48775 }, { "epoch": 0.7, "grad_norm": 0.68359375, "learning_rate": 5.0083860430691175e-05, "loss": 1.1075, "step": 48780 }, { "epoch": 0.7, "grad_norm": 0.515625, "learning_rate": 5.0062167900495674e-05, "loss": 1.0288, "step": 48785 }, { "epoch": 0.7, "grad_norm": 0.49609375, "learning_rate": 5.004047850046034e-05, "loss": 0.9933, "step": 48790 }, { "epoch": 0.7, "grad_norm": 0.625, "learning_rate": 5.00187922319447e-05, "loss": 0.9871, "step": 48795 }, { "epoch": 0.7, "grad_norm": 0.5703125, "learning_rate": 4.999710909630813e-05, "loss": 0.9935, "step": 48800 }, { "epoch": 0.7, "grad_norm": 0.5390625, "learning_rate": 4.997542909490968e-05, "loss": 0.8804, "step": 48805 }, { "epoch": 0.7, "grad_norm": 0.53125, "learning_rate": 4.9953752229108295e-05, "loss": 0.9151, "step": 48810 }, { "epoch": 0.7, "grad_norm": 0.490234375, "learning_rate": 4.993207850026276e-05, "loss": 0.93, "step": 48815 }, { "epoch": 0.7, "grad_norm": 0.58984375, "learning_rate": 4.991040790973153e-05, "loss": 0.9725, "step": 48820 }, { "epoch": 0.7, "grad_norm": 0.59765625, "learning_rate": 4.9888740458873016e-05, "loss": 0.8681, "step": 48825 }, { "epoch": 0.7, "grad_norm": 0.65625, "learning_rate": 4.986707614904529e-05, "loss": 0.8718, "step": 48830 }, { "epoch": 0.7, "grad_norm": 0.578125, "learning_rate": 4.984541498160635e-05, "loss": 0.957, "step": 48835 }, { "epoch": 0.7, "grad_norm": 0.5234375, "learning_rate": 4.982375695791389e-05, "loss": 1.0334, "step": 48840 }, { "epoch": 0.7, "grad_norm": 0.65234375, "learning_rate": 4.98021020793255e-05, "loss": 1.1653, "step": 48845 }, { "epoch": 0.7, "grad_norm": 0.515625, "learning_rate": 4.9780450347198546e-05, "loss": 0.9699, "step": 48850 }, { "epoch": 0.7, "grad_norm": 0.60546875, "learning_rate": 4.975880176289014e-05, "loss": 0.9317, "step": 48855 }, { "epoch": 0.7, "grad_norm": 0.62109375, "learning_rate": 4.973715632775724e-05, "loss": 1.0843, "step": 48860 }, { "epoch": 0.7, "grad_norm": 0.52734375, "learning_rate": 4.9715514043156675e-05, "loss": 0.8991, "step": 48865 }, { "epoch": 0.7, "grad_norm": 0.458984375, "learning_rate": 4.9693874910444914e-05, "loss": 0.9711, "step": 48870 }, { "epoch": 0.7, "grad_norm": 0.71875, "learning_rate": 4.967223893097841e-05, "loss": 0.8734, "step": 48875 }, { "epoch": 0.7, "grad_norm": 0.53125, "learning_rate": 4.965060610611325e-05, "loss": 0.8456, "step": 48880 }, { "epoch": 0.7, "grad_norm": 0.58203125, "learning_rate": 4.9628976437205485e-05, "loss": 0.882, "step": 48885 }, { "epoch": 0.7, "grad_norm": 0.6015625, "learning_rate": 4.96073499256108e-05, "loss": 0.9383, "step": 48890 }, { "epoch": 0.7, "grad_norm": 0.53125, "learning_rate": 4.958572657268482e-05, "loss": 1.0069, "step": 48895 }, { "epoch": 0.7, "grad_norm": 0.625, "learning_rate": 4.956410637978295e-05, "loss": 1.146, "step": 48900 }, { "epoch": 0.7, "grad_norm": 0.546875, "learning_rate": 4.954248934826029e-05, "loss": 0.8967, "step": 48905 }, { "epoch": 0.7, "grad_norm": 0.5625, "learning_rate": 4.9520875479471904e-05, "loss": 1.0125, "step": 48910 }, { "epoch": 0.7, "grad_norm": 0.5078125, "learning_rate": 4.94992647747725e-05, "loss": 0.954, "step": 48915 }, { "epoch": 0.7, "grad_norm": 0.5625, "learning_rate": 4.9477657235516696e-05, "loss": 0.9786, "step": 48920 }, { "epoch": 0.7, "grad_norm": 0.73828125, "learning_rate": 4.945605286305891e-05, "loss": 0.865, "step": 48925 }, { "epoch": 0.7, "grad_norm": 0.51171875, "learning_rate": 4.94344516587533e-05, "loss": 0.8614, "step": 48930 }, { "epoch": 0.7, "grad_norm": 0.75, "learning_rate": 4.941285362395382e-05, "loss": 0.9378, "step": 48935 }, { "epoch": 0.7, "grad_norm": 0.515625, "learning_rate": 4.939125876001427e-05, "loss": 0.9931, "step": 48940 }, { "epoch": 0.7, "grad_norm": 0.578125, "learning_rate": 4.9369667068288303e-05, "loss": 1.0125, "step": 48945 }, { "epoch": 0.7, "grad_norm": 0.55859375, "learning_rate": 4.934807855012924e-05, "loss": 0.9566, "step": 48950 }, { "epoch": 0.7, "grad_norm": 0.6640625, "learning_rate": 4.932649320689029e-05, "loss": 1.149, "step": 48955 }, { "epoch": 0.7, "grad_norm": 0.55859375, "learning_rate": 4.93049110399245e-05, "loss": 1.0048, "step": 48960 }, { "epoch": 0.7, "grad_norm": 0.4765625, "learning_rate": 4.928333205058459e-05, "loss": 1.0096, "step": 48965 }, { "epoch": 0.7, "grad_norm": 0.52734375, "learning_rate": 4.9261756240223175e-05, "loss": 0.9787, "step": 48970 }, { "epoch": 0.7, "grad_norm": 0.57421875, "learning_rate": 4.924018361019271e-05, "loss": 0.9944, "step": 48975 }, { "epoch": 0.7, "grad_norm": 0.52734375, "learning_rate": 4.9218614161845345e-05, "loss": 0.8065, "step": 48980 }, { "epoch": 0.7, "grad_norm": 0.52734375, "learning_rate": 4.919704789653304e-05, "loss": 1.0229, "step": 48985 }, { "epoch": 0.7, "grad_norm": 0.54296875, "learning_rate": 4.917548481560764e-05, "loss": 0.8176, "step": 48990 }, { "epoch": 0.7, "grad_norm": 0.5390625, "learning_rate": 4.915392492042078e-05, "loss": 0.9873, "step": 48995 }, { "epoch": 0.7, "grad_norm": 0.58984375, "learning_rate": 4.913236821232377e-05, "loss": 0.9549, "step": 49000 }, { "epoch": 0.7, "grad_norm": 0.62109375, "learning_rate": 4.9110814692667836e-05, "loss": 1.1025, "step": 49005 }, { "epoch": 0.7, "grad_norm": 0.5, "learning_rate": 4.908926436280406e-05, "loss": 0.9839, "step": 49010 }, { "epoch": 0.7, "grad_norm": 0.51953125, "learning_rate": 4.906771722408312e-05, "loss": 0.9247, "step": 49015 }, { "epoch": 0.7, "grad_norm": 0.60546875, "learning_rate": 4.904617327785568e-05, "loss": 0.9522, "step": 49020 }, { "epoch": 0.7, "grad_norm": 0.5625, "learning_rate": 4.902463252547217e-05, "loss": 0.9195, "step": 49025 }, { "epoch": 0.7, "grad_norm": 0.62109375, "learning_rate": 4.90030949682827e-05, "loss": 0.9347, "step": 49030 }, { "epoch": 0.7, "grad_norm": 0.58984375, "learning_rate": 4.898156060763737e-05, "loss": 0.9197, "step": 49035 }, { "epoch": 0.7, "grad_norm": 0.63671875, "learning_rate": 4.896002944488593e-05, "loss": 0.8434, "step": 49040 }, { "epoch": 0.7, "grad_norm": 0.53125, "learning_rate": 4.893850148137792e-05, "loss": 0.8596, "step": 49045 }, { "epoch": 0.7, "grad_norm": 0.59375, "learning_rate": 4.891697671846279e-05, "loss": 0.9378, "step": 49050 }, { "epoch": 0.7, "grad_norm": 0.62890625, "learning_rate": 4.889545515748979e-05, "loss": 0.9958, "step": 49055 }, { "epoch": 0.7, "grad_norm": 0.498046875, "learning_rate": 4.8873936799807816e-05, "loss": 0.8579, "step": 49060 }, { "epoch": 0.7, "grad_norm": 0.50390625, "learning_rate": 4.885242164676571e-05, "loss": 0.8905, "step": 49065 }, { "epoch": 0.7, "grad_norm": 0.57421875, "learning_rate": 4.883090969971211e-05, "loss": 0.9437, "step": 49070 }, { "epoch": 0.7, "grad_norm": 0.494140625, "learning_rate": 4.8809400959995335e-05, "loss": 0.949, "step": 49075 }, { "epoch": 0.7, "grad_norm": 0.53125, "learning_rate": 4.87878954289636e-05, "loss": 0.9098, "step": 49080 }, { "epoch": 0.7, "grad_norm": 0.5546875, "learning_rate": 4.876639310796495e-05, "loss": 0.94, "step": 49085 }, { "epoch": 0.7, "grad_norm": 0.52734375, "learning_rate": 4.874489399834712e-05, "loss": 0.9608, "step": 49090 }, { "epoch": 0.7, "grad_norm": 0.609375, "learning_rate": 4.872339810145767e-05, "loss": 1.0145, "step": 49095 }, { "epoch": 0.7, "grad_norm": 0.6171875, "learning_rate": 4.8701905418644044e-05, "loss": 1.0344, "step": 49100 }, { "epoch": 0.7, "grad_norm": 0.5546875, "learning_rate": 4.8680415951253436e-05, "loss": 0.9625, "step": 49105 }, { "epoch": 0.7, "grad_norm": 0.63671875, "learning_rate": 4.865892970063276e-05, "loss": 0.8345, "step": 49110 }, { "epoch": 0.7, "grad_norm": 0.6640625, "learning_rate": 4.863744666812885e-05, "loss": 1.032, "step": 49115 }, { "epoch": 0.7, "grad_norm": 0.71875, "learning_rate": 4.861596685508832e-05, "loss": 1.0246, "step": 49120 }, { "epoch": 0.7, "grad_norm": 0.51171875, "learning_rate": 4.8594490262857474e-05, "loss": 1.0238, "step": 49125 }, { "epoch": 0.7, "grad_norm": 0.61328125, "learning_rate": 4.857301689278252e-05, "loss": 1.1131, "step": 49130 }, { "epoch": 0.7, "grad_norm": 0.6640625, "learning_rate": 4.8551546746209474e-05, "loss": 1.04, "step": 49135 }, { "epoch": 0.7, "grad_norm": 0.6328125, "learning_rate": 4.853007982448408e-05, "loss": 0.9312, "step": 49140 }, { "epoch": 0.7, "grad_norm": 0.53515625, "learning_rate": 4.8508616128951844e-05, "loss": 1.0589, "step": 49145 }, { "epoch": 0.71, "grad_norm": 0.58203125, "learning_rate": 4.848715566095825e-05, "loss": 0.9474, "step": 49150 }, { "epoch": 0.71, "grad_norm": 0.578125, "learning_rate": 4.846569842184836e-05, "loss": 1.0166, "step": 49155 }, { "epoch": 0.71, "grad_norm": 0.57421875, "learning_rate": 4.8444244412967186e-05, "loss": 1.1753, "step": 49160 }, { "epoch": 0.71, "grad_norm": 0.56640625, "learning_rate": 4.8422793635659535e-05, "loss": 0.9829, "step": 49165 }, { "epoch": 0.71, "grad_norm": 0.51171875, "learning_rate": 4.8401346091269887e-05, "loss": 0.89, "step": 49170 }, { "epoch": 0.71, "grad_norm": 0.5390625, "learning_rate": 4.837990178114261e-05, "loss": 0.9116, "step": 49175 }, { "epoch": 0.71, "grad_norm": 0.6328125, "learning_rate": 4.8358460706621934e-05, "loss": 0.8536, "step": 49180 }, { "epoch": 0.71, "grad_norm": 0.57421875, "learning_rate": 4.833702286905172e-05, "loss": 0.9304, "step": 49185 }, { "epoch": 0.71, "grad_norm": 0.5390625, "learning_rate": 4.831558826977578e-05, "loss": 1.0011, "step": 49190 }, { "epoch": 0.71, "grad_norm": 0.5078125, "learning_rate": 4.82941569101376e-05, "loss": 0.807, "step": 49195 }, { "epoch": 0.71, "grad_norm": 0.54296875, "learning_rate": 4.827272879148058e-05, "loss": 0.8885, "step": 49200 }, { "epoch": 0.71, "grad_norm": 0.5703125, "learning_rate": 4.8251303915147805e-05, "loss": 0.8502, "step": 49205 }, { "epoch": 0.71, "grad_norm": 0.515625, "learning_rate": 4.822988228248224e-05, "loss": 1.0452, "step": 49210 }, { "epoch": 0.71, "grad_norm": 0.515625, "learning_rate": 4.8208463894826635e-05, "loss": 0.9482, "step": 49215 }, { "epoch": 0.71, "grad_norm": 0.5859375, "learning_rate": 4.818704875352348e-05, "loss": 0.8321, "step": 49220 }, { "epoch": 0.71, "grad_norm": 0.58203125, "learning_rate": 4.816563685991511e-05, "loss": 1.0368, "step": 49225 }, { "epoch": 0.71, "grad_norm": 0.52734375, "learning_rate": 4.81442282153437e-05, "loss": 0.9997, "step": 49230 }, { "epoch": 0.71, "grad_norm": 0.5234375, "learning_rate": 4.812282282115108e-05, "loss": 0.9241, "step": 49235 }, { "epoch": 0.71, "grad_norm": 0.54296875, "learning_rate": 4.810142067867901e-05, "loss": 1.0477, "step": 49240 }, { "epoch": 0.71, "grad_norm": 0.55078125, "learning_rate": 4.808002178926908e-05, "loss": 0.9772, "step": 49245 }, { "epoch": 0.71, "grad_norm": 0.546875, "learning_rate": 4.805862615426243e-05, "loss": 0.9679, "step": 49250 }, { "epoch": 0.71, "grad_norm": 0.51953125, "learning_rate": 4.8037233775000254e-05, "loss": 0.8722, "step": 49255 }, { "epoch": 0.71, "grad_norm": 0.57421875, "learning_rate": 4.8015844652823494e-05, "loss": 1.0573, "step": 49260 }, { "epoch": 0.71, "grad_norm": 0.55859375, "learning_rate": 4.799445878907275e-05, "loss": 0.9677, "step": 49265 }, { "epoch": 0.71, "grad_norm": 0.53515625, "learning_rate": 4.797307618508855e-05, "loss": 0.939, "step": 49270 }, { "epoch": 0.71, "grad_norm": 0.609375, "learning_rate": 4.7951696842211235e-05, "loss": 0.9858, "step": 49275 }, { "epoch": 0.71, "grad_norm": 0.51953125, "learning_rate": 4.79303207617808e-05, "loss": 1.0011, "step": 49280 }, { "epoch": 0.71, "grad_norm": 0.59375, "learning_rate": 4.7908947945137164e-05, "loss": 0.8575, "step": 49285 }, { "epoch": 0.71, "grad_norm": 0.51953125, "learning_rate": 4.788757839362004e-05, "loss": 0.9424, "step": 49290 }, { "epoch": 0.71, "grad_norm": 0.53515625, "learning_rate": 4.786621210856884e-05, "loss": 0.9988, "step": 49295 }, { "epoch": 0.71, "grad_norm": 0.53515625, "learning_rate": 4.784484909132281e-05, "loss": 0.8938, "step": 49300 }, { "epoch": 0.71, "grad_norm": 0.60546875, "learning_rate": 4.782348934322104e-05, "loss": 0.9112, "step": 49305 }, { "epoch": 0.71, "grad_norm": 0.5546875, "learning_rate": 4.7802132865602425e-05, "loss": 0.9679, "step": 49310 }, { "epoch": 0.71, "grad_norm": 0.62109375, "learning_rate": 4.778077965980552e-05, "loss": 0.8787, "step": 49315 }, { "epoch": 0.71, "grad_norm": 0.6015625, "learning_rate": 4.775942972716883e-05, "loss": 1.1053, "step": 49320 }, { "epoch": 0.71, "grad_norm": 0.55859375, "learning_rate": 4.773808306903061e-05, "loss": 0.9912, "step": 49325 }, { "epoch": 0.71, "grad_norm": 0.59765625, "learning_rate": 4.771673968672883e-05, "loss": 1.0677, "step": 49330 }, { "epoch": 0.71, "grad_norm": 0.5390625, "learning_rate": 4.769539958160135e-05, "loss": 0.8735, "step": 49335 }, { "epoch": 0.71, "grad_norm": 0.578125, "learning_rate": 4.767406275498584e-05, "loss": 1.1303, "step": 49340 }, { "epoch": 0.71, "grad_norm": 0.578125, "learning_rate": 4.765272920821963e-05, "loss": 0.9533, "step": 49345 }, { "epoch": 0.71, "grad_norm": 0.5546875, "learning_rate": 4.763139894264e-05, "loss": 0.97, "step": 49350 }, { "epoch": 0.71, "grad_norm": 0.55859375, "learning_rate": 4.7610071959583935e-05, "loss": 0.9927, "step": 49355 }, { "epoch": 0.71, "grad_norm": 0.55078125, "learning_rate": 4.758874826038819e-05, "loss": 0.9571, "step": 49360 }, { "epoch": 0.71, "grad_norm": 0.5234375, "learning_rate": 4.75674278463894e-05, "loss": 0.8435, "step": 49365 }, { "epoch": 0.71, "grad_norm": 0.6015625, "learning_rate": 4.7546110718923974e-05, "loss": 1.0449, "step": 49370 }, { "epoch": 0.71, "grad_norm": 0.5234375, "learning_rate": 4.7524796879328035e-05, "loss": 0.9614, "step": 49375 }, { "epoch": 0.71, "grad_norm": 0.57421875, "learning_rate": 4.750348632893759e-05, "loss": 0.9131, "step": 49380 }, { "epoch": 0.71, "grad_norm": 0.5703125, "learning_rate": 4.748217906908846e-05, "loss": 0.9521, "step": 49385 }, { "epoch": 0.71, "grad_norm": 0.6015625, "learning_rate": 4.7460875101116106e-05, "loss": 0.9026, "step": 49390 }, { "epoch": 0.71, "grad_norm": 0.56640625, "learning_rate": 4.7439574426355946e-05, "loss": 0.9808, "step": 49395 }, { "epoch": 0.71, "grad_norm": 0.65234375, "learning_rate": 4.741827704614317e-05, "loss": 0.8594, "step": 49400 }, { "epoch": 0.71, "grad_norm": 0.54296875, "learning_rate": 4.739698296181265e-05, "loss": 0.9514, "step": 49405 }, { "epoch": 0.71, "grad_norm": 0.53515625, "learning_rate": 4.737569217469913e-05, "loss": 0.8326, "step": 49410 }, { "epoch": 0.71, "grad_norm": 0.625, "learning_rate": 4.7354404686137155e-05, "loss": 1.1199, "step": 49415 }, { "epoch": 0.71, "grad_norm": 0.53515625, "learning_rate": 4.733312049746108e-05, "loss": 0.9097, "step": 49420 }, { "epoch": 0.71, "grad_norm": 0.546875, "learning_rate": 4.731183961000496e-05, "loss": 0.8891, "step": 49425 }, { "epoch": 0.71, "grad_norm": 0.498046875, "learning_rate": 4.729056202510275e-05, "loss": 0.934, "step": 49430 }, { "epoch": 0.71, "grad_norm": 0.55859375, "learning_rate": 4.726928774408818e-05, "loss": 0.8811, "step": 49435 }, { "epoch": 0.71, "grad_norm": 0.59765625, "learning_rate": 4.724801676829466e-05, "loss": 1.2237, "step": 49440 }, { "epoch": 0.71, "grad_norm": 0.5546875, "learning_rate": 4.7226749099055537e-05, "loss": 0.9113, "step": 49445 }, { "epoch": 0.71, "grad_norm": 0.515625, "learning_rate": 4.720548473770392e-05, "loss": 0.9965, "step": 49450 }, { "epoch": 0.71, "grad_norm": 0.5703125, "learning_rate": 4.718422368557265e-05, "loss": 0.9313, "step": 49455 }, { "epoch": 0.71, "grad_norm": 0.58984375, "learning_rate": 4.716296594399434e-05, "loss": 1.0277, "step": 49460 }, { "epoch": 0.71, "grad_norm": 0.5703125, "learning_rate": 4.714171151430156e-05, "loss": 1.0236, "step": 49465 }, { "epoch": 0.71, "grad_norm": 0.64453125, "learning_rate": 4.712046039782645e-05, "loss": 0.8902, "step": 49470 }, { "epoch": 0.71, "grad_norm": 0.578125, "learning_rate": 4.709921259590111e-05, "loss": 0.8605, "step": 49475 }, { "epoch": 0.71, "grad_norm": 0.55859375, "learning_rate": 4.707796810985742e-05, "loss": 0.9146, "step": 49480 }, { "epoch": 0.71, "grad_norm": 0.62890625, "learning_rate": 4.705672694102691e-05, "loss": 1.152, "step": 49485 }, { "epoch": 0.71, "grad_norm": 0.4765625, "learning_rate": 4.703548909074106e-05, "loss": 0.9783, "step": 49490 }, { "epoch": 0.71, "grad_norm": 0.7109375, "learning_rate": 4.7014254560331115e-05, "loss": 0.9032, "step": 49495 }, { "epoch": 0.71, "grad_norm": 0.58984375, "learning_rate": 4.699302335112799e-05, "loss": 0.9469, "step": 49500 }, { "epoch": 0.71, "grad_norm": 0.59765625, "learning_rate": 4.6971795464462584e-05, "loss": 0.9139, "step": 49505 }, { "epoch": 0.71, "grad_norm": 0.546875, "learning_rate": 4.695057090166539e-05, "loss": 0.852, "step": 49510 }, { "epoch": 0.71, "grad_norm": 0.5859375, "learning_rate": 4.692934966406687e-05, "loss": 0.9828, "step": 49515 }, { "epoch": 0.71, "grad_norm": 0.6328125, "learning_rate": 4.690813175299712e-05, "loss": 0.9554, "step": 49520 }, { "epoch": 0.71, "grad_norm": 0.625, "learning_rate": 4.688691716978613e-05, "loss": 0.9984, "step": 49525 }, { "epoch": 0.71, "grad_norm": 0.5546875, "learning_rate": 4.68657059157637e-05, "loss": 0.9265, "step": 49530 }, { "epoch": 0.71, "grad_norm": 0.5625, "learning_rate": 4.684449799225931e-05, "loss": 0.9029, "step": 49535 }, { "epoch": 0.71, "grad_norm": 0.66796875, "learning_rate": 4.682329340060232e-05, "loss": 0.9228, "step": 49540 }, { "epoch": 0.71, "grad_norm": 0.6015625, "learning_rate": 4.68020921421219e-05, "loss": 0.9272, "step": 49545 }, { "epoch": 0.71, "grad_norm": 0.5390625, "learning_rate": 4.6780894218146886e-05, "loss": 0.8161, "step": 49550 }, { "epoch": 0.71, "grad_norm": 0.5859375, "learning_rate": 4.6759699630006046e-05, "loss": 0.9438, "step": 49555 }, { "epoch": 0.71, "grad_norm": 0.546875, "learning_rate": 4.67385083790279e-05, "loss": 0.8106, "step": 49560 }, { "epoch": 0.71, "grad_norm": 0.52734375, "learning_rate": 4.6717320466540715e-05, "loss": 0.9336, "step": 49565 }, { "epoch": 0.71, "grad_norm": 0.51953125, "learning_rate": 4.6696135893872525e-05, "loss": 0.9585, "step": 49570 }, { "epoch": 0.71, "grad_norm": 0.6015625, "learning_rate": 4.667495466235125e-05, "loss": 0.9121, "step": 49575 }, { "epoch": 0.71, "grad_norm": 0.6171875, "learning_rate": 4.665377677330458e-05, "loss": 1.0132, "step": 49580 }, { "epoch": 0.71, "grad_norm": 0.53515625, "learning_rate": 4.66326022280599e-05, "loss": 1.0023, "step": 49585 }, { "epoch": 0.71, "grad_norm": 0.53125, "learning_rate": 4.661143102794453e-05, "loss": 0.9591, "step": 49590 }, { "epoch": 0.71, "grad_norm": 0.55859375, "learning_rate": 4.659026317428543e-05, "loss": 0.8945, "step": 49595 }, { "epoch": 0.71, "grad_norm": 0.54296875, "learning_rate": 4.656909866840947e-05, "loss": 0.9979, "step": 49600 }, { "epoch": 0.71, "grad_norm": 0.578125, "learning_rate": 4.654793751164329e-05, "loss": 0.7919, "step": 49605 }, { "epoch": 0.71, "grad_norm": 0.578125, "learning_rate": 4.652677970531323e-05, "loss": 1.1003, "step": 49610 }, { "epoch": 0.71, "grad_norm": 0.640625, "learning_rate": 4.6505625250745557e-05, "loss": 1.0737, "step": 49615 }, { "epoch": 0.71, "grad_norm": 0.46484375, "learning_rate": 4.6484474149266166e-05, "loss": 0.842, "step": 49620 }, { "epoch": 0.71, "grad_norm": 0.5234375, "learning_rate": 4.6463326402200934e-05, "loss": 1.0013, "step": 49625 }, { "epoch": 0.71, "grad_norm": 0.466796875, "learning_rate": 4.6442182010875345e-05, "loss": 0.8792, "step": 49630 }, { "epoch": 0.71, "grad_norm": 0.515625, "learning_rate": 4.642104097661477e-05, "loss": 0.7644, "step": 49635 }, { "epoch": 0.71, "grad_norm": 0.53125, "learning_rate": 4.639990330074442e-05, "loss": 0.9765, "step": 49640 }, { "epoch": 0.71, "grad_norm": 0.5625, "learning_rate": 4.637876898458913e-05, "loss": 0.8962, "step": 49645 }, { "epoch": 0.71, "grad_norm": 0.53515625, "learning_rate": 4.635763802947367e-05, "loss": 0.9404, "step": 49650 }, { "epoch": 0.71, "grad_norm": 0.48828125, "learning_rate": 4.633651043672259e-05, "loss": 0.9556, "step": 49655 }, { "epoch": 0.71, "grad_norm": 0.54296875, "learning_rate": 4.631538620766012e-05, "loss": 0.8018, "step": 49660 }, { "epoch": 0.71, "grad_norm": 0.64453125, "learning_rate": 4.629426534361042e-05, "loss": 0.9562, "step": 49665 }, { "epoch": 0.71, "grad_norm": 0.59765625, "learning_rate": 4.627314784589729e-05, "loss": 0.8734, "step": 49670 }, { "epoch": 0.71, "grad_norm": 0.60546875, "learning_rate": 4.625203371584449e-05, "loss": 0.9724, "step": 49675 }, { "epoch": 0.71, "grad_norm": 0.59375, "learning_rate": 4.6230922954775394e-05, "loss": 0.9479, "step": 49680 }, { "epoch": 0.71, "grad_norm": 0.57421875, "learning_rate": 4.620981556401327e-05, "loss": 0.995, "step": 49685 }, { "epoch": 0.71, "grad_norm": 0.63671875, "learning_rate": 4.6188711544881215e-05, "loss": 0.8704, "step": 49690 }, { "epoch": 0.71, "grad_norm": 0.5625, "learning_rate": 4.616761089870197e-05, "loss": 0.8862, "step": 49695 }, { "epoch": 0.71, "grad_norm": 0.57421875, "learning_rate": 4.6146513626798186e-05, "loss": 0.9441, "step": 49700 }, { "epoch": 0.71, "grad_norm": 0.60546875, "learning_rate": 4.612541973049229e-05, "loss": 0.8817, "step": 49705 }, { "epoch": 0.71, "grad_norm": 0.51953125, "learning_rate": 4.6104329211106426e-05, "loss": 0.9116, "step": 49710 }, { "epoch": 0.71, "grad_norm": 0.51953125, "learning_rate": 4.6083242069962564e-05, "loss": 0.9826, "step": 49715 }, { "epoch": 0.71, "grad_norm": 0.53125, "learning_rate": 4.6062158308382584e-05, "loss": 0.9834, "step": 49720 }, { "epoch": 0.71, "grad_norm": 0.5859375, "learning_rate": 4.604107792768787e-05, "loss": 0.8969, "step": 49725 }, { "epoch": 0.71, "grad_norm": 0.52734375, "learning_rate": 4.6020000929199856e-05, "loss": 0.9264, "step": 49730 }, { "epoch": 0.71, "grad_norm": 0.5546875, "learning_rate": 4.5998927314239694e-05, "loss": 0.9001, "step": 49735 }, { "epoch": 0.71, "grad_norm": 0.62890625, "learning_rate": 4.597785708412823e-05, "loss": 1.0587, "step": 49740 }, { "epoch": 0.71, "grad_norm": 0.53125, "learning_rate": 4.595679024018621e-05, "loss": 0.914, "step": 49745 }, { "epoch": 0.71, "grad_norm": 0.53515625, "learning_rate": 4.593572678373417e-05, "loss": 0.9823, "step": 49750 }, { "epoch": 0.71, "grad_norm": 0.546875, "learning_rate": 4.59146667160923e-05, "loss": 0.9742, "step": 49755 }, { "epoch": 0.71, "grad_norm": 0.640625, "learning_rate": 4.589361003858072e-05, "loss": 1.2027, "step": 49760 }, { "epoch": 0.71, "grad_norm": 0.5078125, "learning_rate": 4.5872556752519335e-05, "loss": 1.0227, "step": 49765 }, { "epoch": 0.71, "grad_norm": 0.54296875, "learning_rate": 4.585150685922773e-05, "loss": 0.9103, "step": 49770 }, { "epoch": 0.71, "grad_norm": 0.60546875, "learning_rate": 4.5830460360025315e-05, "loss": 0.9096, "step": 49775 }, { "epoch": 0.71, "grad_norm": 0.515625, "learning_rate": 4.580941725623134e-05, "loss": 1.0385, "step": 49780 }, { "epoch": 0.71, "grad_norm": 0.53125, "learning_rate": 4.578837754916483e-05, "loss": 0.8463, "step": 49785 }, { "epoch": 0.71, "grad_norm": 0.490234375, "learning_rate": 4.576734124014454e-05, "loss": 0.9174, "step": 49790 }, { "epoch": 0.71, "grad_norm": 0.53515625, "learning_rate": 4.574630833048905e-05, "loss": 0.9285, "step": 49795 }, { "epoch": 0.71, "grad_norm": 0.5546875, "learning_rate": 4.572527882151678e-05, "loss": 0.95, "step": 49800 }, { "epoch": 0.71, "grad_norm": 0.5546875, "learning_rate": 4.5704252714545824e-05, "loss": 0.9859, "step": 49805 }, { "epoch": 0.71, "grad_norm": 0.55859375, "learning_rate": 4.5683230010894143e-05, "loss": 1.0285, "step": 49810 }, { "epoch": 0.71, "grad_norm": 0.72265625, "learning_rate": 4.5662210711879495e-05, "loss": 0.9972, "step": 49815 }, { "epoch": 0.71, "grad_norm": 0.51171875, "learning_rate": 4.564119481881933e-05, "loss": 0.925, "step": 49820 }, { "epoch": 0.71, "grad_norm": 0.5859375, "learning_rate": 4.5620182333031025e-05, "loss": 0.8579, "step": 49825 }, { "epoch": 0.71, "grad_norm": 0.5078125, "learning_rate": 4.559917325583162e-05, "loss": 0.8751, "step": 49830 }, { "epoch": 0.71, "grad_norm": 0.57421875, "learning_rate": 4.557816758853796e-05, "loss": 0.9685, "step": 49835 }, { "epoch": 0.71, "grad_norm": 0.55859375, "learning_rate": 4.5557165332466736e-05, "loss": 0.9727, "step": 49840 }, { "epoch": 0.72, "grad_norm": 0.5234375, "learning_rate": 4.553616648893443e-05, "loss": 1.0464, "step": 49845 }, { "epoch": 0.72, "grad_norm": 0.5390625, "learning_rate": 4.55151710592572e-05, "loss": 0.9426, "step": 49850 }, { "epoch": 0.72, "grad_norm": 0.62109375, "learning_rate": 4.549417904475111e-05, "loss": 0.8065, "step": 49855 }, { "epoch": 0.72, "grad_norm": 0.57421875, "learning_rate": 4.547319044673197e-05, "loss": 0.9354, "step": 49860 }, { "epoch": 0.72, "grad_norm": 0.4609375, "learning_rate": 4.545220526651534e-05, "loss": 0.825, "step": 49865 }, { "epoch": 0.72, "grad_norm": 0.55859375, "learning_rate": 4.54312235054166e-05, "loss": 0.8915, "step": 49870 }, { "epoch": 0.72, "grad_norm": 0.51171875, "learning_rate": 4.5410245164750955e-05, "loss": 0.8697, "step": 49875 }, { "epoch": 0.72, "grad_norm": 0.435546875, "learning_rate": 4.5389270245833316e-05, "loss": 0.884, "step": 49880 }, { "epoch": 0.72, "grad_norm": 0.57421875, "learning_rate": 4.536829874997838e-05, "loss": 0.9207, "step": 49885 }, { "epoch": 0.72, "grad_norm": 0.5234375, "learning_rate": 4.5347330678500686e-05, "loss": 0.9551, "step": 49890 }, { "epoch": 0.72, "grad_norm": 0.59375, "learning_rate": 4.5326366032714595e-05, "loss": 0.9075, "step": 49895 }, { "epoch": 0.72, "grad_norm": 0.56640625, "learning_rate": 4.530540481393412e-05, "loss": 0.8319, "step": 49900 }, { "epoch": 0.72, "grad_norm": 0.53125, "learning_rate": 4.528444702347315e-05, "loss": 0.8604, "step": 49905 }, { "epoch": 0.72, "grad_norm": 0.52734375, "learning_rate": 4.5263492662645404e-05, "loss": 0.8437, "step": 49910 }, { "epoch": 0.72, "grad_norm": 0.65625, "learning_rate": 4.524254173276423e-05, "loss": 1.0757, "step": 49915 }, { "epoch": 0.72, "grad_norm": 0.51171875, "learning_rate": 4.522159423514292e-05, "loss": 0.9885, "step": 49920 }, { "epoch": 0.72, "grad_norm": 0.58203125, "learning_rate": 4.5200650171094495e-05, "loss": 1.059, "step": 49925 }, { "epoch": 0.72, "grad_norm": 0.55859375, "learning_rate": 4.517970954193172e-05, "loss": 0.9149, "step": 49930 }, { "epoch": 0.72, "grad_norm": 0.5859375, "learning_rate": 4.515877234896717e-05, "loss": 0.8068, "step": 49935 }, { "epoch": 0.72, "grad_norm": 0.53515625, "learning_rate": 4.5137838593513246e-05, "loss": 0.9659, "step": 49940 }, { "epoch": 0.72, "grad_norm": 0.50390625, "learning_rate": 4.5116908276882064e-05, "loss": 0.9508, "step": 49945 }, { "epoch": 0.72, "grad_norm": 0.58984375, "learning_rate": 4.509598140038557e-05, "loss": 0.9591, "step": 49950 }, { "epoch": 0.72, "grad_norm": 0.55859375, "learning_rate": 4.507505796533552e-05, "loss": 0.9213, "step": 49955 }, { "epoch": 0.72, "grad_norm": 0.67578125, "learning_rate": 4.505413797304336e-05, "loss": 0.8213, "step": 49960 }, { "epoch": 0.72, "grad_norm": 0.515625, "learning_rate": 4.5033221424820415e-05, "loss": 1.0068, "step": 49965 }, { "epoch": 0.72, "grad_norm": 0.49609375, "learning_rate": 4.501230832197778e-05, "loss": 0.8619, "step": 49970 }, { "epoch": 0.72, "grad_norm": 0.62109375, "learning_rate": 4.499139866582626e-05, "loss": 1.0136, "step": 49975 }, { "epoch": 0.72, "grad_norm": 0.59765625, "learning_rate": 4.497049245767655e-05, "loss": 1.099, "step": 49980 }, { "epoch": 0.72, "grad_norm": 0.56640625, "learning_rate": 4.4949589698839015e-05, "loss": 0.9329, "step": 49985 }, { "epoch": 0.72, "grad_norm": 0.52734375, "learning_rate": 4.4928690390623916e-05, "loss": 1.0729, "step": 49990 }, { "epoch": 0.72, "grad_norm": 0.5, "learning_rate": 4.490779453434121e-05, "loss": 0.8685, "step": 49995 }, { "epoch": 0.72, "grad_norm": 0.65625, "learning_rate": 4.488690213130067e-05, "loss": 1.0066, "step": 50000 }, { "epoch": 0.72, "grad_norm": 0.53125, "learning_rate": 4.486601318281192e-05, "loss": 0.9442, "step": 50005 }, { "epoch": 0.72, "grad_norm": 0.6640625, "learning_rate": 4.484512769018422e-05, "loss": 1.0699, "step": 50010 }, { "epoch": 0.72, "grad_norm": 0.51953125, "learning_rate": 4.482424565472673e-05, "loss": 0.9667, "step": 50015 }, { "epoch": 0.72, "grad_norm": 0.5625, "learning_rate": 4.480336707774839e-05, "loss": 1.105, "step": 50020 }, { "epoch": 0.72, "grad_norm": 0.56640625, "learning_rate": 4.4782491960557835e-05, "loss": 1.1043, "step": 50025 }, { "epoch": 0.72, "grad_norm": 0.54296875, "learning_rate": 4.476162030446357e-05, "loss": 0.9254, "step": 50030 }, { "epoch": 0.72, "grad_norm": 0.58203125, "learning_rate": 4.474075211077393e-05, "loss": 1.0297, "step": 50035 }, { "epoch": 0.72, "grad_norm": 0.53125, "learning_rate": 4.4719887380796824e-05, "loss": 0.9864, "step": 50040 }, { "epoch": 0.72, "grad_norm": 0.58203125, "learning_rate": 4.4699026115840115e-05, "loss": 0.8713, "step": 50045 }, { "epoch": 0.72, "grad_norm": 0.50390625, "learning_rate": 4.4678168317211475e-05, "loss": 1.0196, "step": 50050 }, { "epoch": 0.72, "grad_norm": 0.56640625, "learning_rate": 4.465731398621822e-05, "loss": 0.8765, "step": 50055 }, { "epoch": 0.72, "grad_norm": 0.609375, "learning_rate": 4.463646312416754e-05, "loss": 0.9513, "step": 50060 }, { "epoch": 0.72, "grad_norm": 0.6328125, "learning_rate": 4.461561573236645e-05, "loss": 0.9159, "step": 50065 }, { "epoch": 0.72, "grad_norm": 0.52734375, "learning_rate": 4.459477181212161e-05, "loss": 0.9396, "step": 50070 }, { "epoch": 0.72, "grad_norm": 0.53125, "learning_rate": 4.457393136473957e-05, "loss": 0.9387, "step": 50075 }, { "epoch": 0.72, "grad_norm": 0.58984375, "learning_rate": 4.4553094391526684e-05, "loss": 0.9666, "step": 50080 }, { "epoch": 0.72, "grad_norm": 0.578125, "learning_rate": 4.453226089378898e-05, "loss": 1.2035, "step": 50085 }, { "epoch": 0.72, "grad_norm": 0.5625, "learning_rate": 4.4511430872832294e-05, "loss": 0.9999, "step": 50090 }, { "epoch": 0.72, "grad_norm": 0.55078125, "learning_rate": 4.449060432996233e-05, "loss": 1.0427, "step": 50095 }, { "epoch": 0.72, "grad_norm": 0.51953125, "learning_rate": 4.4469781266484534e-05, "loss": 0.9715, "step": 50100 }, { "epoch": 0.72, "grad_norm": 0.60546875, "learning_rate": 4.444896168370406e-05, "loss": 1.0078, "step": 50105 }, { "epoch": 0.72, "grad_norm": 0.6015625, "learning_rate": 4.442814558292593e-05, "loss": 1.0199, "step": 50110 }, { "epoch": 0.72, "grad_norm": 0.55859375, "learning_rate": 4.4407332965454975e-05, "loss": 0.9279, "step": 50115 }, { "epoch": 0.72, "grad_norm": 0.515625, "learning_rate": 4.438652383259565e-05, "loss": 0.8802, "step": 50120 }, { "epoch": 0.72, "grad_norm": 0.58203125, "learning_rate": 4.436571818565236e-05, "loss": 1.0009, "step": 50125 }, { "epoch": 0.72, "grad_norm": 0.50390625, "learning_rate": 4.434491602592925e-05, "loss": 0.9599, "step": 50130 }, { "epoch": 0.72, "grad_norm": 0.56640625, "learning_rate": 4.432411735473015e-05, "loss": 0.9421, "step": 50135 }, { "epoch": 0.72, "grad_norm": 0.51953125, "learning_rate": 4.430332217335883e-05, "loss": 0.9521, "step": 50140 }, { "epoch": 0.72, "grad_norm": 0.51953125, "learning_rate": 4.42825304831187e-05, "loss": 0.9073, "step": 50145 }, { "epoch": 0.72, "grad_norm": 0.5859375, "learning_rate": 4.426174228531299e-05, "loss": 1.0736, "step": 50150 }, { "epoch": 0.72, "grad_norm": 0.53515625, "learning_rate": 4.424095758124474e-05, "loss": 0.9449, "step": 50155 }, { "epoch": 0.72, "grad_norm": 0.58203125, "learning_rate": 4.4220176372216826e-05, "loss": 0.9873, "step": 50160 }, { "epoch": 0.72, "grad_norm": 0.546875, "learning_rate": 4.419939865953173e-05, "loss": 0.9748, "step": 50165 }, { "epoch": 0.72, "grad_norm": 0.3984375, "learning_rate": 4.417862444449189e-05, "loss": 0.883, "step": 50170 }, { "epoch": 0.72, "grad_norm": 0.46484375, "learning_rate": 4.4157853728399475e-05, "loss": 0.8066, "step": 50175 }, { "epoch": 0.72, "grad_norm": 0.5703125, "learning_rate": 4.4137086512556356e-05, "loss": 0.8805, "step": 50180 }, { "epoch": 0.72, "grad_norm": 0.62109375, "learning_rate": 4.4116322798264276e-05, "loss": 0.8867, "step": 50185 }, { "epoch": 0.72, "grad_norm": 0.55859375, "learning_rate": 4.4095562586824766e-05, "loss": 1.1039, "step": 50190 }, { "epoch": 0.72, "grad_norm": 0.5625, "learning_rate": 4.4074805879539064e-05, "loss": 0.9521, "step": 50195 }, { "epoch": 0.72, "grad_norm": 0.50390625, "learning_rate": 4.405405267770818e-05, "loss": 0.9552, "step": 50200 }, { "epoch": 0.72, "grad_norm": 0.53125, "learning_rate": 4.4033302982633e-05, "loss": 0.9926, "step": 50205 }, { "epoch": 0.72, "grad_norm": 0.55859375, "learning_rate": 4.401255679561416e-05, "loss": 1.058, "step": 50210 }, { "epoch": 0.72, "grad_norm": 0.52734375, "learning_rate": 4.3991814117951994e-05, "loss": 0.8447, "step": 50215 }, { "epoch": 0.72, "grad_norm": 0.6328125, "learning_rate": 4.39710749509467e-05, "loss": 1.0724, "step": 50220 }, { "epoch": 0.72, "grad_norm": 0.66015625, "learning_rate": 4.395033929589828e-05, "loss": 1.06, "step": 50225 }, { "epoch": 0.72, "grad_norm": 0.4765625, "learning_rate": 4.392960715410639e-05, "loss": 1.0209, "step": 50230 }, { "epoch": 0.72, "grad_norm": 0.68359375, "learning_rate": 4.3908878526870575e-05, "loss": 0.9987, "step": 50235 }, { "epoch": 0.72, "grad_norm": 0.5, "learning_rate": 4.388815341549019e-05, "loss": 0.966, "step": 50240 }, { "epoch": 0.72, "grad_norm": 0.5, "learning_rate": 4.386743182126424e-05, "loss": 0.9384, "step": 50245 }, { "epoch": 0.72, "grad_norm": 0.57421875, "learning_rate": 4.384671374549156e-05, "loss": 0.9489, "step": 50250 }, { "epoch": 0.72, "grad_norm": 0.5234375, "learning_rate": 4.382599918947082e-05, "loss": 0.9616, "step": 50255 }, { "epoch": 0.72, "grad_norm": 0.609375, "learning_rate": 4.3805288154500456e-05, "loss": 1.1693, "step": 50260 }, { "epoch": 0.72, "grad_norm": 0.73046875, "learning_rate": 4.3784580641878606e-05, "loss": 1.0767, "step": 50265 }, { "epoch": 0.72, "grad_norm": 0.5390625, "learning_rate": 4.376387665290329e-05, "loss": 0.9063, "step": 50270 }, { "epoch": 0.72, "grad_norm": 0.55859375, "learning_rate": 4.3743176188872205e-05, "loss": 1.0128, "step": 50275 }, { "epoch": 0.72, "grad_norm": 0.57421875, "learning_rate": 4.37224792510829e-05, "loss": 0.9175, "step": 50280 }, { "epoch": 0.72, "grad_norm": 0.5390625, "learning_rate": 4.370178584083274e-05, "loss": 0.9061, "step": 50285 }, { "epoch": 0.72, "grad_norm": 0.5546875, "learning_rate": 4.368109595941872e-05, "loss": 0.8362, "step": 50290 }, { "epoch": 0.72, "grad_norm": 0.51953125, "learning_rate": 4.366040960813778e-05, "loss": 0.949, "step": 50295 }, { "epoch": 0.72, "grad_norm": 0.5, "learning_rate": 4.3639726788286495e-05, "loss": 0.9306, "step": 50300 }, { "epoch": 0.72, "grad_norm": 0.6796875, "learning_rate": 4.361904750116136e-05, "loss": 1.1314, "step": 50305 }, { "epoch": 0.72, "grad_norm": 0.57421875, "learning_rate": 4.359837174805851e-05, "loss": 1.0325, "step": 50310 }, { "epoch": 0.72, "grad_norm": 0.5390625, "learning_rate": 4.357769953027394e-05, "loss": 0.9517, "step": 50315 }, { "epoch": 0.72, "grad_norm": 0.5390625, "learning_rate": 4.3557030849103474e-05, "loss": 0.8991, "step": 50320 }, { "epoch": 0.72, "grad_norm": 0.54296875, "learning_rate": 4.353636570584255e-05, "loss": 0.9992, "step": 50325 }, { "epoch": 0.72, "grad_norm": 0.55859375, "learning_rate": 4.351570410178654e-05, "loss": 0.8851, "step": 50330 }, { "epoch": 0.72, "grad_norm": 0.55859375, "learning_rate": 4.3495046038230556e-05, "loss": 0.794, "step": 50335 }, { "epoch": 0.72, "grad_norm": 0.5546875, "learning_rate": 4.3474391516469416e-05, "loss": 0.9003, "step": 50340 }, { "epoch": 0.72, "grad_norm": 0.5078125, "learning_rate": 4.345374053779778e-05, "loss": 1.0992, "step": 50345 }, { "epoch": 0.72, "grad_norm": 0.55078125, "learning_rate": 4.343309310351013e-05, "loss": 0.9804, "step": 50350 }, { "epoch": 0.72, "grad_norm": 0.54296875, "learning_rate": 4.341244921490063e-05, "loss": 1.01, "step": 50355 }, { "epoch": 0.72, "grad_norm": 0.62109375, "learning_rate": 4.3391808873263215e-05, "loss": 0.7897, "step": 50360 }, { "epoch": 0.72, "grad_norm": 0.53125, "learning_rate": 4.33711720798917e-05, "loss": 0.9257, "step": 50365 }, { "epoch": 0.72, "grad_norm": 0.51953125, "learning_rate": 4.335053883607965e-05, "loss": 1.0943, "step": 50370 }, { "epoch": 0.72, "grad_norm": 0.52734375, "learning_rate": 4.332990914312031e-05, "loss": 1.0418, "step": 50375 }, { "epoch": 0.72, "grad_norm": 0.6171875, "learning_rate": 4.33092830023068e-05, "loss": 0.913, "step": 50380 }, { "epoch": 0.72, "grad_norm": 0.59375, "learning_rate": 4.3288660414932034e-05, "loss": 0.9002, "step": 50385 }, { "epoch": 0.72, "grad_norm": 0.53125, "learning_rate": 4.3268041382288594e-05, "loss": 0.7632, "step": 50390 }, { "epoch": 0.72, "grad_norm": 0.6015625, "learning_rate": 4.324742590566893e-05, "loss": 1.0351, "step": 50395 }, { "epoch": 0.72, "grad_norm": 0.5859375, "learning_rate": 4.322681398636531e-05, "loss": 0.8847, "step": 50400 }, { "epoch": 0.72, "grad_norm": 0.5234375, "learning_rate": 4.3206205625669574e-05, "loss": 1.0687, "step": 50405 }, { "epoch": 0.72, "grad_norm": 0.57421875, "learning_rate": 4.3185600824873565e-05, "loss": 1.0468, "step": 50410 }, { "epoch": 0.72, "grad_norm": 0.51953125, "learning_rate": 4.316499958526883e-05, "loss": 1.1039, "step": 50415 }, { "epoch": 0.72, "grad_norm": 0.5546875, "learning_rate": 4.3144401908146614e-05, "loss": 0.9662, "step": 50420 }, { "epoch": 0.72, "grad_norm": 0.62109375, "learning_rate": 4.3123807794798046e-05, "loss": 0.8069, "step": 50425 }, { "epoch": 0.72, "grad_norm": 0.515625, "learning_rate": 4.3103217246514015e-05, "loss": 0.9225, "step": 50430 }, { "epoch": 0.72, "grad_norm": 0.56640625, "learning_rate": 4.308263026458509e-05, "loss": 0.9417, "step": 50435 }, { "epoch": 0.72, "grad_norm": 0.6328125, "learning_rate": 4.3062046850301716e-05, "loss": 0.9334, "step": 50440 }, { "epoch": 0.72, "grad_norm": 0.515625, "learning_rate": 4.304146700495414e-05, "loss": 0.8333, "step": 50445 }, { "epoch": 0.72, "grad_norm": 0.5625, "learning_rate": 4.3020890729832244e-05, "loss": 0.9604, "step": 50450 }, { "epoch": 0.72, "grad_norm": 0.5234375, "learning_rate": 4.300031802622585e-05, "loss": 0.8044, "step": 50455 }, { "epoch": 0.72, "grad_norm": 0.578125, "learning_rate": 4.2979748895424396e-05, "loss": 0.9168, "step": 50460 }, { "epoch": 0.72, "grad_norm": 0.61328125, "learning_rate": 4.2959183338717255e-05, "loss": 0.9734, "step": 50465 }, { "epoch": 0.72, "grad_norm": 0.5234375, "learning_rate": 4.2938621357393436e-05, "loss": 0.9601, "step": 50470 }, { "epoch": 0.72, "grad_norm": 0.64453125, "learning_rate": 4.291806295274182e-05, "loss": 1.0021, "step": 50475 }, { "epoch": 0.72, "grad_norm": 0.494140625, "learning_rate": 4.289750812605107e-05, "loss": 0.9122, "step": 50480 }, { "epoch": 0.72, "grad_norm": 0.53125, "learning_rate": 4.28769568786095e-05, "loss": 0.8227, "step": 50485 }, { "epoch": 0.72, "grad_norm": 0.54296875, "learning_rate": 4.285640921170533e-05, "loss": 0.9285, "step": 50490 }, { "epoch": 0.72, "grad_norm": 0.62109375, "learning_rate": 4.283586512662655e-05, "loss": 0.8889, "step": 50495 }, { "epoch": 0.72, "grad_norm": 0.6015625, "learning_rate": 4.2815324624660814e-05, "loss": 1.0344, "step": 50500 }, { "epoch": 0.72, "grad_norm": 0.67578125, "learning_rate": 4.279478770709569e-05, "loss": 0.9646, "step": 50505 }, { "epoch": 0.72, "grad_norm": 0.546875, "learning_rate": 4.277425437521843e-05, "loss": 0.842, "step": 50510 }, { "epoch": 0.72, "grad_norm": 0.51953125, "learning_rate": 4.275372463031604e-05, "loss": 0.8737, "step": 50515 }, { "epoch": 0.72, "grad_norm": 0.62109375, "learning_rate": 4.273319847367539e-05, "loss": 0.8028, "step": 50520 }, { "epoch": 0.72, "grad_norm": 0.69921875, "learning_rate": 4.271267590658311e-05, "loss": 0.9467, "step": 50525 }, { "epoch": 0.72, "grad_norm": 0.67578125, "learning_rate": 4.269215693032552e-05, "loss": 1.0773, "step": 50530 }, { "epoch": 0.72, "grad_norm": 0.70703125, "learning_rate": 4.26716415461888e-05, "loss": 1.038, "step": 50535 }, { "epoch": 0.72, "grad_norm": 0.59765625, "learning_rate": 4.2651129755458916e-05, "loss": 1.0356, "step": 50540 }, { "epoch": 0.73, "grad_norm": 0.55078125, "learning_rate": 4.26306215594215e-05, "loss": 1.1298, "step": 50545 }, { "epoch": 0.73, "grad_norm": 0.50390625, "learning_rate": 4.2610116959362057e-05, "loss": 0.9046, "step": 50550 }, { "epoch": 0.73, "grad_norm": 0.51953125, "learning_rate": 4.2589615956565885e-05, "loss": 0.8925, "step": 50555 }, { "epoch": 0.73, "grad_norm": 0.51171875, "learning_rate": 4.256911855231798e-05, "loss": 0.97, "step": 50560 }, { "epoch": 0.73, "grad_norm": 0.6171875, "learning_rate": 4.2548624747903076e-05, "loss": 1.0284, "step": 50565 }, { "epoch": 0.73, "grad_norm": 0.61328125, "learning_rate": 4.2528134544605813e-05, "loss": 1.0616, "step": 50570 }, { "epoch": 0.73, "grad_norm": 0.61328125, "learning_rate": 4.250764794371057e-05, "loss": 0.9394, "step": 50575 }, { "epoch": 0.73, "grad_norm": 0.5, "learning_rate": 4.24871649465014e-05, "loss": 0.9177, "step": 50580 }, { "epoch": 0.73, "grad_norm": 0.60546875, "learning_rate": 4.246668555426223e-05, "loss": 0.965, "step": 50585 }, { "epoch": 0.73, "grad_norm": 0.578125, "learning_rate": 4.244620976827677e-05, "loss": 0.9909, "step": 50590 }, { "epoch": 0.73, "grad_norm": 0.6484375, "learning_rate": 4.242573758982839e-05, "loss": 1.0053, "step": 50595 }, { "epoch": 0.73, "grad_norm": 0.5703125, "learning_rate": 4.240526902020035e-05, "loss": 0.9533, "step": 50600 }, { "epoch": 0.73, "grad_norm": 0.5546875, "learning_rate": 4.238480406067567e-05, "loss": 0.9128, "step": 50605 }, { "epoch": 0.73, "grad_norm": 0.609375, "learning_rate": 4.236434271253711e-05, "loss": 1.0219, "step": 50610 }, { "epoch": 0.73, "grad_norm": 0.6328125, "learning_rate": 4.234388497706715e-05, "loss": 0.8261, "step": 50615 }, { "epoch": 0.73, "grad_norm": 0.50390625, "learning_rate": 4.2323430855548174e-05, "loss": 0.8961, "step": 50620 }, { "epoch": 0.73, "grad_norm": 1.03125, "learning_rate": 4.23029803492622e-05, "loss": 0.8444, "step": 50625 }, { "epoch": 0.73, "grad_norm": 0.52734375, "learning_rate": 4.228253345949114e-05, "loss": 1.001, "step": 50630 }, { "epoch": 0.73, "grad_norm": 0.61328125, "learning_rate": 4.2262090187516644e-05, "loss": 0.955, "step": 50635 }, { "epoch": 0.73, "grad_norm": 0.5390625, "learning_rate": 4.224165053462006e-05, "loss": 0.8988, "step": 50640 }, { "epoch": 0.73, "grad_norm": 0.48828125, "learning_rate": 4.2221214502082594e-05, "loss": 0.8975, "step": 50645 }, { "epoch": 0.73, "grad_norm": 0.6171875, "learning_rate": 4.220078209118525e-05, "loss": 0.9368, "step": 50650 }, { "epoch": 0.73, "grad_norm": 0.51953125, "learning_rate": 4.218035330320869e-05, "loss": 0.8767, "step": 50655 }, { "epoch": 0.73, "grad_norm": 0.51953125, "learning_rate": 4.2159928139433427e-05, "loss": 1.0704, "step": 50660 }, { "epoch": 0.73, "grad_norm": 0.73046875, "learning_rate": 4.2139506601139766e-05, "loss": 0.8408, "step": 50665 }, { "epoch": 0.73, "grad_norm": 0.5625, "learning_rate": 4.211908868960774e-05, "loss": 0.9176, "step": 50670 }, { "epoch": 0.73, "grad_norm": 0.546875, "learning_rate": 4.209867440611712e-05, "loss": 0.9723, "step": 50675 }, { "epoch": 0.73, "grad_norm": 0.58984375, "learning_rate": 4.2078263751947535e-05, "loss": 0.9628, "step": 50680 }, { "epoch": 0.73, "grad_norm": 0.5703125, "learning_rate": 4.205785672837837e-05, "loss": 1.221, "step": 50685 }, { "epoch": 0.73, "grad_norm": 0.54296875, "learning_rate": 4.2037453336688715e-05, "loss": 0.9129, "step": 50690 }, { "epoch": 0.73, "grad_norm": 0.58203125, "learning_rate": 4.201705357815748e-05, "loss": 1.0118, "step": 50695 }, { "epoch": 0.73, "grad_norm": 0.59765625, "learning_rate": 4.1996657454063415e-05, "loss": 1.0532, "step": 50700 }, { "epoch": 0.73, "grad_norm": 0.50390625, "learning_rate": 4.197626496568488e-05, "loss": 0.9336, "step": 50705 }, { "epoch": 0.73, "grad_norm": 0.484375, "learning_rate": 4.195587611430014e-05, "loss": 1.0174, "step": 50710 }, { "epoch": 0.73, "grad_norm": 0.60546875, "learning_rate": 4.193549090118727e-05, "loss": 1.0614, "step": 50715 }, { "epoch": 0.73, "grad_norm": 0.53515625, "learning_rate": 4.191510932762388e-05, "loss": 1.0202, "step": 50720 }, { "epoch": 0.73, "grad_norm": 0.6796875, "learning_rate": 4.189473139488759e-05, "loss": 0.9998, "step": 50725 }, { "epoch": 0.73, "grad_norm": 0.58203125, "learning_rate": 4.187435710425576e-05, "loss": 0.8776, "step": 50730 }, { "epoch": 0.73, "grad_norm": 0.6328125, "learning_rate": 4.1853986457005376e-05, "loss": 0.9999, "step": 50735 }, { "epoch": 0.73, "grad_norm": 0.470703125, "learning_rate": 4.183361945441335e-05, "loss": 0.8458, "step": 50740 }, { "epoch": 0.73, "grad_norm": 0.52734375, "learning_rate": 4.181325609775634e-05, "loss": 0.7763, "step": 50745 }, { "epoch": 0.73, "grad_norm": 0.66015625, "learning_rate": 4.179289638831067e-05, "loss": 1.0876, "step": 50750 }, { "epoch": 0.73, "grad_norm": 0.625, "learning_rate": 4.177254032735254e-05, "loss": 1.1139, "step": 50755 }, { "epoch": 0.73, "grad_norm": 0.84375, "learning_rate": 4.1752187916157945e-05, "loss": 1.0421, "step": 50760 }, { "epoch": 0.73, "grad_norm": 1.234375, "learning_rate": 4.173183915600251e-05, "loss": 0.9571, "step": 50765 }, { "epoch": 0.73, "grad_norm": 0.490234375, "learning_rate": 4.171149404816179e-05, "loss": 0.9549, "step": 50770 }, { "epoch": 0.73, "grad_norm": 0.6953125, "learning_rate": 4.1691152593910975e-05, "loss": 0.9044, "step": 50775 }, { "epoch": 0.73, "grad_norm": 0.5390625, "learning_rate": 4.167081479452516e-05, "loss": 1.0115, "step": 50780 }, { "epoch": 0.73, "grad_norm": 0.54296875, "learning_rate": 4.165048065127907e-05, "loss": 1.009, "step": 50785 }, { "epoch": 0.73, "grad_norm": 0.6328125, "learning_rate": 4.16301501654473e-05, "loss": 1.0415, "step": 50790 }, { "epoch": 0.73, "grad_norm": 0.55078125, "learning_rate": 4.1609823338304246e-05, "loss": 0.9369, "step": 50795 }, { "epoch": 0.73, "grad_norm": 0.5078125, "learning_rate": 4.158950017112392e-05, "loss": 0.8816, "step": 50800 }, { "epoch": 0.73, "grad_norm": 0.55859375, "learning_rate": 4.156918066518025e-05, "loss": 0.9797, "step": 50805 }, { "epoch": 0.73, "grad_norm": 0.4921875, "learning_rate": 4.154886482174691e-05, "loss": 0.8669, "step": 50810 }, { "epoch": 0.73, "grad_norm": 0.546875, "learning_rate": 4.152855264209727e-05, "loss": 0.9375, "step": 50815 }, { "epoch": 0.73, "grad_norm": 0.671875, "learning_rate": 4.150824412750458e-05, "loss": 1.0531, "step": 50820 }, { "epoch": 0.73, "grad_norm": 0.5, "learning_rate": 4.148793927924176e-05, "loss": 0.998, "step": 50825 }, { "epoch": 0.73, "grad_norm": 0.53515625, "learning_rate": 4.146763809858151e-05, "loss": 0.856, "step": 50830 }, { "epoch": 0.73, "grad_norm": 0.58203125, "learning_rate": 4.144734058679636e-05, "loss": 0.9849, "step": 50835 }, { "epoch": 0.73, "grad_norm": 0.50390625, "learning_rate": 4.142704674515863e-05, "loss": 0.9253, "step": 50840 }, { "epoch": 0.73, "grad_norm": 0.55078125, "learning_rate": 4.140675657494029e-05, "loss": 0.9308, "step": 50845 }, { "epoch": 0.73, "grad_norm": 0.49609375, "learning_rate": 4.1386470077413166e-05, "loss": 0.9967, "step": 50850 }, { "epoch": 0.73, "grad_norm": 0.52734375, "learning_rate": 4.13661872538489e-05, "loss": 0.9413, "step": 50855 }, { "epoch": 0.73, "grad_norm": 0.392578125, "learning_rate": 4.134590810551875e-05, "loss": 0.8395, "step": 50860 }, { "epoch": 0.73, "grad_norm": 0.578125, "learning_rate": 4.1325632633693886e-05, "loss": 1.0183, "step": 50865 }, { "epoch": 0.73, "grad_norm": 0.6875, "learning_rate": 4.130536083964524e-05, "loss": 1.0262, "step": 50870 }, { "epoch": 0.73, "grad_norm": 0.51953125, "learning_rate": 4.12850927246434e-05, "loss": 1.027, "step": 50875 }, { "epoch": 0.73, "grad_norm": 0.703125, "learning_rate": 4.12648282899588e-05, "loss": 1.0127, "step": 50880 }, { "epoch": 0.73, "grad_norm": 0.50390625, "learning_rate": 4.124456753686166e-05, "loss": 0.9388, "step": 50885 }, { "epoch": 0.73, "grad_norm": 0.51953125, "learning_rate": 4.1224310466621965e-05, "loss": 0.9285, "step": 50890 }, { "epoch": 0.73, "grad_norm": 0.65625, "learning_rate": 4.120405708050941e-05, "loss": 1.0692, "step": 50895 }, { "epoch": 0.73, "grad_norm": 0.55859375, "learning_rate": 4.11838073797935e-05, "loss": 0.9283, "step": 50900 }, { "epoch": 0.73, "grad_norm": 0.486328125, "learning_rate": 4.116356136574359e-05, "loss": 0.8697, "step": 50905 }, { "epoch": 0.73, "grad_norm": 0.490234375, "learning_rate": 4.11433190396286e-05, "loss": 1.0036, "step": 50910 }, { "epoch": 0.73, "grad_norm": 0.47265625, "learning_rate": 4.1123080402717415e-05, "loss": 0.9792, "step": 50915 }, { "epoch": 0.73, "grad_norm": 0.55078125, "learning_rate": 4.110284545627865e-05, "loss": 1.0078, "step": 50920 }, { "epoch": 0.73, "grad_norm": 0.70703125, "learning_rate": 4.1082614201580604e-05, "loss": 1.0572, "step": 50925 }, { "epoch": 0.73, "grad_norm": 0.37890625, "learning_rate": 4.106238663989137e-05, "loss": 0.7276, "step": 50930 }, { "epoch": 0.73, "grad_norm": 0.55859375, "learning_rate": 4.10421627724789e-05, "loss": 0.9774, "step": 50935 }, { "epoch": 0.73, "grad_norm": 0.6640625, "learning_rate": 4.102194260061078e-05, "loss": 1.0234, "step": 50940 }, { "epoch": 0.73, "grad_norm": 0.5078125, "learning_rate": 4.100172612555446e-05, "loss": 0.873, "step": 50945 }, { "epoch": 0.73, "grad_norm": 0.58984375, "learning_rate": 4.098151334857718e-05, "loss": 1.0226, "step": 50950 }, { "epoch": 0.73, "grad_norm": 0.546875, "learning_rate": 4.0961304270945824e-05, "loss": 1.0075, "step": 50955 }, { "epoch": 0.73, "grad_norm": 0.55859375, "learning_rate": 4.094109889392715e-05, "loss": 0.8681, "step": 50960 }, { "epoch": 0.73, "grad_norm": 0.609375, "learning_rate": 4.0920897218787704e-05, "loss": 1.0173, "step": 50965 }, { "epoch": 0.73, "grad_norm": 0.55859375, "learning_rate": 4.090069924679367e-05, "loss": 1.1367, "step": 50970 }, { "epoch": 0.73, "grad_norm": 0.5859375, "learning_rate": 4.088050497921111e-05, "loss": 0.8676, "step": 50975 }, { "epoch": 0.73, "grad_norm": 0.5234375, "learning_rate": 4.086031441730587e-05, "loss": 1.0029, "step": 50980 }, { "epoch": 0.73, "grad_norm": 0.5390625, "learning_rate": 4.0840127562343476e-05, "loss": 0.8638, "step": 50985 }, { "epoch": 0.73, "grad_norm": 0.609375, "learning_rate": 4.081994441558923e-05, "loss": 1.0133, "step": 50990 }, { "epoch": 0.73, "grad_norm": 0.51953125, "learning_rate": 4.0799764978308265e-05, "loss": 0.9686, "step": 50995 }, { "epoch": 0.73, "grad_norm": 0.6328125, "learning_rate": 4.0779589251765495e-05, "loss": 1.0507, "step": 51000 }, { "epoch": 0.73, "grad_norm": 0.62890625, "learning_rate": 4.075941723722547e-05, "loss": 0.8381, "step": 51005 }, { "epoch": 0.73, "grad_norm": 0.55078125, "learning_rate": 4.0739248935952646e-05, "loss": 0.9353, "step": 51010 }, { "epoch": 0.73, "grad_norm": 0.53125, "learning_rate": 4.071908434921123e-05, "loss": 0.8137, "step": 51015 }, { "epoch": 0.73, "grad_norm": 0.515625, "learning_rate": 4.069892347826509e-05, "loss": 0.9861, "step": 51020 }, { "epoch": 0.73, "grad_norm": 0.51953125, "learning_rate": 4.0678766324377957e-05, "loss": 0.8659, "step": 51025 }, { "epoch": 0.73, "grad_norm": 0.51171875, "learning_rate": 4.065861288881335e-05, "loss": 0.9603, "step": 51030 }, { "epoch": 0.73, "grad_norm": 0.5625, "learning_rate": 4.0638463172834484e-05, "loss": 0.9187, "step": 51035 }, { "epoch": 0.73, "grad_norm": 0.51953125, "learning_rate": 4.0618317177704304e-05, "loss": 0.9147, "step": 51040 }, { "epoch": 0.73, "grad_norm": 0.66796875, "learning_rate": 4.059817490468564e-05, "loss": 1.0506, "step": 51045 }, { "epoch": 0.73, "grad_norm": 0.55859375, "learning_rate": 4.057803635504105e-05, "loss": 0.9171, "step": 51050 }, { "epoch": 0.73, "grad_norm": 0.5859375, "learning_rate": 4.0557901530032794e-05, "loss": 1.1271, "step": 51055 }, { "epoch": 0.73, "grad_norm": 0.5625, "learning_rate": 4.053777043092296e-05, "loss": 0.9112, "step": 51060 }, { "epoch": 0.73, "grad_norm": 0.58203125, "learning_rate": 4.051764305897344e-05, "loss": 0.9855, "step": 51065 }, { "epoch": 0.73, "grad_norm": 0.546875, "learning_rate": 4.049751941544576e-05, "loss": 0.8994, "step": 51070 }, { "epoch": 0.73, "grad_norm": 0.58203125, "learning_rate": 4.047739950160137e-05, "loss": 0.9347, "step": 51075 }, { "epoch": 0.73, "grad_norm": 0.55078125, "learning_rate": 4.045728331870132e-05, "loss": 0.9463, "step": 51080 }, { "epoch": 0.73, "grad_norm": 0.494140625, "learning_rate": 4.043717086800661e-05, "loss": 0.8492, "step": 51085 }, { "epoch": 0.73, "grad_norm": 0.5703125, "learning_rate": 4.041706215077784e-05, "loss": 1.1038, "step": 51090 }, { "epoch": 0.73, "grad_norm": 0.5234375, "learning_rate": 4.03969571682755e-05, "loss": 0.8422, "step": 51095 }, { "epoch": 0.73, "grad_norm": 0.6328125, "learning_rate": 4.0376855921759725e-05, "loss": 1.0364, "step": 51100 }, { "epoch": 0.73, "grad_norm": 0.7890625, "learning_rate": 4.0356758412490535e-05, "loss": 0.9729, "step": 51105 }, { "epoch": 0.73, "grad_norm": 0.515625, "learning_rate": 4.0336664641727697e-05, "loss": 1.0945, "step": 51110 }, { "epoch": 0.73, "grad_norm": 0.55859375, "learning_rate": 4.031657461073063e-05, "loss": 0.9299, "step": 51115 }, { "epoch": 0.73, "grad_norm": 0.56640625, "learning_rate": 4.029648832075863e-05, "loss": 0.9332, "step": 51120 }, { "epoch": 0.73, "grad_norm": 0.55859375, "learning_rate": 4.027640577307078e-05, "loss": 1.052, "step": 51125 }, { "epoch": 0.73, "grad_norm": 0.55078125, "learning_rate": 4.025632696892581e-05, "loss": 1.0119, "step": 51130 }, { "epoch": 0.73, "grad_norm": 0.51953125, "learning_rate": 4.0236251909582325e-05, "loss": 0.9903, "step": 51135 }, { "epoch": 0.73, "grad_norm": 0.578125, "learning_rate": 4.021618059629861e-05, "loss": 1.0225, "step": 51140 }, { "epoch": 0.73, "grad_norm": 0.474609375, "learning_rate": 4.0196113030332814e-05, "loss": 1.0148, "step": 51145 }, { "epoch": 0.73, "grad_norm": 0.5546875, "learning_rate": 4.017604921294273e-05, "loss": 0.9159, "step": 51150 }, { "epoch": 0.73, "grad_norm": 0.5546875, "learning_rate": 4.015598914538603e-05, "loss": 0.9721, "step": 51155 }, { "epoch": 0.73, "grad_norm": 0.515625, "learning_rate": 4.013593282892011e-05, "loss": 0.9497, "step": 51160 }, { "epoch": 0.73, "grad_norm": 0.5546875, "learning_rate": 4.011588026480206e-05, "loss": 0.9915, "step": 51165 }, { "epoch": 0.73, "grad_norm": 0.53515625, "learning_rate": 4.009583145428884e-05, "loss": 0.7746, "step": 51170 }, { "epoch": 0.73, "grad_norm": 0.55078125, "learning_rate": 4.007578639863717e-05, "loss": 0.8888, "step": 51175 }, { "epoch": 0.73, "grad_norm": 0.54296875, "learning_rate": 4.005574509910342e-05, "loss": 0.9815, "step": 51180 }, { "epoch": 0.73, "grad_norm": 0.6796875, "learning_rate": 4.0035707556943834e-05, "loss": 0.9475, "step": 51185 }, { "epoch": 0.73, "grad_norm": 0.56640625, "learning_rate": 4.0015673773414464e-05, "loss": 0.8932, "step": 51190 }, { "epoch": 0.73, "grad_norm": 0.60546875, "learning_rate": 3.99956437497709e-05, "loss": 0.8803, "step": 51195 }, { "epoch": 0.73, "grad_norm": 0.490234375, "learning_rate": 3.9975617487268744e-05, "loss": 0.7229, "step": 51200 }, { "epoch": 0.73, "grad_norm": 0.46484375, "learning_rate": 3.995559498716327e-05, "loss": 0.9048, "step": 51205 }, { "epoch": 0.73, "grad_norm": 0.51953125, "learning_rate": 3.993557625070945e-05, "loss": 0.8673, "step": 51210 }, { "epoch": 0.73, "grad_norm": 0.58203125, "learning_rate": 3.9915561279162125e-05, "loss": 0.9174, "step": 51215 }, { "epoch": 0.73, "grad_norm": 0.52734375, "learning_rate": 3.989555007377588e-05, "loss": 0.902, "step": 51220 }, { "epoch": 0.73, "grad_norm": 0.4765625, "learning_rate": 3.9875542635804976e-05, "loss": 0.9872, "step": 51225 }, { "epoch": 0.73, "grad_norm": 0.57421875, "learning_rate": 3.985553896650354e-05, "loss": 0.9109, "step": 51230 }, { "epoch": 0.73, "grad_norm": 0.5078125, "learning_rate": 3.983553906712544e-05, "loss": 1.0102, "step": 51235 }, { "epoch": 0.74, "grad_norm": 0.494140625, "learning_rate": 3.9815542938924286e-05, "loss": 0.8146, "step": 51240 }, { "epoch": 0.74, "grad_norm": 0.5625, "learning_rate": 3.9795550583153404e-05, "loss": 0.9904, "step": 51245 }, { "epoch": 0.74, "grad_norm": 0.58203125, "learning_rate": 3.977556200106598e-05, "loss": 0.9373, "step": 51250 }, { "epoch": 0.74, "grad_norm": 0.51171875, "learning_rate": 3.975557719391496e-05, "loss": 0.9666, "step": 51255 }, { "epoch": 0.74, "grad_norm": 0.609375, "learning_rate": 3.973559616295294e-05, "loss": 1.1397, "step": 51260 }, { "epoch": 0.74, "grad_norm": 0.58203125, "learning_rate": 3.971561890943237e-05, "loss": 1.0834, "step": 51265 }, { "epoch": 0.74, "grad_norm": 0.55859375, "learning_rate": 3.969564543460552e-05, "loss": 0.9221, "step": 51270 }, { "epoch": 0.74, "grad_norm": 0.546875, "learning_rate": 3.967567573972425e-05, "loss": 1.0058, "step": 51275 }, { "epoch": 0.74, "grad_norm": 0.5859375, "learning_rate": 3.965570982604033e-05, "loss": 0.9367, "step": 51280 }, { "epoch": 0.74, "grad_norm": 0.58984375, "learning_rate": 3.963574769480528e-05, "loss": 1.0296, "step": 51285 }, { "epoch": 0.74, "grad_norm": 0.53515625, "learning_rate": 3.9615789347270285e-05, "loss": 0.8648, "step": 51290 }, { "epoch": 0.74, "grad_norm": 0.57421875, "learning_rate": 3.9595834784686414e-05, "loss": 1.0255, "step": 51295 }, { "epoch": 0.74, "grad_norm": 0.53125, "learning_rate": 3.957588400830441e-05, "loss": 0.9507, "step": 51300 }, { "epoch": 0.74, "grad_norm": 0.54296875, "learning_rate": 3.955593701937479e-05, "loss": 1.0923, "step": 51305 }, { "epoch": 0.74, "grad_norm": 0.58984375, "learning_rate": 3.953599381914787e-05, "loss": 1.0396, "step": 51310 }, { "epoch": 0.74, "grad_norm": 1.09375, "learning_rate": 3.951605440887375e-05, "loss": 0.9572, "step": 51315 }, { "epoch": 0.74, "grad_norm": 0.546875, "learning_rate": 3.9496118789802196e-05, "loss": 0.9524, "step": 51320 }, { "epoch": 0.74, "grad_norm": 0.53125, "learning_rate": 3.947618696318282e-05, "loss": 0.8093, "step": 51325 }, { "epoch": 0.74, "grad_norm": 0.53515625, "learning_rate": 3.945625893026502e-05, "loss": 0.862, "step": 51330 }, { "epoch": 0.74, "grad_norm": 0.5234375, "learning_rate": 3.943633469229783e-05, "loss": 0.8987, "step": 51335 }, { "epoch": 0.74, "grad_norm": 0.54296875, "learning_rate": 3.941641425053014e-05, "loss": 1.0828, "step": 51340 }, { "epoch": 0.74, "grad_norm": 0.5703125, "learning_rate": 3.939649760621066e-05, "loss": 0.9208, "step": 51345 }, { "epoch": 0.74, "grad_norm": 0.59765625, "learning_rate": 3.937658476058772e-05, "loss": 0.8918, "step": 51350 }, { "epoch": 0.74, "grad_norm": 0.6953125, "learning_rate": 3.9356675714909455e-05, "loss": 1.0362, "step": 51355 }, { "epoch": 0.74, "grad_norm": 0.53125, "learning_rate": 3.933677047042382e-05, "loss": 0.9233, "step": 51360 }, { "epoch": 0.74, "grad_norm": 0.546875, "learning_rate": 3.931686902837854e-05, "loss": 1.0053, "step": 51365 }, { "epoch": 0.74, "grad_norm": 0.56640625, "learning_rate": 3.9296971390021e-05, "loss": 0.9761, "step": 51370 }, { "epoch": 0.74, "grad_norm": 0.53515625, "learning_rate": 3.9277077556598415e-05, "loss": 0.8039, "step": 51375 }, { "epoch": 0.74, "grad_norm": 0.55859375, "learning_rate": 3.9257187529357806e-05, "loss": 0.9285, "step": 51380 }, { "epoch": 0.74, "grad_norm": 0.50390625, "learning_rate": 3.9237301309545826e-05, "loss": 0.8729, "step": 51385 }, { "epoch": 0.74, "grad_norm": 0.51171875, "learning_rate": 3.9217418898408996e-05, "loss": 0.8877, "step": 51390 }, { "epoch": 0.74, "grad_norm": 0.458984375, "learning_rate": 3.919754029719363e-05, "loss": 0.7871, "step": 51395 }, { "epoch": 0.74, "grad_norm": 0.48828125, "learning_rate": 3.917766550714567e-05, "loss": 0.8088, "step": 51400 }, { "epoch": 0.74, "grad_norm": 0.56640625, "learning_rate": 3.915779452951087e-05, "loss": 0.9516, "step": 51405 }, { "epoch": 0.74, "grad_norm": 0.498046875, "learning_rate": 3.913792736553484e-05, "loss": 0.8929, "step": 51410 }, { "epoch": 0.74, "grad_norm": 0.57421875, "learning_rate": 3.9118064016462806e-05, "loss": 0.8689, "step": 51415 }, { "epoch": 0.74, "grad_norm": 0.53515625, "learning_rate": 3.909820448353986e-05, "loss": 0.8161, "step": 51420 }, { "epoch": 0.74, "grad_norm": 0.5625, "learning_rate": 3.907834876801085e-05, "loss": 0.9621, "step": 51425 }, { "epoch": 0.74, "grad_norm": 0.60546875, "learning_rate": 3.9058496871120295e-05, "loss": 0.9807, "step": 51430 }, { "epoch": 0.74, "grad_norm": 0.546875, "learning_rate": 3.903864879411255e-05, "loss": 0.9334, "step": 51435 }, { "epoch": 0.74, "grad_norm": 0.5859375, "learning_rate": 3.9018804538231776e-05, "loss": 0.9602, "step": 51440 }, { "epoch": 0.74, "grad_norm": 0.50390625, "learning_rate": 3.8998964104721745e-05, "loss": 0.8113, "step": 51445 }, { "epoch": 0.74, "grad_norm": 0.546875, "learning_rate": 3.897912749482615e-05, "loss": 1.0266, "step": 51450 }, { "epoch": 0.74, "grad_norm": 0.53515625, "learning_rate": 3.895929470978831e-05, "loss": 1.0061, "step": 51455 }, { "epoch": 0.74, "grad_norm": 0.6328125, "learning_rate": 3.8939465750851434e-05, "loss": 0.9953, "step": 51460 }, { "epoch": 0.74, "grad_norm": 0.53515625, "learning_rate": 3.891964061925835e-05, "loss": 1.0519, "step": 51465 }, { "epoch": 0.74, "grad_norm": 0.3984375, "learning_rate": 3.8899819316251753e-05, "loss": 0.7409, "step": 51470 }, { "epoch": 0.74, "grad_norm": 0.58984375, "learning_rate": 3.888000184307411e-05, "loss": 0.9194, "step": 51475 }, { "epoch": 0.74, "grad_norm": 0.55078125, "learning_rate": 3.8860188200967516e-05, "loss": 1.035, "step": 51480 }, { "epoch": 0.74, "grad_norm": 0.51953125, "learning_rate": 3.884037839117396e-05, "loss": 1.0426, "step": 51485 }, { "epoch": 0.74, "grad_norm": 0.5078125, "learning_rate": 3.8820572414935185e-05, "loss": 1.03, "step": 51490 }, { "epoch": 0.74, "grad_norm": 0.54296875, "learning_rate": 3.880077027349257e-05, "loss": 0.9797, "step": 51495 }, { "epoch": 0.74, "grad_norm": 0.55859375, "learning_rate": 3.878097196808737e-05, "loss": 0.8733, "step": 51500 }, { "epoch": 0.74, "grad_norm": 0.57421875, "learning_rate": 3.876117749996064e-05, "loss": 0.911, "step": 51505 }, { "epoch": 0.74, "grad_norm": 0.462890625, "learning_rate": 3.8741386870352994e-05, "loss": 1.0591, "step": 51510 }, { "epoch": 0.74, "grad_norm": 0.416015625, "learning_rate": 3.872160008050497e-05, "loss": 0.8526, "step": 51515 }, { "epoch": 0.74, "grad_norm": 0.6015625, "learning_rate": 3.870181713165688e-05, "loss": 0.9296, "step": 51520 }, { "epoch": 0.74, "grad_norm": 0.486328125, "learning_rate": 3.868203802504867e-05, "loss": 0.8969, "step": 51525 }, { "epoch": 0.74, "grad_norm": 0.54296875, "learning_rate": 3.866226276192016e-05, "loss": 0.9046, "step": 51530 }, { "epoch": 0.74, "grad_norm": 0.58203125, "learning_rate": 3.864249134351091e-05, "loss": 0.8912, "step": 51535 }, { "epoch": 0.74, "grad_norm": 0.54296875, "learning_rate": 3.8622723771060145e-05, "loss": 1.1907, "step": 51540 }, { "epoch": 0.74, "grad_norm": 0.609375, "learning_rate": 3.860296004580696e-05, "loss": 0.9601, "step": 51545 }, { "epoch": 0.74, "grad_norm": 0.5703125, "learning_rate": 3.8583200168990195e-05, "loss": 0.9729, "step": 51550 }, { "epoch": 0.74, "grad_norm": 0.54296875, "learning_rate": 3.856344414184839e-05, "loss": 0.9276, "step": 51555 }, { "epoch": 0.74, "grad_norm": 0.56640625, "learning_rate": 3.854369196561984e-05, "loss": 1.0324, "step": 51560 }, { "epoch": 0.74, "grad_norm": 0.51171875, "learning_rate": 3.852394364154268e-05, "loss": 0.8486, "step": 51565 }, { "epoch": 0.74, "grad_norm": 0.59375, "learning_rate": 3.850419917085478e-05, "loss": 1.0582, "step": 51570 }, { "epoch": 0.74, "grad_norm": 0.54296875, "learning_rate": 3.848445855479368e-05, "loss": 0.9645, "step": 51575 }, { "epoch": 0.74, "grad_norm": 0.5625, "learning_rate": 3.846472179459678e-05, "loss": 0.8136, "step": 51580 }, { "epoch": 0.74, "grad_norm": 0.66015625, "learning_rate": 3.8444988891501224e-05, "loss": 1.1109, "step": 51585 }, { "epoch": 0.74, "grad_norm": 0.578125, "learning_rate": 3.8425259846743845e-05, "loss": 0.9099, "step": 51590 }, { "epoch": 0.74, "grad_norm": 0.62109375, "learning_rate": 3.8405534661561304e-05, "loss": 1.062, "step": 51595 }, { "epoch": 0.74, "grad_norm": 0.5625, "learning_rate": 3.838581333719004e-05, "loss": 0.9472, "step": 51600 }, { "epoch": 0.74, "grad_norm": 0.58984375, "learning_rate": 3.836609587486612e-05, "loss": 0.9514, "step": 51605 }, { "epoch": 0.74, "grad_norm": 0.490234375, "learning_rate": 3.834638227582555e-05, "loss": 1.0129, "step": 51610 }, { "epoch": 0.74, "grad_norm": 0.53125, "learning_rate": 3.832667254130396e-05, "loss": 0.9279, "step": 51615 }, { "epoch": 0.74, "grad_norm": 0.65234375, "learning_rate": 3.830696667253674e-05, "loss": 0.9951, "step": 51620 }, { "epoch": 0.74, "grad_norm": 0.6953125, "learning_rate": 3.8287264670759106e-05, "loss": 0.9776, "step": 51625 }, { "epoch": 0.74, "grad_norm": 0.53515625, "learning_rate": 3.826756653720605e-05, "loss": 0.9665, "step": 51630 }, { "epoch": 0.74, "grad_norm": 0.55859375, "learning_rate": 3.824787227311218e-05, "loss": 0.9334, "step": 51635 }, { "epoch": 0.74, "grad_norm": 0.6328125, "learning_rate": 3.822818187971201e-05, "loss": 1.0076, "step": 51640 }, { "epoch": 0.74, "grad_norm": 0.5234375, "learning_rate": 3.8208495358239796e-05, "loss": 0.8727, "step": 51645 }, { "epoch": 0.74, "grad_norm": 0.486328125, "learning_rate": 3.818881270992943e-05, "loss": 0.9704, "step": 51650 }, { "epoch": 0.74, "grad_norm": 0.51171875, "learning_rate": 3.816913393601468e-05, "loss": 0.9842, "step": 51655 }, { "epoch": 0.74, "grad_norm": 0.58203125, "learning_rate": 3.8149459037729076e-05, "loss": 0.9255, "step": 51660 }, { "epoch": 0.74, "grad_norm": 0.5546875, "learning_rate": 3.8129788016305814e-05, "loss": 1.0225, "step": 51665 }, { "epoch": 0.74, "grad_norm": 0.546875, "learning_rate": 3.811012087297786e-05, "loss": 0.9501, "step": 51670 }, { "epoch": 0.74, "grad_norm": 0.57421875, "learning_rate": 3.809045760897802e-05, "loss": 1.0628, "step": 51675 }, { "epoch": 0.74, "grad_norm": 0.58984375, "learning_rate": 3.807079822553885e-05, "loss": 1.0562, "step": 51680 }, { "epoch": 0.74, "grad_norm": 0.5703125, "learning_rate": 3.805114272389254e-05, "loss": 0.9083, "step": 51685 }, { "epoch": 0.74, "grad_norm": 0.56640625, "learning_rate": 3.803149110527116e-05, "loss": 0.9235, "step": 51690 }, { "epoch": 0.74, "grad_norm": 0.62890625, "learning_rate": 3.801184337090653e-05, "loss": 1.0131, "step": 51695 }, { "epoch": 0.74, "grad_norm": 0.65625, "learning_rate": 3.7992199522030115e-05, "loss": 1.0319, "step": 51700 }, { "epoch": 0.74, "grad_norm": 0.5703125, "learning_rate": 3.797255955987326e-05, "loss": 1.0243, "step": 51705 }, { "epoch": 0.74, "grad_norm": 0.578125, "learning_rate": 3.7952923485667045e-05, "loss": 0.885, "step": 51710 }, { "epoch": 0.74, "grad_norm": 0.50390625, "learning_rate": 3.793329130064225e-05, "loss": 0.9075, "step": 51715 }, { "epoch": 0.74, "grad_norm": 0.5078125, "learning_rate": 3.791366300602941e-05, "loss": 1.0565, "step": 51720 }, { "epoch": 0.74, "grad_norm": 0.5859375, "learning_rate": 3.789403860305889e-05, "loss": 0.8994, "step": 51725 }, { "epoch": 0.74, "grad_norm": 0.578125, "learning_rate": 3.7874418092960796e-05, "loss": 0.9463, "step": 51730 }, { "epoch": 0.74, "grad_norm": 0.52734375, "learning_rate": 3.7854801476964895e-05, "loss": 0.9532, "step": 51735 }, { "epoch": 0.74, "grad_norm": 0.47265625, "learning_rate": 3.783518875630081e-05, "loss": 0.8684, "step": 51740 }, { "epoch": 0.74, "grad_norm": 0.56640625, "learning_rate": 3.781557993219794e-05, "loss": 0.904, "step": 51745 }, { "epoch": 0.74, "grad_norm": 0.51953125, "learning_rate": 3.77959750058853e-05, "loss": 0.8483, "step": 51750 }, { "epoch": 0.74, "grad_norm": 0.5859375, "learning_rate": 3.777637397859183e-05, "loss": 1.0518, "step": 51755 }, { "epoch": 0.74, "grad_norm": 0.57421875, "learning_rate": 3.775677685154606e-05, "loss": 0.8509, "step": 51760 }, { "epoch": 0.74, "grad_norm": 0.61328125, "learning_rate": 3.7737183625976446e-05, "loss": 0.9962, "step": 51765 }, { "epoch": 0.74, "grad_norm": 0.466796875, "learning_rate": 3.771759430311105e-05, "loss": 0.7977, "step": 51770 }, { "epoch": 0.74, "grad_norm": 0.58203125, "learning_rate": 3.7698008884177794e-05, "loss": 0.9294, "step": 51775 }, { "epoch": 0.74, "grad_norm": 0.578125, "learning_rate": 3.767842737040427e-05, "loss": 1.0377, "step": 51780 }, { "epoch": 0.74, "grad_norm": 0.69140625, "learning_rate": 3.76588497630179e-05, "loss": 1.0127, "step": 51785 }, { "epoch": 0.74, "grad_norm": 0.6328125, "learning_rate": 3.7639276063245855e-05, "loss": 1.0065, "step": 51790 }, { "epoch": 0.74, "grad_norm": 0.57421875, "learning_rate": 3.761970627231498e-05, "loss": 0.8589, "step": 51795 }, { "epoch": 0.74, "grad_norm": 0.55859375, "learning_rate": 3.760014039145197e-05, "loss": 0.8028, "step": 51800 }, { "epoch": 0.74, "grad_norm": 0.56640625, "learning_rate": 3.758057842188325e-05, "loss": 0.8983, "step": 51805 }, { "epoch": 0.74, "grad_norm": 0.52734375, "learning_rate": 3.756102036483493e-05, "loss": 0.9926, "step": 51810 }, { "epoch": 0.74, "grad_norm": 0.55859375, "learning_rate": 3.754146622153296e-05, "loss": 0.9784, "step": 51815 }, { "epoch": 0.74, "grad_norm": 0.546875, "learning_rate": 3.752191599320307e-05, "loss": 0.9877, "step": 51820 }, { "epoch": 0.74, "grad_norm": 0.64453125, "learning_rate": 3.7502369681070635e-05, "loss": 1.0543, "step": 51825 }, { "epoch": 0.74, "grad_norm": 0.546875, "learning_rate": 3.748282728636081e-05, "loss": 0.9123, "step": 51830 }, { "epoch": 0.74, "grad_norm": 0.57421875, "learning_rate": 3.746328881029858e-05, "loss": 0.8175, "step": 51835 }, { "epoch": 0.74, "grad_norm": 0.51953125, "learning_rate": 3.744375425410867e-05, "loss": 0.92, "step": 51840 }, { "epoch": 0.74, "grad_norm": 0.5859375, "learning_rate": 3.742422361901544e-05, "loss": 0.9807, "step": 51845 }, { "epoch": 0.74, "grad_norm": 0.59765625, "learning_rate": 3.740469690624314e-05, "loss": 0.9612, "step": 51850 }, { "epoch": 0.74, "grad_norm": 0.546875, "learning_rate": 3.738517411701578e-05, "loss": 0.9129, "step": 51855 }, { "epoch": 0.74, "grad_norm": 0.515625, "learning_rate": 3.7365655252556965e-05, "loss": 0.8131, "step": 51860 }, { "epoch": 0.74, "grad_norm": 0.58984375, "learning_rate": 3.7346140314090226e-05, "loss": 0.9964, "step": 51865 }, { "epoch": 0.74, "grad_norm": 0.59765625, "learning_rate": 3.732662930283883e-05, "loss": 1.0277, "step": 51870 }, { "epoch": 0.74, "grad_norm": 0.55859375, "learning_rate": 3.7307122220025625e-05, "loss": 0.9241, "step": 51875 }, { "epoch": 0.74, "grad_norm": 0.7578125, "learning_rate": 3.728761906687339e-05, "loss": 0.9222, "step": 51880 }, { "epoch": 0.74, "grad_norm": 0.5546875, "learning_rate": 3.726811984460467e-05, "loss": 1.0535, "step": 51885 }, { "epoch": 0.74, "grad_norm": 0.609375, "learning_rate": 3.724862455444159e-05, "loss": 1.073, "step": 51890 }, { "epoch": 0.74, "grad_norm": 0.63671875, "learning_rate": 3.72291331976062e-05, "loss": 0.9135, "step": 51895 }, { "epoch": 0.74, "grad_norm": 0.578125, "learning_rate": 3.7209645775320265e-05, "loss": 0.959, "step": 51900 }, { "epoch": 0.74, "grad_norm": 0.546875, "learning_rate": 3.7190162288805205e-05, "loss": 0.9446, "step": 51905 }, { "epoch": 0.74, "grad_norm": 0.765625, "learning_rate": 3.717068273928232e-05, "loss": 1.0108, "step": 51910 }, { "epoch": 0.74, "grad_norm": 0.498046875, "learning_rate": 3.715120712797262e-05, "loss": 0.8893, "step": 51915 }, { "epoch": 0.74, "grad_norm": 0.58984375, "learning_rate": 3.713173545609681e-05, "loss": 1.1068, "step": 51920 }, { "epoch": 0.74, "grad_norm": 0.5546875, "learning_rate": 3.711226772487545e-05, "loss": 0.7963, "step": 51925 }, { "epoch": 0.74, "grad_norm": 0.62109375, "learning_rate": 3.7092803935528734e-05, "loss": 0.9644, "step": 51930 }, { "epoch": 0.74, "grad_norm": 0.5625, "learning_rate": 3.7073344089276754e-05, "loss": 0.9825, "step": 51935 }, { "epoch": 0.75, "grad_norm": 0.55859375, "learning_rate": 3.705388818733919e-05, "loss": 0.9845, "step": 51940 }, { "epoch": 0.75, "grad_norm": 0.61328125, "learning_rate": 3.703443623093562e-05, "loss": 0.9744, "step": 51945 }, { "epoch": 0.75, "grad_norm": 0.5859375, "learning_rate": 3.7014988221285315e-05, "loss": 0.8523, "step": 51950 }, { "epoch": 0.75, "grad_norm": 0.5546875, "learning_rate": 3.699554415960727e-05, "loss": 0.8706, "step": 51955 }, { "epoch": 0.75, "grad_norm": 0.5625, "learning_rate": 3.6976104047120264e-05, "loss": 1.1154, "step": 51960 }, { "epoch": 0.75, "grad_norm": 0.58203125, "learning_rate": 3.695666788504286e-05, "loss": 0.9287, "step": 51965 }, { "epoch": 0.75, "grad_norm": 0.55859375, "learning_rate": 3.693723567459329e-05, "loss": 0.8874, "step": 51970 }, { "epoch": 0.75, "grad_norm": 0.48046875, "learning_rate": 3.691780741698964e-05, "loss": 0.9014, "step": 51975 }, { "epoch": 0.75, "grad_norm": 0.5078125, "learning_rate": 3.689838311344966e-05, "loss": 1.0094, "step": 51980 }, { "epoch": 0.75, "grad_norm": 0.58203125, "learning_rate": 3.687896276519086e-05, "loss": 1.0438, "step": 51985 }, { "epoch": 0.75, "grad_norm": 0.5625, "learning_rate": 3.6859546373430576e-05, "loss": 0.8317, "step": 51990 }, { "epoch": 0.75, "grad_norm": 0.58203125, "learning_rate": 3.6840133939385854e-05, "loss": 0.9784, "step": 51995 }, { "epoch": 0.75, "grad_norm": 0.5078125, "learning_rate": 3.682072546427344e-05, "loss": 1.0079, "step": 52000 }, { "epoch": 0.75, "grad_norm": 0.50390625, "learning_rate": 3.680132094930992e-05, "loss": 0.9256, "step": 52005 }, { "epoch": 0.75, "grad_norm": 0.494140625, "learning_rate": 3.678192039571161e-05, "loss": 0.8137, "step": 52010 }, { "epoch": 0.75, "grad_norm": 0.55078125, "learning_rate": 3.676252380469448e-05, "loss": 0.9156, "step": 52015 }, { "epoch": 0.75, "grad_norm": 0.61328125, "learning_rate": 3.67431311774744e-05, "loss": 0.9449, "step": 52020 }, { "epoch": 0.75, "grad_norm": 0.6015625, "learning_rate": 3.6723742515266924e-05, "loss": 1.0522, "step": 52025 }, { "epoch": 0.75, "grad_norm": 0.6015625, "learning_rate": 3.6704357819287336e-05, "loss": 0.887, "step": 52030 }, { "epoch": 0.75, "grad_norm": 0.5625, "learning_rate": 3.668497709075065e-05, "loss": 0.879, "step": 52035 }, { "epoch": 0.75, "grad_norm": 0.73046875, "learning_rate": 3.666560033087172e-05, "loss": 1.0117, "step": 52040 }, { "epoch": 0.75, "grad_norm": 0.66015625, "learning_rate": 3.6646227540865105e-05, "loss": 0.9591, "step": 52045 }, { "epoch": 0.75, "grad_norm": 0.546875, "learning_rate": 3.662685872194509e-05, "loss": 0.9551, "step": 52050 }, { "epoch": 0.75, "grad_norm": 0.57421875, "learning_rate": 3.660749387532574e-05, "loss": 0.8861, "step": 52055 }, { "epoch": 0.75, "grad_norm": 0.546875, "learning_rate": 3.658813300222091e-05, "loss": 1.0416, "step": 52060 }, { "epoch": 0.75, "grad_norm": 0.50390625, "learning_rate": 3.656877610384407e-05, "loss": 0.9706, "step": 52065 }, { "epoch": 0.75, "grad_norm": 0.58984375, "learning_rate": 3.65494231814086e-05, "loss": 1.0754, "step": 52070 }, { "epoch": 0.75, "grad_norm": 0.5390625, "learning_rate": 3.6530074236127585e-05, "loss": 0.869, "step": 52075 }, { "epoch": 0.75, "grad_norm": 0.578125, "learning_rate": 3.6510729269213805e-05, "loss": 0.9369, "step": 52080 }, { "epoch": 0.75, "grad_norm": 0.58984375, "learning_rate": 3.649138828187978e-05, "loss": 1.013, "step": 52085 }, { "epoch": 0.75, "grad_norm": 0.58203125, "learning_rate": 3.6472051275337904e-05, "loss": 1.021, "step": 52090 }, { "epoch": 0.75, "grad_norm": 0.61328125, "learning_rate": 3.6452718250800174e-05, "loss": 0.9296, "step": 52095 }, { "epoch": 0.75, "grad_norm": 0.68359375, "learning_rate": 3.643338920947844e-05, "loss": 1.0464, "step": 52100 }, { "epoch": 0.75, "grad_norm": 0.58984375, "learning_rate": 3.64140641525843e-05, "loss": 0.9818, "step": 52105 }, { "epoch": 0.75, "grad_norm": 0.78125, "learning_rate": 3.639474308132901e-05, "loss": 1.1438, "step": 52110 }, { "epoch": 0.75, "grad_norm": 0.51953125, "learning_rate": 3.637542599692365e-05, "loss": 0.9877, "step": 52115 }, { "epoch": 0.75, "grad_norm": 0.61328125, "learning_rate": 3.63561129005791e-05, "loss": 0.9625, "step": 52120 }, { "epoch": 0.75, "grad_norm": 0.5, "learning_rate": 3.633680379350583e-05, "loss": 0.8335, "step": 52125 }, { "epoch": 0.75, "grad_norm": 0.5859375, "learning_rate": 3.631749867691421e-05, "loss": 1.027, "step": 52130 }, { "epoch": 0.75, "grad_norm": 0.5859375, "learning_rate": 3.6298197552014336e-05, "loss": 0.8022, "step": 52135 }, { "epoch": 0.75, "grad_norm": 0.5390625, "learning_rate": 3.627890042001598e-05, "loss": 1.0426, "step": 52140 }, { "epoch": 0.75, "grad_norm": 0.60546875, "learning_rate": 3.625960728212869e-05, "loss": 1.2428, "step": 52145 }, { "epoch": 0.75, "grad_norm": 0.55078125, "learning_rate": 3.6240318139561826e-05, "loss": 0.882, "step": 52150 }, { "epoch": 0.75, "grad_norm": 0.55078125, "learning_rate": 3.622103299352445e-05, "loss": 0.9101, "step": 52155 }, { "epoch": 0.75, "grad_norm": 0.5625, "learning_rate": 3.620175184522534e-05, "loss": 0.9604, "step": 52160 }, { "epoch": 0.75, "grad_norm": 0.63671875, "learning_rate": 3.6182474695873084e-05, "loss": 1.1251, "step": 52165 }, { "epoch": 0.75, "grad_norm": 0.50390625, "learning_rate": 3.616320154667603e-05, "loss": 1.1223, "step": 52170 }, { "epoch": 0.75, "grad_norm": 0.609375, "learning_rate": 3.614393239884216e-05, "loss": 0.8701, "step": 52175 }, { "epoch": 0.75, "grad_norm": 0.484375, "learning_rate": 3.612466725357935e-05, "loss": 0.8804, "step": 52180 }, { "epoch": 0.75, "grad_norm": 0.56640625, "learning_rate": 3.6105406112095207e-05, "loss": 1.0503, "step": 52185 }, { "epoch": 0.75, "grad_norm": 0.5625, "learning_rate": 3.6086148975596914e-05, "loss": 0.9704, "step": 52190 }, { "epoch": 0.75, "grad_norm": 0.54296875, "learning_rate": 3.6066895845291595e-05, "loss": 0.9705, "step": 52195 }, { "epoch": 0.75, "grad_norm": 0.55859375, "learning_rate": 3.604764672238609e-05, "loss": 0.9165, "step": 52200 }, { "epoch": 0.75, "grad_norm": 0.5625, "learning_rate": 3.602840160808688e-05, "loss": 0.9666, "step": 52205 }, { "epoch": 0.75, "grad_norm": 0.58984375, "learning_rate": 3.6009160503600326e-05, "loss": 0.9914, "step": 52210 }, { "epoch": 0.75, "grad_norm": 0.5, "learning_rate": 3.5989923410132495e-05, "loss": 0.9454, "step": 52215 }, { "epoch": 0.75, "grad_norm": 0.458984375, "learning_rate": 3.597069032888915e-05, "loss": 0.9694, "step": 52220 }, { "epoch": 0.75, "grad_norm": 0.51171875, "learning_rate": 3.5951461261075845e-05, "loss": 0.9661, "step": 52225 }, { "epoch": 0.75, "grad_norm": 0.48828125, "learning_rate": 3.593223620789793e-05, "loss": 1.0651, "step": 52230 }, { "epoch": 0.75, "grad_norm": 0.5234375, "learning_rate": 3.5913015170560385e-05, "loss": 0.933, "step": 52235 }, { "epoch": 0.75, "grad_norm": 0.5234375, "learning_rate": 3.589379815026806e-05, "loss": 0.9405, "step": 52240 }, { "epoch": 0.75, "grad_norm": 0.5390625, "learning_rate": 3.5874585148225456e-05, "loss": 0.9426, "step": 52245 }, { "epoch": 0.75, "grad_norm": 0.609375, "learning_rate": 3.5855376165636924e-05, "loss": 0.9903, "step": 52250 }, { "epoch": 0.75, "grad_norm": 0.51953125, "learning_rate": 3.5836171203706425e-05, "loss": 1.2026, "step": 52255 }, { "epoch": 0.75, "grad_norm": 0.49609375, "learning_rate": 3.5816970263637796e-05, "loss": 0.965, "step": 52260 }, { "epoch": 0.75, "grad_norm": 0.455078125, "learning_rate": 3.579777334663461e-05, "loss": 0.9093, "step": 52265 }, { "epoch": 0.75, "grad_norm": 0.5625, "learning_rate": 3.577858045390007e-05, "loss": 1.1153, "step": 52270 }, { "epoch": 0.75, "grad_norm": 0.53125, "learning_rate": 3.575939158663725e-05, "loss": 0.9002, "step": 52275 }, { "epoch": 0.75, "grad_norm": 0.5625, "learning_rate": 3.5740206746048965e-05, "loss": 0.9292, "step": 52280 }, { "epoch": 0.75, "grad_norm": 0.5390625, "learning_rate": 3.572102593333767e-05, "loss": 0.8759, "step": 52285 }, { "epoch": 0.75, "grad_norm": 0.55859375, "learning_rate": 3.570184914970571e-05, "loss": 0.946, "step": 52290 }, { "epoch": 0.75, "grad_norm": 0.53125, "learning_rate": 3.568267639635507e-05, "loss": 0.9866, "step": 52295 }, { "epoch": 0.75, "grad_norm": 0.55859375, "learning_rate": 3.5663507674487505e-05, "loss": 0.892, "step": 52300 }, { "epoch": 0.75, "grad_norm": 0.48046875, "learning_rate": 3.5644342985304545e-05, "loss": 0.8986, "step": 52305 }, { "epoch": 0.75, "grad_norm": 0.52734375, "learning_rate": 3.562518233000749e-05, "loss": 0.9364, "step": 52310 }, { "epoch": 0.75, "grad_norm": 0.4609375, "learning_rate": 3.56060257097973e-05, "loss": 0.8437, "step": 52315 }, { "epoch": 0.75, "grad_norm": 0.61328125, "learning_rate": 3.558687312587474e-05, "loss": 0.9295, "step": 52320 }, { "epoch": 0.75, "grad_norm": 0.58203125, "learning_rate": 3.5567724579440386e-05, "loss": 1.0193, "step": 52325 }, { "epoch": 0.75, "grad_norm": 0.55859375, "learning_rate": 3.554858007169439e-05, "loss": 1.019, "step": 52330 }, { "epoch": 0.75, "grad_norm": 0.515625, "learning_rate": 3.552943960383681e-05, "loss": 0.9998, "step": 52335 }, { "epoch": 0.75, "grad_norm": 0.60546875, "learning_rate": 3.551030317706742e-05, "loss": 1.1862, "step": 52340 }, { "epoch": 0.75, "grad_norm": 0.57421875, "learning_rate": 3.549117079258566e-05, "loss": 1.1737, "step": 52345 }, { "epoch": 0.75, "grad_norm": 0.57421875, "learning_rate": 3.547204245159075e-05, "loss": 0.9703, "step": 52350 }, { "epoch": 0.75, "grad_norm": 0.6953125, "learning_rate": 3.545291815528171e-05, "loss": 0.9504, "step": 52355 }, { "epoch": 0.75, "grad_norm": 0.63671875, "learning_rate": 3.543379790485732e-05, "loss": 0.8241, "step": 52360 }, { "epoch": 0.75, "grad_norm": 0.56640625, "learning_rate": 3.541468170151597e-05, "loss": 0.9014, "step": 52365 }, { "epoch": 0.75, "grad_norm": 0.5859375, "learning_rate": 3.539556954645593e-05, "loss": 0.9119, "step": 52370 }, { "epoch": 0.75, "grad_norm": 0.5390625, "learning_rate": 3.53764614408752e-05, "loss": 0.9338, "step": 52375 }, { "epoch": 0.75, "grad_norm": 0.55078125, "learning_rate": 3.535735738597144e-05, "loss": 0.9221, "step": 52380 }, { "epoch": 0.75, "grad_norm": 0.49609375, "learning_rate": 3.533825738294213e-05, "loss": 0.888, "step": 52385 }, { "epoch": 0.75, "grad_norm": 0.66796875, "learning_rate": 3.5319161432984525e-05, "loss": 1.0516, "step": 52390 }, { "epoch": 0.75, "grad_norm": 0.57421875, "learning_rate": 3.5300069537295556e-05, "loss": 0.8644, "step": 52395 }, { "epoch": 0.75, "grad_norm": 0.609375, "learning_rate": 3.528098169707187e-05, "loss": 0.8862, "step": 52400 }, { "epoch": 0.75, "grad_norm": 0.640625, "learning_rate": 3.5261897913509964e-05, "loss": 1.0723, "step": 52405 }, { "epoch": 0.75, "grad_norm": 0.62109375, "learning_rate": 3.524281818780607e-05, "loss": 0.9493, "step": 52410 }, { "epoch": 0.75, "grad_norm": 0.5078125, "learning_rate": 3.522374252115604e-05, "loss": 0.9562, "step": 52415 }, { "epoch": 0.75, "grad_norm": 0.6328125, "learning_rate": 3.520467091475561e-05, "loss": 0.9403, "step": 52420 }, { "epoch": 0.75, "grad_norm": 0.58984375, "learning_rate": 3.518560336980024e-05, "loss": 0.9999, "step": 52425 }, { "epoch": 0.75, "grad_norm": 0.57421875, "learning_rate": 3.516653988748503e-05, "loss": 0.866, "step": 52430 }, { "epoch": 0.75, "grad_norm": 0.51171875, "learning_rate": 3.514748046900497e-05, "loss": 0.9026, "step": 52435 }, { "epoch": 0.75, "grad_norm": 0.53125, "learning_rate": 3.5128425115554656e-05, "loss": 1.0292, "step": 52440 }, { "epoch": 0.75, "grad_norm": 0.5, "learning_rate": 3.510937382832854e-05, "loss": 0.9959, "step": 52445 }, { "epoch": 0.75, "grad_norm": 0.59375, "learning_rate": 3.509032660852082e-05, "loss": 1.0212, "step": 52450 }, { "epoch": 0.75, "grad_norm": 0.5546875, "learning_rate": 3.5071283457325344e-05, "loss": 0.8349, "step": 52455 }, { "epoch": 0.75, "grad_norm": 0.5390625, "learning_rate": 3.5052244375935736e-05, "loss": 0.9994, "step": 52460 }, { "epoch": 0.75, "grad_norm": 0.52734375, "learning_rate": 3.503320936554543e-05, "loss": 0.8113, "step": 52465 }, { "epoch": 0.75, "grad_norm": 0.578125, "learning_rate": 3.501417842734758e-05, "loss": 0.97, "step": 52470 }, { "epoch": 0.75, "grad_norm": 0.625, "learning_rate": 3.4995151562535e-05, "loss": 0.9237, "step": 52475 }, { "epoch": 0.75, "grad_norm": 0.53125, "learning_rate": 3.4976128772300364e-05, "loss": 1.1457, "step": 52480 }, { "epoch": 0.75, "grad_norm": 0.6640625, "learning_rate": 3.4957110057836065e-05, "loss": 1.0911, "step": 52485 }, { "epoch": 0.75, "grad_norm": 0.494140625, "learning_rate": 3.493809542033414e-05, "loss": 0.8484, "step": 52490 }, { "epoch": 0.75, "grad_norm": 0.54296875, "learning_rate": 3.4919084860986506e-05, "loss": 1.061, "step": 52495 }, { "epoch": 0.75, "grad_norm": 0.61328125, "learning_rate": 3.490007838098478e-05, "loss": 0.9506, "step": 52500 }, { "epoch": 0.75, "grad_norm": 0.578125, "learning_rate": 3.4881075981520284e-05, "loss": 0.8539, "step": 52505 }, { "epoch": 0.75, "grad_norm": 0.625, "learning_rate": 3.4862077663784074e-05, "loss": 0.962, "step": 52510 }, { "epoch": 0.75, "grad_norm": 0.494140625, "learning_rate": 3.484308342896703e-05, "loss": 0.8708, "step": 52515 }, { "epoch": 0.75, "grad_norm": 0.482421875, "learning_rate": 3.482409327825975e-05, "loss": 0.8459, "step": 52520 }, { "epoch": 0.75, "grad_norm": 0.59765625, "learning_rate": 3.4805107212852504e-05, "loss": 0.9101, "step": 52525 }, { "epoch": 0.75, "grad_norm": 0.5234375, "learning_rate": 3.4786125233935386e-05, "loss": 0.9337, "step": 52530 }, { "epoch": 0.75, "grad_norm": 0.90234375, "learning_rate": 3.4767147342698244e-05, "loss": 0.8437, "step": 52535 }, { "epoch": 0.75, "grad_norm": 0.51171875, "learning_rate": 3.474817354033058e-05, "loss": 1.005, "step": 52540 }, { "epoch": 0.75, "grad_norm": 0.62109375, "learning_rate": 3.4729203828021694e-05, "loss": 0.9563, "step": 52545 }, { "epoch": 0.75, "grad_norm": 0.546875, "learning_rate": 3.47102382069607e-05, "loss": 0.9094, "step": 52550 }, { "epoch": 0.75, "grad_norm": 0.5546875, "learning_rate": 3.469127667833631e-05, "loss": 0.8738, "step": 52555 }, { "epoch": 0.75, "grad_norm": 0.53125, "learning_rate": 3.467231924333707e-05, "loss": 0.8692, "step": 52560 }, { "epoch": 0.75, "grad_norm": 0.55859375, "learning_rate": 3.465336590315128e-05, "loss": 0.9131, "step": 52565 }, { "epoch": 0.75, "grad_norm": 0.56640625, "learning_rate": 3.463441665896692e-05, "loss": 0.9032, "step": 52570 }, { "epoch": 0.75, "grad_norm": 0.55859375, "learning_rate": 3.461547151197175e-05, "loss": 1.0797, "step": 52575 }, { "epoch": 0.75, "grad_norm": 0.58203125, "learning_rate": 3.4596530463353336e-05, "loss": 0.9467, "step": 52580 }, { "epoch": 0.75, "grad_norm": 0.66015625, "learning_rate": 3.457759351429884e-05, "loss": 0.8499, "step": 52585 }, { "epoch": 0.75, "grad_norm": 0.453125, "learning_rate": 3.455866066599531e-05, "loss": 0.8842, "step": 52590 }, { "epoch": 0.75, "grad_norm": 0.57421875, "learning_rate": 3.453973191962948e-05, "loss": 0.7765, "step": 52595 }, { "epoch": 0.75, "grad_norm": 0.57421875, "learning_rate": 3.452080727638778e-05, "loss": 1.0101, "step": 52600 }, { "epoch": 0.75, "grad_norm": 0.53125, "learning_rate": 3.450188673745648e-05, "loss": 0.9075, "step": 52605 }, { "epoch": 0.75, "grad_norm": 0.5703125, "learning_rate": 3.448297030402149e-05, "loss": 0.9609, "step": 52610 }, { "epoch": 0.75, "grad_norm": 0.765625, "learning_rate": 3.446405797726857e-05, "loss": 1.009, "step": 52615 }, { "epoch": 0.75, "grad_norm": 0.66015625, "learning_rate": 3.4445149758383096e-05, "loss": 1.0084, "step": 52620 }, { "epoch": 0.75, "grad_norm": 0.8671875, "learning_rate": 3.44262456485503e-05, "loss": 0.8443, "step": 52625 }, { "epoch": 0.75, "grad_norm": 0.515625, "learning_rate": 3.440734564895515e-05, "loss": 0.7951, "step": 52630 }, { "epoch": 0.76, "grad_norm": 0.546875, "learning_rate": 3.438844976078224e-05, "loss": 0.9905, "step": 52635 }, { "epoch": 0.76, "grad_norm": 0.55078125, "learning_rate": 3.436955798521602e-05, "loss": 0.995, "step": 52640 }, { "epoch": 0.76, "grad_norm": 0.57421875, "learning_rate": 3.4350670323440684e-05, "loss": 1.0952, "step": 52645 }, { "epoch": 0.76, "grad_norm": 0.5859375, "learning_rate": 3.4331786776640075e-05, "loss": 0.8872, "step": 52650 }, { "epoch": 0.76, "grad_norm": 0.546875, "learning_rate": 3.431290734599785e-05, "loss": 0.9607, "step": 52655 }, { "epoch": 0.76, "grad_norm": 0.64453125, "learning_rate": 3.429403203269748e-05, "loss": 1.0511, "step": 52660 }, { "epoch": 0.76, "grad_norm": 0.515625, "learning_rate": 3.427516083792194e-05, "loss": 0.9045, "step": 52665 }, { "epoch": 0.76, "grad_norm": 0.52734375, "learning_rate": 3.425629376285418e-05, "loss": 1.0049, "step": 52670 }, { "epoch": 0.76, "grad_norm": 0.5625, "learning_rate": 3.423743080867684e-05, "loss": 1.0459, "step": 52675 }, { "epoch": 0.76, "grad_norm": 0.48828125, "learning_rate": 3.421857197657219e-05, "loss": 0.9, "step": 52680 }, { "epoch": 0.76, "grad_norm": 0.546875, "learning_rate": 3.419971726772238e-05, "loss": 1.0364, "step": 52685 }, { "epoch": 0.76, "grad_norm": 0.55859375, "learning_rate": 3.4180866683309255e-05, "loss": 1.015, "step": 52690 }, { "epoch": 0.76, "grad_norm": 0.69140625, "learning_rate": 3.416202022451433e-05, "loss": 0.9177, "step": 52695 }, { "epoch": 0.76, "grad_norm": 0.59765625, "learning_rate": 3.4143177892518975e-05, "loss": 1.0352, "step": 52700 }, { "epoch": 0.76, "grad_norm": 0.55859375, "learning_rate": 3.412433968850426e-05, "loss": 1.0523, "step": 52705 }, { "epoch": 0.76, "grad_norm": 0.51953125, "learning_rate": 3.4105505613650956e-05, "loss": 0.886, "step": 52710 }, { "epoch": 0.76, "grad_norm": 0.578125, "learning_rate": 3.408667566913958e-05, "loss": 0.9176, "step": 52715 }, { "epoch": 0.76, "grad_norm": 0.59765625, "learning_rate": 3.406784985615044e-05, "loss": 0.9577, "step": 52720 }, { "epoch": 0.76, "grad_norm": 0.57421875, "learning_rate": 3.40490281758636e-05, "loss": 1.008, "step": 52725 }, { "epoch": 0.76, "grad_norm": 0.5625, "learning_rate": 3.403021062945875e-05, "loss": 0.8865, "step": 52730 }, { "epoch": 0.76, "grad_norm": 0.74609375, "learning_rate": 3.4011397218115425e-05, "loss": 0.9519, "step": 52735 }, { "epoch": 0.76, "grad_norm": 0.5859375, "learning_rate": 3.399258794301291e-05, "loss": 1.0096, "step": 52740 }, { "epoch": 0.76, "grad_norm": 0.5390625, "learning_rate": 3.3973782805330135e-05, "loss": 1.1155, "step": 52745 }, { "epoch": 0.76, "grad_norm": 0.50390625, "learning_rate": 3.395498180624584e-05, "loss": 0.9467, "step": 52750 }, { "epoch": 0.76, "grad_norm": 0.5625, "learning_rate": 3.3936184946938544e-05, "loss": 0.8723, "step": 52755 }, { "epoch": 0.76, "grad_norm": 0.56640625, "learning_rate": 3.391739222858639e-05, "loss": 0.928, "step": 52760 }, { "epoch": 0.76, "grad_norm": 0.54296875, "learning_rate": 3.3898603652367364e-05, "loss": 0.8712, "step": 52765 }, { "epoch": 0.76, "grad_norm": 0.5859375, "learning_rate": 3.387981921945916e-05, "loss": 0.9256, "step": 52770 }, { "epoch": 0.76, "grad_norm": 0.6171875, "learning_rate": 3.386103893103916e-05, "loss": 1.0196, "step": 52775 }, { "epoch": 0.76, "grad_norm": 0.57421875, "learning_rate": 3.384226278828456e-05, "loss": 0.8886, "step": 52780 }, { "epoch": 0.76, "grad_norm": 0.63671875, "learning_rate": 3.382349079237232e-05, "loss": 1.0258, "step": 52785 }, { "epoch": 0.76, "grad_norm": 0.5234375, "learning_rate": 3.3804722944479004e-05, "loss": 0.8668, "step": 52790 }, { "epoch": 0.76, "grad_norm": 0.5625, "learning_rate": 3.378595924578104e-05, "loss": 0.8021, "step": 52795 }, { "epoch": 0.76, "grad_norm": 0.5703125, "learning_rate": 3.37671996974546e-05, "loss": 0.9738, "step": 52800 }, { "epoch": 0.76, "grad_norm": 0.5234375, "learning_rate": 3.3748444300675484e-05, "loss": 0.8922, "step": 52805 }, { "epoch": 0.76, "grad_norm": 0.7265625, "learning_rate": 3.372969305661934e-05, "loss": 0.9705, "step": 52810 }, { "epoch": 0.76, "grad_norm": 0.6015625, "learning_rate": 3.371094596646153e-05, "loss": 0.9663, "step": 52815 }, { "epoch": 0.76, "grad_norm": 0.5078125, "learning_rate": 3.369220303137712e-05, "loss": 1.0698, "step": 52820 }, { "epoch": 0.76, "grad_norm": 0.5625, "learning_rate": 3.367346425254093e-05, "loss": 0.8487, "step": 52825 }, { "epoch": 0.76, "grad_norm": 0.6015625, "learning_rate": 3.365472963112752e-05, "loss": 0.9631, "step": 52830 }, { "epoch": 0.76, "grad_norm": 0.5546875, "learning_rate": 3.363599916831126e-05, "loss": 0.9238, "step": 52835 }, { "epoch": 0.76, "grad_norm": 0.5, "learning_rate": 3.361727286526612e-05, "loss": 0.9257, "step": 52840 }, { "epoch": 0.76, "grad_norm": 0.52734375, "learning_rate": 3.359855072316592e-05, "loss": 0.9111, "step": 52845 }, { "epoch": 0.76, "grad_norm": 0.55078125, "learning_rate": 3.357983274318422e-05, "loss": 0.9004, "step": 52850 }, { "epoch": 0.76, "grad_norm": 0.55859375, "learning_rate": 3.356111892649423e-05, "loss": 1.0007, "step": 52855 }, { "epoch": 0.76, "grad_norm": 0.671875, "learning_rate": 3.354240927426895e-05, "loss": 1.0987, "step": 52860 }, { "epoch": 0.76, "grad_norm": 0.60546875, "learning_rate": 3.352370378768119e-05, "loss": 1.0265, "step": 52865 }, { "epoch": 0.76, "grad_norm": 0.6171875, "learning_rate": 3.350500246790339e-05, "loss": 1.024, "step": 52870 }, { "epoch": 0.76, "grad_norm": 0.5703125, "learning_rate": 3.348630531610773e-05, "loss": 0.8475, "step": 52875 }, { "epoch": 0.76, "grad_norm": 0.5859375, "learning_rate": 3.346761233346624e-05, "loss": 0.8669, "step": 52880 }, { "epoch": 0.76, "grad_norm": 0.55859375, "learning_rate": 3.344892352115055e-05, "loss": 0.8269, "step": 52885 }, { "epoch": 0.76, "grad_norm": 0.4765625, "learning_rate": 3.3430238880332124e-05, "loss": 0.868, "step": 52890 }, { "epoch": 0.76, "grad_norm": 0.58984375, "learning_rate": 3.3411558412182165e-05, "loss": 0.8541, "step": 52895 }, { "epoch": 0.76, "grad_norm": 0.53125, "learning_rate": 3.339288211787155e-05, "loss": 0.8378, "step": 52900 }, { "epoch": 0.76, "grad_norm": 0.5625, "learning_rate": 3.3374209998570924e-05, "loss": 0.9926, "step": 52905 }, { "epoch": 0.76, "grad_norm": 0.546875, "learning_rate": 3.335554205545075e-05, "loss": 0.9391, "step": 52910 }, { "epoch": 0.76, "grad_norm": 0.609375, "learning_rate": 3.333687828968105e-05, "loss": 0.9534, "step": 52915 }, { "epoch": 0.76, "grad_norm": 0.56640625, "learning_rate": 3.331821870243179e-05, "loss": 0.9321, "step": 52920 }, { "epoch": 0.76, "grad_norm": 0.55078125, "learning_rate": 3.3299563294872485e-05, "loss": 0.9496, "step": 52925 }, { "epoch": 0.76, "grad_norm": 0.57421875, "learning_rate": 3.3280912068172554e-05, "loss": 1.0303, "step": 52930 }, { "epoch": 0.76, "grad_norm": 0.81640625, "learning_rate": 3.3262265023501e-05, "loss": 1.015, "step": 52935 }, { "epoch": 0.76, "grad_norm": 0.53515625, "learning_rate": 3.324362216202669e-05, "loss": 1.0326, "step": 52940 }, { "epoch": 0.76, "grad_norm": 0.60546875, "learning_rate": 3.3224983484918205e-05, "loss": 0.8786, "step": 52945 }, { "epoch": 0.76, "grad_norm": 0.54296875, "learning_rate": 3.320634899334377e-05, "loss": 0.9051, "step": 52950 }, { "epoch": 0.76, "grad_norm": 0.53515625, "learning_rate": 3.3187718688471446e-05, "loss": 0.9922, "step": 52955 }, { "epoch": 0.76, "grad_norm": 0.5546875, "learning_rate": 3.316909257146905e-05, "loss": 0.9637, "step": 52960 }, { "epoch": 0.76, "grad_norm": 0.53515625, "learning_rate": 3.315047064350402e-05, "loss": 0.9554, "step": 52965 }, { "epoch": 0.76, "grad_norm": 0.455078125, "learning_rate": 3.313185290574361e-05, "loss": 0.7879, "step": 52970 }, { "epoch": 0.76, "grad_norm": 0.55078125, "learning_rate": 3.311323935935489e-05, "loss": 1.035, "step": 52975 }, { "epoch": 0.76, "grad_norm": 0.51953125, "learning_rate": 3.3094630005504435e-05, "loss": 0.9484, "step": 52980 }, { "epoch": 0.76, "grad_norm": 0.640625, "learning_rate": 3.307602484535877e-05, "loss": 1.1787, "step": 52985 }, { "epoch": 0.76, "grad_norm": 0.5703125, "learning_rate": 3.3057423880084114e-05, "loss": 0.8673, "step": 52990 }, { "epoch": 0.76, "grad_norm": 0.5546875, "learning_rate": 3.303882711084635e-05, "loss": 0.9424, "step": 52995 }, { "epoch": 0.76, "grad_norm": 0.51171875, "learning_rate": 3.3020234538811155e-05, "loss": 0.9029, "step": 53000 }, { "epoch": 0.76, "grad_norm": 0.5859375, "learning_rate": 3.3001646165143986e-05, "loss": 1.0614, "step": 53005 }, { "epoch": 0.76, "grad_norm": 0.55078125, "learning_rate": 3.29830619910099e-05, "loss": 1.0087, "step": 53010 }, { "epoch": 0.76, "grad_norm": 0.56640625, "learning_rate": 3.2964482017573816e-05, "loss": 0.9137, "step": 53015 }, { "epoch": 0.76, "grad_norm": 0.57421875, "learning_rate": 3.294590624600038e-05, "loss": 0.9672, "step": 53020 }, { "epoch": 0.76, "grad_norm": 0.5078125, "learning_rate": 3.29273346774539e-05, "loss": 0.932, "step": 53025 }, { "epoch": 0.76, "grad_norm": 0.5546875, "learning_rate": 3.2908767313098453e-05, "loss": 0.9715, "step": 53030 }, { "epoch": 0.76, "grad_norm": 0.65234375, "learning_rate": 3.289020415409787e-05, "loss": 0.9634, "step": 53035 }, { "epoch": 0.76, "grad_norm": 0.5390625, "learning_rate": 3.2871645201615765e-05, "loss": 1.0967, "step": 53040 }, { "epoch": 0.76, "grad_norm": 0.5625, "learning_rate": 3.285309045681535e-05, "loss": 1.0092, "step": 53045 }, { "epoch": 0.76, "grad_norm": 0.498046875, "learning_rate": 3.283453992085971e-05, "loss": 0.902, "step": 53050 }, { "epoch": 0.76, "grad_norm": 0.578125, "learning_rate": 3.281599359491162e-05, "loss": 0.9241, "step": 53055 }, { "epoch": 0.76, "grad_norm": 0.51171875, "learning_rate": 3.279745148013354e-05, "loss": 0.9882, "step": 53060 }, { "epoch": 0.76, "grad_norm": 0.50390625, "learning_rate": 3.2778913577687754e-05, "loss": 0.9183, "step": 53065 }, { "epoch": 0.76, "grad_norm": 0.58203125, "learning_rate": 3.2760379888736236e-05, "loss": 1.0216, "step": 53070 }, { "epoch": 0.76, "grad_norm": 0.5546875, "learning_rate": 3.274185041444067e-05, "loss": 1.0704, "step": 53075 }, { "epoch": 0.76, "grad_norm": 0.56640625, "learning_rate": 3.272332515596254e-05, "loss": 1.0757, "step": 53080 }, { "epoch": 0.76, "grad_norm": 0.546875, "learning_rate": 3.270480411446298e-05, "loss": 0.9288, "step": 53085 }, { "epoch": 0.76, "grad_norm": 0.5078125, "learning_rate": 3.268628729110298e-05, "loss": 1.0104, "step": 53090 }, { "epoch": 0.76, "grad_norm": 0.49609375, "learning_rate": 3.2667774687043116e-05, "loss": 0.8726, "step": 53095 }, { "epoch": 0.76, "grad_norm": 0.5390625, "learning_rate": 3.264926630344386e-05, "loss": 0.9184, "step": 53100 }, { "epoch": 0.76, "grad_norm": 0.55078125, "learning_rate": 3.2630762141465265e-05, "loss": 0.9145, "step": 53105 }, { "epoch": 0.76, "grad_norm": 0.6015625, "learning_rate": 3.261226220226722e-05, "loss": 1.1971, "step": 53110 }, { "epoch": 0.76, "grad_norm": 0.578125, "learning_rate": 3.259376648700937e-05, "loss": 0.8967, "step": 53115 }, { "epoch": 0.76, "grad_norm": 0.478515625, "learning_rate": 3.257527499685096e-05, "loss": 0.9529, "step": 53120 }, { "epoch": 0.76, "grad_norm": 0.52734375, "learning_rate": 3.255678773295111e-05, "loss": 1.1128, "step": 53125 }, { "epoch": 0.76, "grad_norm": 0.6015625, "learning_rate": 3.253830469646866e-05, "loss": 0.8973, "step": 53130 }, { "epoch": 0.76, "grad_norm": 0.474609375, "learning_rate": 3.251982588856208e-05, "loss": 1.0136, "step": 53135 }, { "epoch": 0.76, "grad_norm": 0.5546875, "learning_rate": 3.250135131038966e-05, "loss": 0.9316, "step": 53140 }, { "epoch": 0.76, "grad_norm": 0.55078125, "learning_rate": 3.24828809631094e-05, "loss": 0.8264, "step": 53145 }, { "epoch": 0.76, "grad_norm": 0.546875, "learning_rate": 3.2464414847879084e-05, "loss": 0.972, "step": 53150 }, { "epoch": 0.76, "grad_norm": 0.65625, "learning_rate": 3.244595296585614e-05, "loss": 0.847, "step": 53155 }, { "epoch": 0.76, "grad_norm": 0.59375, "learning_rate": 3.24274953181978e-05, "loss": 0.9734, "step": 53160 }, { "epoch": 0.76, "grad_norm": 0.6015625, "learning_rate": 3.240904190606105e-05, "loss": 1.0219, "step": 53165 }, { "epoch": 0.76, "grad_norm": 0.578125, "learning_rate": 3.239059273060249e-05, "loss": 0.9728, "step": 53170 }, { "epoch": 0.76, "grad_norm": 0.53515625, "learning_rate": 3.237214779297858e-05, "loss": 1.1157, "step": 53175 }, { "epoch": 0.76, "grad_norm": 1.15625, "learning_rate": 3.235370709434551e-05, "loss": 0.9806, "step": 53180 }, { "epoch": 0.76, "grad_norm": 0.515625, "learning_rate": 3.23352706358591e-05, "loss": 0.942, "step": 53185 }, { "epoch": 0.76, "grad_norm": 0.494140625, "learning_rate": 3.2316838418674975e-05, "loss": 1.0261, "step": 53190 }, { "epoch": 0.76, "grad_norm": 0.48828125, "learning_rate": 3.2298410443948504e-05, "loss": 0.9171, "step": 53195 }, { "epoch": 0.76, "grad_norm": 0.6640625, "learning_rate": 3.22799867128348e-05, "loss": 0.9859, "step": 53200 }, { "epoch": 0.76, "grad_norm": 0.55859375, "learning_rate": 3.2261567226488634e-05, "loss": 0.7889, "step": 53205 }, { "epoch": 0.76, "grad_norm": 0.52734375, "learning_rate": 3.2243151986064565e-05, "loss": 1.0395, "step": 53210 }, { "epoch": 0.76, "grad_norm": 0.57421875, "learning_rate": 3.222474099271694e-05, "loss": 0.9108, "step": 53215 }, { "epoch": 0.76, "grad_norm": 0.51171875, "learning_rate": 3.22063342475997e-05, "loss": 1.0814, "step": 53220 }, { "epoch": 0.76, "grad_norm": 0.5546875, "learning_rate": 3.218793175186664e-05, "loss": 0.9087, "step": 53225 }, { "epoch": 0.76, "grad_norm": 0.52734375, "learning_rate": 3.216953350667129e-05, "loss": 0.9244, "step": 53230 }, { "epoch": 0.76, "grad_norm": 0.482421875, "learning_rate": 3.2151139513166825e-05, "loss": 0.8413, "step": 53235 }, { "epoch": 0.76, "grad_norm": 0.5703125, "learning_rate": 3.2132749772506176e-05, "loss": 0.9695, "step": 53240 }, { "epoch": 0.76, "grad_norm": 0.5234375, "learning_rate": 3.2114364285842104e-05, "loss": 0.8746, "step": 53245 }, { "epoch": 0.76, "grad_norm": 0.4921875, "learning_rate": 3.2095983054326964e-05, "loss": 1.1472, "step": 53250 }, { "epoch": 0.76, "grad_norm": 0.6015625, "learning_rate": 3.2077606079112934e-05, "loss": 1.0815, "step": 53255 }, { "epoch": 0.76, "grad_norm": 0.546875, "learning_rate": 3.205923336135195e-05, "loss": 1.0107, "step": 53260 }, { "epoch": 0.76, "grad_norm": 0.5703125, "learning_rate": 3.2040864902195556e-05, "loss": 1.0251, "step": 53265 }, { "epoch": 0.76, "grad_norm": 0.6328125, "learning_rate": 3.2022500702795156e-05, "loss": 0.8614, "step": 53270 }, { "epoch": 0.76, "grad_norm": 0.53515625, "learning_rate": 3.200414076430186e-05, "loss": 0.9612, "step": 53275 }, { "epoch": 0.76, "grad_norm": 0.55078125, "learning_rate": 3.198578508786643e-05, "loss": 0.9162, "step": 53280 }, { "epoch": 0.76, "grad_norm": 0.62890625, "learning_rate": 3.196743367463946e-05, "loss": 1.081, "step": 53285 }, { "epoch": 0.76, "grad_norm": 0.60546875, "learning_rate": 3.194908652577126e-05, "loss": 1.0349, "step": 53290 }, { "epoch": 0.76, "grad_norm": 0.51953125, "learning_rate": 3.193074364241181e-05, "loss": 0.8826, "step": 53295 }, { "epoch": 0.76, "grad_norm": 0.57421875, "learning_rate": 3.191240502571085e-05, "loss": 1.0065, "step": 53300 }, { "epoch": 0.76, "grad_norm": 0.5078125, "learning_rate": 3.1894070676817876e-05, "loss": 0.9465, "step": 53305 }, { "epoch": 0.76, "grad_norm": 0.50390625, "learning_rate": 3.187574059688216e-05, "loss": 0.8663, "step": 53310 }, { "epoch": 0.76, "grad_norm": 0.56640625, "learning_rate": 3.185741478705259e-05, "loss": 1.1527, "step": 53315 }, { "epoch": 0.76, "grad_norm": 0.640625, "learning_rate": 3.1839093248477846e-05, "loss": 1.0791, "step": 53320 }, { "epoch": 0.76, "grad_norm": 0.58984375, "learning_rate": 3.1820775982306417e-05, "loss": 0.922, "step": 53325 }, { "epoch": 0.76, "grad_norm": 0.51953125, "learning_rate": 3.1802462989686355e-05, "loss": 0.9416, "step": 53330 }, { "epoch": 0.77, "grad_norm": 0.67578125, "learning_rate": 3.178415427176559e-05, "loss": 0.9373, "step": 53335 }, { "epoch": 0.77, "grad_norm": 0.6796875, "learning_rate": 3.1765849829691785e-05, "loss": 1.1576, "step": 53340 }, { "epoch": 0.77, "grad_norm": 0.54296875, "learning_rate": 3.1747549664612165e-05, "loss": 0.9918, "step": 53345 }, { "epoch": 0.77, "grad_norm": 0.5546875, "learning_rate": 3.1729253777673864e-05, "loss": 1.1734, "step": 53350 }, { "epoch": 0.77, "grad_norm": 0.51953125, "learning_rate": 3.1710962170023715e-05, "loss": 0.8821, "step": 53355 }, { "epoch": 0.77, "grad_norm": 0.53515625, "learning_rate": 3.169267484280821e-05, "loss": 0.8931, "step": 53360 }, { "epoch": 0.77, "grad_norm": 0.59375, "learning_rate": 3.167439179717364e-05, "loss": 0.9137, "step": 53365 }, { "epoch": 0.77, "grad_norm": 0.6171875, "learning_rate": 3.1656113034266046e-05, "loss": 1.0828, "step": 53370 }, { "epoch": 0.77, "grad_norm": 0.54296875, "learning_rate": 3.163783855523108e-05, "loss": 0.9159, "step": 53375 }, { "epoch": 0.77, "grad_norm": 0.72265625, "learning_rate": 3.1619568361214256e-05, "loss": 1.0501, "step": 53380 }, { "epoch": 0.77, "grad_norm": 0.5, "learning_rate": 3.1601302453360796e-05, "loss": 1.0275, "step": 53385 }, { "epoch": 0.77, "grad_norm": 0.50390625, "learning_rate": 3.158304083281557e-05, "loss": 0.9749, "step": 53390 }, { "epoch": 0.77, "grad_norm": 0.65234375, "learning_rate": 3.1564783500723296e-05, "loss": 0.9175, "step": 53395 }, { "epoch": 0.77, "grad_norm": 0.494140625, "learning_rate": 3.154653045822829e-05, "loss": 1.0527, "step": 53400 }, { "epoch": 0.77, "grad_norm": 0.466796875, "learning_rate": 3.152828170647477e-05, "loss": 0.933, "step": 53405 }, { "epoch": 0.77, "grad_norm": 0.5234375, "learning_rate": 3.1510037246606496e-05, "loss": 0.9635, "step": 53410 }, { "epoch": 0.77, "grad_norm": 0.59375, "learning_rate": 3.1491797079767086e-05, "loss": 1.0825, "step": 53415 }, { "epoch": 0.77, "grad_norm": 0.58984375, "learning_rate": 3.147356120709989e-05, "loss": 0.8809, "step": 53420 }, { "epoch": 0.77, "grad_norm": 0.9453125, "learning_rate": 3.1455329629747884e-05, "loss": 1.0626, "step": 53425 }, { "epoch": 0.77, "grad_norm": 0.56640625, "learning_rate": 3.14371023488539e-05, "loss": 0.8831, "step": 53430 }, { "epoch": 0.77, "grad_norm": 0.5703125, "learning_rate": 3.141887936556045e-05, "loss": 0.9965, "step": 53435 }, { "epoch": 0.77, "grad_norm": 0.5703125, "learning_rate": 3.140066068100972e-05, "loss": 0.9626, "step": 53440 }, { "epoch": 0.77, "grad_norm": 0.53515625, "learning_rate": 3.138244629634374e-05, "loss": 1.0613, "step": 53445 }, { "epoch": 0.77, "grad_norm": 0.58203125, "learning_rate": 3.136423621270417e-05, "loss": 0.9647, "step": 53450 }, { "epoch": 0.77, "grad_norm": 0.60546875, "learning_rate": 3.134603043123241e-05, "loss": 0.8799, "step": 53455 }, { "epoch": 0.77, "grad_norm": 0.5703125, "learning_rate": 3.132782895306966e-05, "loss": 0.9257, "step": 53460 }, { "epoch": 0.77, "grad_norm": 0.51171875, "learning_rate": 3.130963177935683e-05, "loss": 0.9144, "step": 53465 }, { "epoch": 0.77, "grad_norm": 0.6328125, "learning_rate": 3.1291438911234474e-05, "loss": 1.0308, "step": 53470 }, { "epoch": 0.77, "grad_norm": 0.55859375, "learning_rate": 3.1273250349842985e-05, "loss": 1.0441, "step": 53475 }, { "epoch": 0.77, "grad_norm": 0.49609375, "learning_rate": 3.125506609632247e-05, "loss": 0.8567, "step": 53480 }, { "epoch": 0.77, "grad_norm": 0.56640625, "learning_rate": 3.123688615181267e-05, "loss": 1.1832, "step": 53485 }, { "epoch": 0.77, "grad_norm": 0.52734375, "learning_rate": 3.121871051745317e-05, "loss": 0.9365, "step": 53490 }, { "epoch": 0.77, "grad_norm": 0.6015625, "learning_rate": 3.120053919438326e-05, "loss": 0.8913, "step": 53495 }, { "epoch": 0.77, "grad_norm": 0.6171875, "learning_rate": 3.11823721837419e-05, "loss": 1.0006, "step": 53500 }, { "epoch": 0.77, "grad_norm": 0.515625, "learning_rate": 3.116420948666781e-05, "loss": 0.9045, "step": 53505 }, { "epoch": 0.77, "grad_norm": 0.546875, "learning_rate": 3.114605110429946e-05, "loss": 0.926, "step": 53510 }, { "epoch": 0.77, "grad_norm": 0.51171875, "learning_rate": 3.1127897037775077e-05, "loss": 0.9321, "step": 53515 }, { "epoch": 0.77, "grad_norm": 0.6171875, "learning_rate": 3.1109747288232505e-05, "loss": 0.9293, "step": 53520 }, { "epoch": 0.77, "grad_norm": 0.703125, "learning_rate": 3.109160185680945e-05, "loss": 0.9626, "step": 53525 }, { "epoch": 0.77, "grad_norm": 0.50390625, "learning_rate": 3.107346074464329e-05, "loss": 0.8654, "step": 53530 }, { "epoch": 0.77, "grad_norm": 0.6796875, "learning_rate": 3.1055323952871086e-05, "loss": 0.8872, "step": 53535 }, { "epoch": 0.77, "grad_norm": 0.53515625, "learning_rate": 3.1037191482629694e-05, "loss": 1.0243, "step": 53540 }, { "epoch": 0.77, "grad_norm": 0.51953125, "learning_rate": 3.101906333505571e-05, "loss": 0.9513, "step": 53545 }, { "epoch": 0.77, "grad_norm": 0.5859375, "learning_rate": 3.100093951128541e-05, "loss": 0.9745, "step": 53550 }, { "epoch": 0.77, "grad_norm": 0.51171875, "learning_rate": 3.098282001245476e-05, "loss": 1.0569, "step": 53555 }, { "epoch": 0.77, "grad_norm": 0.6328125, "learning_rate": 3.0964704839699596e-05, "loss": 1.0132, "step": 53560 }, { "epoch": 0.77, "grad_norm": 0.59765625, "learning_rate": 3.094659399415533e-05, "loss": 0.9108, "step": 53565 }, { "epoch": 0.77, "grad_norm": 0.5, "learning_rate": 3.092848747695718e-05, "loss": 0.8874, "step": 53570 }, { "epoch": 0.77, "grad_norm": 0.58984375, "learning_rate": 3.091038528924015e-05, "loss": 1.0389, "step": 53575 }, { "epoch": 0.77, "grad_norm": 0.5859375, "learning_rate": 3.089228743213881e-05, "loss": 0.8209, "step": 53580 }, { "epoch": 0.77, "grad_norm": 0.546875, "learning_rate": 3.0874193906787616e-05, "loss": 1.1522, "step": 53585 }, { "epoch": 0.77, "grad_norm": 0.625, "learning_rate": 3.08561047143207e-05, "loss": 0.919, "step": 53590 }, { "epoch": 0.77, "grad_norm": 0.6015625, "learning_rate": 3.0838019855871855e-05, "loss": 1.0189, "step": 53595 }, { "epoch": 0.77, "grad_norm": 0.458984375, "learning_rate": 3.08199393325747e-05, "loss": 1.0154, "step": 53600 }, { "epoch": 0.77, "grad_norm": 0.6171875, "learning_rate": 3.080186314556256e-05, "loss": 0.9181, "step": 53605 }, { "epoch": 0.77, "grad_norm": 0.484375, "learning_rate": 3.0783791295968454e-05, "loss": 0.8568, "step": 53610 }, { "epoch": 0.77, "grad_norm": 0.59765625, "learning_rate": 3.0765723784925116e-05, "loss": 1.0142, "step": 53615 }, { "epoch": 0.77, "grad_norm": 0.56640625, "learning_rate": 3.0747660613565054e-05, "loss": 1.0121, "step": 53620 }, { "epoch": 0.77, "grad_norm": 0.515625, "learning_rate": 3.072960178302052e-05, "loss": 0.8851, "step": 53625 }, { "epoch": 0.77, "grad_norm": 0.67578125, "learning_rate": 3.071154729442343e-05, "loss": 0.9259, "step": 53630 }, { "epoch": 0.77, "grad_norm": 0.5546875, "learning_rate": 3.069349714890545e-05, "loss": 1.1588, "step": 53635 }, { "epoch": 0.77, "grad_norm": 0.578125, "learning_rate": 3.0675451347598036e-05, "loss": 1.0215, "step": 53640 }, { "epoch": 0.77, "grad_norm": 0.5703125, "learning_rate": 3.065740989163225e-05, "loss": 0.9885, "step": 53645 }, { "epoch": 0.77, "grad_norm": 0.48828125, "learning_rate": 3.0639372782138995e-05, "loss": 0.9443, "step": 53650 }, { "epoch": 0.77, "grad_norm": 0.55859375, "learning_rate": 3.062134002024891e-05, "loss": 0.8408, "step": 53655 }, { "epoch": 0.77, "grad_norm": 0.57421875, "learning_rate": 3.060331160709219e-05, "loss": 0.9833, "step": 53660 }, { "epoch": 0.77, "grad_norm": 0.64453125, "learning_rate": 3.0585287543798924e-05, "loss": 0.9061, "step": 53665 }, { "epoch": 0.77, "grad_norm": 0.4921875, "learning_rate": 3.0567267831498937e-05, "loss": 0.989, "step": 53670 }, { "epoch": 0.77, "grad_norm": 0.5078125, "learning_rate": 3.0549252471321635e-05, "loss": 0.9358, "step": 53675 }, { "epoch": 0.77, "grad_norm": 0.64453125, "learning_rate": 3.0531241464396285e-05, "loss": 0.8769, "step": 53680 }, { "epoch": 0.77, "grad_norm": 0.5546875, "learning_rate": 3.051323481185189e-05, "loss": 0.8853, "step": 53685 }, { "epoch": 0.77, "grad_norm": 0.6171875, "learning_rate": 3.0495232514817016e-05, "loss": 0.966, "step": 53690 }, { "epoch": 0.77, "grad_norm": 0.6171875, "learning_rate": 3.0477234574420143e-05, "loss": 1.0596, "step": 53695 }, { "epoch": 0.77, "grad_norm": 0.5234375, "learning_rate": 3.045924099178942e-05, "loss": 0.9349, "step": 53700 }, { "epoch": 0.77, "grad_norm": 0.5625, "learning_rate": 3.0441251768052636e-05, "loss": 0.882, "step": 53705 }, { "epoch": 0.77, "grad_norm": 0.5546875, "learning_rate": 3.0423266904337444e-05, "loss": 0.9613, "step": 53710 }, { "epoch": 0.77, "grad_norm": 0.52734375, "learning_rate": 3.0405286401771095e-05, "loss": 0.9987, "step": 53715 }, { "epoch": 0.77, "grad_norm": 0.54296875, "learning_rate": 3.0387310261480684e-05, "loss": 0.9974, "step": 53720 }, { "epoch": 0.77, "grad_norm": 0.515625, "learning_rate": 3.0369338484592914e-05, "loss": 0.8701, "step": 53725 }, { "epoch": 0.77, "grad_norm": 0.51953125, "learning_rate": 3.0351371072234315e-05, "loss": 0.9919, "step": 53730 }, { "epoch": 0.77, "grad_norm": 0.59375, "learning_rate": 3.033340802553113e-05, "loss": 0.8744, "step": 53735 }, { "epoch": 0.77, "grad_norm": 0.54296875, "learning_rate": 3.0315449345609248e-05, "loss": 0.9822, "step": 53740 }, { "epoch": 0.77, "grad_norm": 0.5625, "learning_rate": 3.029749503359436e-05, "loss": 1.0412, "step": 53745 }, { "epoch": 0.77, "grad_norm": 0.5234375, "learning_rate": 3.0279545090611895e-05, "loss": 0.9188, "step": 53750 }, { "epoch": 0.77, "grad_norm": 0.61328125, "learning_rate": 3.026159951778692e-05, "loss": 0.99, "step": 53755 }, { "epoch": 0.77, "grad_norm": 0.66015625, "learning_rate": 3.024365831624435e-05, "loss": 0.9298, "step": 53760 }, { "epoch": 0.77, "grad_norm": 0.66796875, "learning_rate": 3.0225721487108683e-05, "loss": 1.0029, "step": 53765 }, { "epoch": 0.77, "grad_norm": 0.640625, "learning_rate": 3.0207789031504295e-05, "loss": 1.0409, "step": 53770 }, { "epoch": 0.77, "grad_norm": 0.62890625, "learning_rate": 3.018986095055515e-05, "loss": 0.9551, "step": 53775 }, { "epoch": 0.77, "grad_norm": 0.703125, "learning_rate": 3.017193724538505e-05, "loss": 1.0299, "step": 53780 }, { "epoch": 0.77, "grad_norm": 0.53125, "learning_rate": 3.015401791711743e-05, "loss": 0.8979, "step": 53785 }, { "epoch": 0.77, "grad_norm": 0.54296875, "learning_rate": 3.0136102966875502e-05, "loss": 1.1295, "step": 53790 }, { "epoch": 0.77, "grad_norm": 0.62109375, "learning_rate": 3.011819239578225e-05, "loss": 0.917, "step": 53795 }, { "epoch": 0.77, "grad_norm": 0.9375, "learning_rate": 3.0100286204960247e-05, "loss": 0.9848, "step": 53800 }, { "epoch": 0.77, "grad_norm": 0.53125, "learning_rate": 3.0082384395531926e-05, "loss": 0.8899, "step": 53805 }, { "epoch": 0.77, "grad_norm": 0.546875, "learning_rate": 3.00644869686194e-05, "loss": 1.1058, "step": 53810 }, { "epoch": 0.77, "grad_norm": 0.60546875, "learning_rate": 3.004659392534449e-05, "loss": 1.0225, "step": 53815 }, { "epoch": 0.77, "grad_norm": 0.59375, "learning_rate": 3.0028705266828704e-05, "loss": 0.9437, "step": 53820 }, { "epoch": 0.77, "grad_norm": 0.51953125, "learning_rate": 3.0010820994193367e-05, "loss": 0.9064, "step": 53825 }, { "epoch": 0.77, "grad_norm": 0.53515625, "learning_rate": 2.9992941108559514e-05, "loss": 0.8512, "step": 53830 }, { "epoch": 0.77, "grad_norm": 0.546875, "learning_rate": 2.9975065611047804e-05, "loss": 0.9165, "step": 53835 }, { "epoch": 0.77, "grad_norm": 0.50390625, "learning_rate": 2.995719450277874e-05, "loss": 0.8977, "step": 53840 }, { "epoch": 0.77, "grad_norm": 0.59375, "learning_rate": 2.993932778487253e-05, "loss": 0.9242, "step": 53845 }, { "epoch": 0.77, "grad_norm": 0.57421875, "learning_rate": 2.9921465458449006e-05, "loss": 0.8031, "step": 53850 }, { "epoch": 0.77, "grad_norm": 0.49609375, "learning_rate": 2.9903607524627852e-05, "loss": 0.7719, "step": 53855 }, { "epoch": 0.77, "grad_norm": 0.546875, "learning_rate": 2.988575398452843e-05, "loss": 1.0093, "step": 53860 }, { "epoch": 0.77, "grad_norm": 0.5, "learning_rate": 2.9867904839269812e-05, "loss": 1.0583, "step": 53865 }, { "epoch": 0.77, "grad_norm": 0.63671875, "learning_rate": 2.9850060089970753e-05, "loss": 0.9877, "step": 53870 }, { "epoch": 0.77, "grad_norm": 0.51171875, "learning_rate": 2.9832219737749835e-05, "loss": 0.8176, "step": 53875 }, { "epoch": 0.77, "grad_norm": 0.5859375, "learning_rate": 2.981438378372532e-05, "loss": 0.8607, "step": 53880 }, { "epoch": 0.77, "grad_norm": 0.58984375, "learning_rate": 2.9796552229015128e-05, "loss": 0.9302, "step": 53885 }, { "epoch": 0.77, "grad_norm": 0.5546875, "learning_rate": 2.9778725074737002e-05, "loss": 1.0279, "step": 53890 }, { "epoch": 0.77, "grad_norm": 0.51953125, "learning_rate": 2.9760902322008398e-05, "loss": 0.9389, "step": 53895 }, { "epoch": 0.77, "grad_norm": 0.57421875, "learning_rate": 2.9743083971946407e-05, "loss": 0.9887, "step": 53900 }, { "epoch": 0.77, "grad_norm": 0.5078125, "learning_rate": 2.9725270025667917e-05, "loss": 0.9757, "step": 53905 }, { "epoch": 0.77, "grad_norm": 0.65625, "learning_rate": 2.9707460484289573e-05, "loss": 0.9174, "step": 53910 }, { "epoch": 0.77, "grad_norm": 0.55078125, "learning_rate": 2.9689655348927625e-05, "loss": 1.0818, "step": 53915 }, { "epoch": 0.77, "grad_norm": 0.6171875, "learning_rate": 2.9671854620698192e-05, "loss": 0.884, "step": 53920 }, { "epoch": 0.77, "grad_norm": 0.5703125, "learning_rate": 2.9654058300717015e-05, "loss": 1.0192, "step": 53925 }, { "epoch": 0.77, "grad_norm": 0.5546875, "learning_rate": 2.9636266390099543e-05, "loss": 0.7995, "step": 53930 }, { "epoch": 0.77, "grad_norm": 0.52734375, "learning_rate": 2.9618478889961032e-05, "loss": 0.9835, "step": 53935 }, { "epoch": 0.77, "grad_norm": 0.56640625, "learning_rate": 2.9600695801416454e-05, "loss": 0.912, "step": 53940 }, { "epoch": 0.77, "grad_norm": 0.5546875, "learning_rate": 2.958291712558041e-05, "loss": 0.9939, "step": 53945 }, { "epoch": 0.77, "grad_norm": 0.5625, "learning_rate": 2.956514286356732e-05, "loss": 0.8653, "step": 53950 }, { "epoch": 0.77, "grad_norm": 0.6171875, "learning_rate": 2.954737301649132e-05, "loss": 1.0124, "step": 53955 }, { "epoch": 0.77, "grad_norm": 0.4921875, "learning_rate": 2.95296075854662e-05, "loss": 0.9432, "step": 53960 }, { "epoch": 0.77, "grad_norm": 0.58203125, "learning_rate": 2.9511846571605517e-05, "loss": 0.8652, "step": 53965 }, { "epoch": 0.77, "grad_norm": 0.57421875, "learning_rate": 2.949408997602262e-05, "loss": 0.9531, "step": 53970 }, { "epoch": 0.77, "grad_norm": 0.6015625, "learning_rate": 2.9476337799830456e-05, "loss": 0.9282, "step": 53975 }, { "epoch": 0.77, "grad_norm": 0.498046875, "learning_rate": 2.945859004414172e-05, "loss": 1.1041, "step": 53980 }, { "epoch": 0.77, "grad_norm": 0.55859375, "learning_rate": 2.9440846710068902e-05, "loss": 0.948, "step": 53985 }, { "epoch": 0.77, "grad_norm": 0.6328125, "learning_rate": 2.9423107798724214e-05, "loss": 0.9718, "step": 53990 }, { "epoch": 0.77, "grad_norm": 0.53125, "learning_rate": 2.9405373311219465e-05, "loss": 0.9747, "step": 53995 }, { "epoch": 0.77, "grad_norm": 0.59375, "learning_rate": 2.938764324866632e-05, "loss": 0.8916, "step": 54000 }, { "epoch": 0.77, "grad_norm": 0.55859375, "learning_rate": 2.9369917612176157e-05, "loss": 0.951, "step": 54005 }, { "epoch": 0.77, "grad_norm": 0.53125, "learning_rate": 2.9352196402859955e-05, "loss": 1.0411, "step": 54010 }, { "epoch": 0.77, "grad_norm": 0.6171875, "learning_rate": 2.9334479621828558e-05, "loss": 0.8444, "step": 54015 }, { "epoch": 0.77, "grad_norm": 0.6328125, "learning_rate": 2.931676727019248e-05, "loss": 1.1292, "step": 54020 }, { "epoch": 0.77, "grad_norm": 0.51953125, "learning_rate": 2.9299059349061942e-05, "loss": 0.9727, "step": 54025 }, { "epoch": 0.78, "grad_norm": 0.7734375, "learning_rate": 2.9281355859546843e-05, "loss": 0.8993, "step": 54030 }, { "epoch": 0.78, "grad_norm": 0.62890625, "learning_rate": 2.9263656802756935e-05, "loss": 1.0242, "step": 54035 }, { "epoch": 0.78, "grad_norm": 0.53125, "learning_rate": 2.9245962179801544e-05, "loss": 0.9757, "step": 54040 }, { "epoch": 0.78, "grad_norm": 0.62109375, "learning_rate": 2.9228271991789834e-05, "loss": 1.0083, "step": 54045 }, { "epoch": 0.78, "grad_norm": 0.52734375, "learning_rate": 2.9210586239830673e-05, "loss": 0.9915, "step": 54050 }, { "epoch": 0.78, "grad_norm": 0.50390625, "learning_rate": 2.919290492503255e-05, "loss": 1.0139, "step": 54055 }, { "epoch": 0.78, "grad_norm": 0.88671875, "learning_rate": 2.9175228048503788e-05, "loss": 1.1561, "step": 54060 }, { "epoch": 0.78, "grad_norm": 0.5234375, "learning_rate": 2.9157555611352427e-05, "loss": 1.0815, "step": 54065 }, { "epoch": 0.78, "grad_norm": 0.6015625, "learning_rate": 2.9139887614686134e-05, "loss": 0.9104, "step": 54070 }, { "epoch": 0.78, "grad_norm": 0.65234375, "learning_rate": 2.912222405961239e-05, "loss": 1.1101, "step": 54075 }, { "epoch": 0.78, "grad_norm": 0.703125, "learning_rate": 2.910456494723839e-05, "loss": 1.0586, "step": 54080 }, { "epoch": 0.78, "grad_norm": 0.52734375, "learning_rate": 2.908691027867101e-05, "loss": 0.7989, "step": 54085 }, { "epoch": 0.78, "grad_norm": 0.48046875, "learning_rate": 2.9069260055016813e-05, "loss": 0.9496, "step": 54090 }, { "epoch": 0.78, "grad_norm": 0.53515625, "learning_rate": 2.905161427738219e-05, "loss": 0.9066, "step": 54095 }, { "epoch": 0.78, "grad_norm": 0.7421875, "learning_rate": 2.9033972946873212e-05, "loss": 0.9965, "step": 54100 }, { "epoch": 0.78, "grad_norm": 0.490234375, "learning_rate": 2.9016336064595607e-05, "loss": 0.8419, "step": 54105 }, { "epoch": 0.78, "grad_norm": 0.55859375, "learning_rate": 2.89987036316549e-05, "loss": 0.7921, "step": 54110 }, { "epoch": 0.78, "grad_norm": 0.59375, "learning_rate": 2.898107564915634e-05, "loss": 0.8745, "step": 54115 }, { "epoch": 0.78, "grad_norm": 0.5234375, "learning_rate": 2.8963452118204827e-05, "loss": 0.7942, "step": 54120 }, { "epoch": 0.78, "grad_norm": 0.60546875, "learning_rate": 2.8945833039905023e-05, "loss": 0.8932, "step": 54125 }, { "epoch": 0.78, "grad_norm": 0.59375, "learning_rate": 2.8928218415361397e-05, "loss": 0.8994, "step": 54130 }, { "epoch": 0.78, "grad_norm": 0.58984375, "learning_rate": 2.8910608245677918e-05, "loss": 1.0687, "step": 54135 }, { "epoch": 0.78, "grad_norm": 0.5625, "learning_rate": 2.8893002531958478e-05, "loss": 0.8616, "step": 54140 }, { "epoch": 0.78, "grad_norm": 0.55078125, "learning_rate": 2.8875401275306657e-05, "loss": 0.9944, "step": 54145 }, { "epoch": 0.78, "grad_norm": 0.55859375, "learning_rate": 2.8857804476825655e-05, "loss": 0.7736, "step": 54150 }, { "epoch": 0.78, "grad_norm": 0.6640625, "learning_rate": 2.8840212137618495e-05, "loss": 0.9387, "step": 54155 }, { "epoch": 0.78, "grad_norm": 0.55078125, "learning_rate": 2.882262425878791e-05, "loss": 0.8779, "step": 54160 }, { "epoch": 0.78, "grad_norm": 0.59375, "learning_rate": 2.880504084143627e-05, "loss": 1.0307, "step": 54165 }, { "epoch": 0.78, "grad_norm": 0.5390625, "learning_rate": 2.8787461886665756e-05, "loss": 0.8971, "step": 54170 }, { "epoch": 0.78, "grad_norm": 0.53515625, "learning_rate": 2.8769887395578276e-05, "loss": 0.8709, "step": 54175 }, { "epoch": 0.78, "grad_norm": 0.52734375, "learning_rate": 2.875231736927534e-05, "loss": 0.997, "step": 54180 }, { "epoch": 0.78, "grad_norm": 0.5234375, "learning_rate": 2.8734751808858318e-05, "loss": 0.9288, "step": 54185 }, { "epoch": 0.78, "grad_norm": 0.54296875, "learning_rate": 2.87171907154282e-05, "loss": 1.0063, "step": 54190 }, { "epoch": 0.78, "grad_norm": 0.57421875, "learning_rate": 2.869963409008577e-05, "loss": 1.0611, "step": 54195 }, { "epoch": 0.78, "grad_norm": 0.50390625, "learning_rate": 2.868208193393146e-05, "loss": 0.9429, "step": 54200 }, { "epoch": 0.78, "grad_norm": 0.57421875, "learning_rate": 2.8664534248065467e-05, "loss": 0.8201, "step": 54205 }, { "epoch": 0.78, "grad_norm": 0.57421875, "learning_rate": 2.8646991033587754e-05, "loss": 0.8655, "step": 54210 }, { "epoch": 0.78, "grad_norm": 0.55078125, "learning_rate": 2.8629452291597868e-05, "loss": 1.0149, "step": 54215 }, { "epoch": 0.78, "grad_norm": 0.66015625, "learning_rate": 2.8611918023195193e-05, "loss": 0.9014, "step": 54220 }, { "epoch": 0.78, "grad_norm": 0.6015625, "learning_rate": 2.8594388229478843e-05, "loss": 0.933, "step": 54225 }, { "epoch": 0.78, "grad_norm": 0.53515625, "learning_rate": 2.857686291154753e-05, "loss": 1.0044, "step": 54230 }, { "epoch": 0.78, "grad_norm": 0.53125, "learning_rate": 2.8559342070499817e-05, "loss": 0.9215, "step": 54235 }, { "epoch": 0.78, "grad_norm": 0.5703125, "learning_rate": 2.854182570743391e-05, "loss": 0.8283, "step": 54240 }, { "epoch": 0.78, "grad_norm": 0.65625, "learning_rate": 2.8524313823447724e-05, "loss": 0.8395, "step": 54245 }, { "epoch": 0.78, "grad_norm": 0.58203125, "learning_rate": 2.8506806419638953e-05, "loss": 1.0228, "step": 54250 }, { "epoch": 0.78, "grad_norm": 0.57421875, "learning_rate": 2.8489303497105012e-05, "loss": 1.0182, "step": 54255 }, { "epoch": 0.78, "grad_norm": 0.640625, "learning_rate": 2.8471805056942936e-05, "loss": 1.0948, "step": 54260 }, { "epoch": 0.78, "grad_norm": 0.5625, "learning_rate": 2.8454311100249588e-05, "loss": 0.9233, "step": 54265 }, { "epoch": 0.78, "grad_norm": 0.60546875, "learning_rate": 2.8436821628121523e-05, "loss": 1.0684, "step": 54270 }, { "epoch": 0.78, "grad_norm": 0.609375, "learning_rate": 2.8419336641654958e-05, "loss": 0.9943, "step": 54275 }, { "epoch": 0.78, "grad_norm": 0.453125, "learning_rate": 2.8401856141945893e-05, "loss": 0.8644, "step": 54280 }, { "epoch": 0.78, "grad_norm": 0.546875, "learning_rate": 2.8384380130090072e-05, "loss": 0.99, "step": 54285 }, { "epoch": 0.78, "grad_norm": 0.5859375, "learning_rate": 2.8366908607182852e-05, "loss": 1.0081, "step": 54290 }, { "epoch": 0.78, "grad_norm": 0.53125, "learning_rate": 2.834944157431936e-05, "loss": 1.066, "step": 54295 }, { "epoch": 0.78, "grad_norm": 0.546875, "learning_rate": 2.833197903259447e-05, "loss": 0.9283, "step": 54300 }, { "epoch": 0.78, "grad_norm": 0.53125, "learning_rate": 2.83145209831028e-05, "loss": 0.8595, "step": 54305 }, { "epoch": 0.78, "grad_norm": 0.515625, "learning_rate": 2.829706742693855e-05, "loss": 1.1134, "step": 54310 }, { "epoch": 0.78, "grad_norm": 0.5390625, "learning_rate": 2.827961836519578e-05, "loss": 0.8888, "step": 54315 }, { "epoch": 0.78, "grad_norm": 0.55078125, "learning_rate": 2.826217379896825e-05, "loss": 1.1328, "step": 54320 }, { "epoch": 0.78, "grad_norm": 0.5, "learning_rate": 2.8244733729349338e-05, "loss": 0.8952, "step": 54325 }, { "epoch": 0.78, "grad_norm": 0.5625, "learning_rate": 2.8227298157432235e-05, "loss": 0.9766, "step": 54330 }, { "epoch": 0.78, "grad_norm": 0.55078125, "learning_rate": 2.820986708430986e-05, "loss": 0.9542, "step": 54335 }, { "epoch": 0.78, "grad_norm": 0.470703125, "learning_rate": 2.8192440511074782e-05, "loss": 0.964, "step": 54340 }, { "epoch": 0.78, "grad_norm": 0.59375, "learning_rate": 2.817501843881928e-05, "loss": 1.0103, "step": 54345 }, { "epoch": 0.78, "grad_norm": 0.5859375, "learning_rate": 2.8157600868635448e-05, "loss": 0.9626, "step": 54350 }, { "epoch": 0.78, "grad_norm": 0.546875, "learning_rate": 2.8140187801614993e-05, "loss": 0.8749, "step": 54355 }, { "epoch": 0.78, "grad_norm": 0.5546875, "learning_rate": 2.8122779238849407e-05, "loss": 1.015, "step": 54360 }, { "epoch": 0.78, "grad_norm": 0.5234375, "learning_rate": 2.8105375181429906e-05, "loss": 0.9016, "step": 54365 }, { "epoch": 0.78, "grad_norm": 0.58984375, "learning_rate": 2.808797563044734e-05, "loss": 0.9122, "step": 54370 }, { "epoch": 0.78, "grad_norm": 0.51953125, "learning_rate": 2.8070580586992356e-05, "loss": 0.8866, "step": 54375 }, { "epoch": 0.78, "grad_norm": 0.62890625, "learning_rate": 2.8053190052155343e-05, "loss": 1.0144, "step": 54380 }, { "epoch": 0.78, "grad_norm": 0.55859375, "learning_rate": 2.803580402702628e-05, "loss": 1.0065, "step": 54385 }, { "epoch": 0.78, "grad_norm": 0.5703125, "learning_rate": 2.8018422512694985e-05, "loss": 0.976, "step": 54390 }, { "epoch": 0.78, "grad_norm": 0.64453125, "learning_rate": 2.800104551025098e-05, "loss": 1.0068, "step": 54395 }, { "epoch": 0.78, "grad_norm": 0.58984375, "learning_rate": 2.798367302078344e-05, "loss": 0.9593, "step": 54400 }, { "epoch": 0.78, "grad_norm": 0.53515625, "learning_rate": 2.7966305045381268e-05, "loss": 1.1082, "step": 54405 }, { "epoch": 0.78, "grad_norm": 0.51171875, "learning_rate": 2.7948941585133126e-05, "loss": 0.9987, "step": 54410 }, { "epoch": 0.78, "grad_norm": 0.53515625, "learning_rate": 2.793158264112743e-05, "loss": 0.8221, "step": 54415 }, { "epoch": 0.78, "grad_norm": 0.5, "learning_rate": 2.7914228214452165e-05, "loss": 0.7934, "step": 54420 }, { "epoch": 0.78, "grad_norm": 0.57421875, "learning_rate": 2.7896878306195184e-05, "loss": 0.9969, "step": 54425 }, { "epoch": 0.78, "grad_norm": 0.486328125, "learning_rate": 2.787953291744402e-05, "loss": 0.9567, "step": 54430 }, { "epoch": 0.78, "grad_norm": 0.53515625, "learning_rate": 2.786219204928584e-05, "loss": 0.923, "step": 54435 }, { "epoch": 0.78, "grad_norm": 0.6015625, "learning_rate": 2.7844855702807616e-05, "loss": 1.0718, "step": 54440 }, { "epoch": 0.78, "grad_norm": 0.5625, "learning_rate": 2.7827523879096052e-05, "loss": 0.9112, "step": 54445 }, { "epoch": 0.78, "grad_norm": 0.546875, "learning_rate": 2.781019657923748e-05, "loss": 0.9836, "step": 54450 }, { "epoch": 0.78, "grad_norm": 0.546875, "learning_rate": 2.7792873804317976e-05, "loss": 0.8287, "step": 54455 }, { "epoch": 0.78, "grad_norm": 0.53515625, "learning_rate": 2.7775555555423416e-05, "loss": 1.008, "step": 54460 }, { "epoch": 0.78, "grad_norm": 0.54296875, "learning_rate": 2.7758241833639252e-05, "loss": 0.9603, "step": 54465 }, { "epoch": 0.78, "grad_norm": 0.56640625, "learning_rate": 2.7740932640050775e-05, "loss": 0.88, "step": 54470 }, { "epoch": 0.78, "grad_norm": 0.5078125, "learning_rate": 2.7723627975742962e-05, "loss": 1.0548, "step": 54475 }, { "epoch": 0.78, "grad_norm": 0.54296875, "learning_rate": 2.7706327841800428e-05, "loss": 1.0476, "step": 54480 }, { "epoch": 0.78, "grad_norm": 0.640625, "learning_rate": 2.768903223930761e-05, "loss": 0.9371, "step": 54485 }, { "epoch": 0.78, "grad_norm": 0.5546875, "learning_rate": 2.7671741169348632e-05, "loss": 0.9731, "step": 54490 }, { "epoch": 0.78, "grad_norm": 0.65625, "learning_rate": 2.7654454633007266e-05, "loss": 1.1529, "step": 54495 }, { "epoch": 0.78, "grad_norm": 0.52734375, "learning_rate": 2.7637172631367104e-05, "loss": 0.9571, "step": 54500 }, { "epoch": 0.78, "grad_norm": 0.6015625, "learning_rate": 2.7619895165511345e-05, "loss": 0.9497, "step": 54505 }, { "epoch": 0.78, "grad_norm": 0.50390625, "learning_rate": 2.7602622236523023e-05, "loss": 0.8806, "step": 54510 }, { "epoch": 0.78, "grad_norm": 0.57421875, "learning_rate": 2.7585353845484764e-05, "loss": 0.9026, "step": 54515 }, { "epoch": 0.78, "grad_norm": 0.59375, "learning_rate": 2.7568089993478995e-05, "loss": 0.8992, "step": 54520 }, { "epoch": 0.78, "grad_norm": 0.6015625, "learning_rate": 2.755083068158787e-05, "loss": 1.0083, "step": 54525 }, { "epoch": 0.78, "grad_norm": 0.546875, "learning_rate": 2.7533575910893162e-05, "loss": 1.0233, "step": 54530 }, { "epoch": 0.78, "grad_norm": 0.58984375, "learning_rate": 2.7516325682476453e-05, "loss": 0.9418, "step": 54535 }, { "epoch": 0.78, "grad_norm": 0.5703125, "learning_rate": 2.749907999741902e-05, "loss": 1.0514, "step": 54540 }, { "epoch": 0.78, "grad_norm": 0.546875, "learning_rate": 2.74818388568018e-05, "loss": 0.9402, "step": 54545 }, { "epoch": 0.78, "grad_norm": 0.5859375, "learning_rate": 2.746460226170554e-05, "loss": 1.0042, "step": 54550 }, { "epoch": 0.78, "grad_norm": 0.58984375, "learning_rate": 2.74473702132106e-05, "loss": 0.9899, "step": 54555 }, { "epoch": 0.78, "grad_norm": 0.59765625, "learning_rate": 2.7430142712397157e-05, "loss": 0.8437, "step": 54560 }, { "epoch": 0.78, "grad_norm": 0.5234375, "learning_rate": 2.7412919760344978e-05, "loss": 0.9307, "step": 54565 }, { "epoch": 0.78, "grad_norm": 0.462890625, "learning_rate": 2.7395701358133664e-05, "loss": 0.8752, "step": 54570 }, { "epoch": 0.78, "grad_norm": 0.58984375, "learning_rate": 2.73784875068425e-05, "loss": 0.8699, "step": 54575 }, { "epoch": 0.78, "grad_norm": 0.52734375, "learning_rate": 2.7361278207550423e-05, "loss": 0.9109, "step": 54580 }, { "epoch": 0.78, "grad_norm": 0.52734375, "learning_rate": 2.734407346133616e-05, "loss": 1.0958, "step": 54585 }, { "epoch": 0.78, "grad_norm": 0.5078125, "learning_rate": 2.7326873269278154e-05, "loss": 0.8842, "step": 54590 }, { "epoch": 0.78, "grad_norm": 0.53515625, "learning_rate": 2.7309677632454454e-05, "loss": 0.9275, "step": 54595 }, { "epoch": 0.78, "grad_norm": 0.50390625, "learning_rate": 2.7292486551942987e-05, "loss": 0.8883, "step": 54600 }, { "epoch": 0.78, "grad_norm": 0.65625, "learning_rate": 2.727530002882127e-05, "loss": 0.96, "step": 54605 }, { "epoch": 0.78, "grad_norm": 0.5625, "learning_rate": 2.725811806416655e-05, "loss": 0.9566, "step": 54610 }, { "epoch": 0.78, "grad_norm": 0.498046875, "learning_rate": 2.7240940659055826e-05, "loss": 0.8742, "step": 54615 }, { "epoch": 0.78, "grad_norm": 0.55078125, "learning_rate": 2.7223767814565837e-05, "loss": 0.8872, "step": 54620 }, { "epoch": 0.78, "grad_norm": 0.5390625, "learning_rate": 2.7206599531772948e-05, "loss": 0.8923, "step": 54625 }, { "epoch": 0.78, "grad_norm": 0.62890625, "learning_rate": 2.7189435811753293e-05, "loss": 0.899, "step": 54630 }, { "epoch": 0.78, "grad_norm": 0.5234375, "learning_rate": 2.717227665558276e-05, "loss": 0.8567, "step": 54635 }, { "epoch": 0.78, "grad_norm": 0.53125, "learning_rate": 2.7155122064336835e-05, "loss": 0.9793, "step": 54640 }, { "epoch": 0.78, "grad_norm": 0.65234375, "learning_rate": 2.713797203909083e-05, "loss": 0.938, "step": 54645 }, { "epoch": 0.78, "grad_norm": 0.5859375, "learning_rate": 2.7120826580919746e-05, "loss": 0.9868, "step": 54650 }, { "epoch": 0.78, "grad_norm": 0.466796875, "learning_rate": 2.710368569089826e-05, "loss": 0.8248, "step": 54655 }, { "epoch": 0.78, "grad_norm": 0.6484375, "learning_rate": 2.7086549370100746e-05, "loss": 0.8802, "step": 54660 }, { "epoch": 0.78, "grad_norm": 0.6796875, "learning_rate": 2.7069417619601343e-05, "loss": 0.855, "step": 54665 }, { "epoch": 0.78, "grad_norm": 0.546875, "learning_rate": 2.705229044047396e-05, "loss": 1.0715, "step": 54670 }, { "epoch": 0.78, "grad_norm": 0.6640625, "learning_rate": 2.703516783379204e-05, "loss": 0.8297, "step": 54675 }, { "epoch": 0.78, "grad_norm": 0.51953125, "learning_rate": 2.701804980062891e-05, "loss": 0.9352, "step": 54680 }, { "epoch": 0.78, "grad_norm": 0.55078125, "learning_rate": 2.700093634205757e-05, "loss": 0.9531, "step": 54685 }, { "epoch": 0.78, "grad_norm": 0.52734375, "learning_rate": 2.6983827459150646e-05, "loss": 0.9047, "step": 54690 }, { "epoch": 0.78, "grad_norm": 0.486328125, "learning_rate": 2.6966723152980578e-05, "loss": 0.8579, "step": 54695 }, { "epoch": 0.78, "grad_norm": 0.53125, "learning_rate": 2.6949623424619507e-05, "loss": 1.0137, "step": 54700 }, { "epoch": 0.78, "grad_norm": 0.51953125, "learning_rate": 2.6932528275139212e-05, "loss": 1.0013, "step": 54705 }, { "epoch": 0.78, "grad_norm": 0.546875, "learning_rate": 2.691543770561129e-05, "loss": 0.9706, "step": 54710 }, { "epoch": 0.78, "grad_norm": 0.703125, "learning_rate": 2.6898351717106962e-05, "loss": 1.0458, "step": 54715 }, { "epoch": 0.78, "grad_norm": 0.5546875, "learning_rate": 2.688127031069718e-05, "loss": 1.0411, "step": 54720 }, { "epoch": 0.79, "grad_norm": 0.59765625, "learning_rate": 2.686419348745265e-05, "loss": 0.9726, "step": 54725 }, { "epoch": 0.79, "grad_norm": 0.59375, "learning_rate": 2.6847121248443796e-05, "loss": 1.1117, "step": 54730 }, { "epoch": 0.79, "grad_norm": 0.49609375, "learning_rate": 2.6830053594740668e-05, "loss": 0.9083, "step": 54735 }, { "epoch": 0.79, "grad_norm": 0.56640625, "learning_rate": 2.6812990527413116e-05, "loss": 0.9351, "step": 54740 }, { "epoch": 0.79, "grad_norm": 0.578125, "learning_rate": 2.6795932047530705e-05, "loss": 1.0094, "step": 54745 }, { "epoch": 0.79, "grad_norm": 0.625, "learning_rate": 2.67788781561626e-05, "loss": 0.935, "step": 54750 }, { "epoch": 0.79, "grad_norm": 0.51953125, "learning_rate": 2.6761828854377812e-05, "loss": 0.8739, "step": 54755 }, { "epoch": 0.79, "grad_norm": 0.515625, "learning_rate": 2.674478414324504e-05, "loss": 0.9497, "step": 54760 }, { "epoch": 0.79, "grad_norm": 0.5625, "learning_rate": 2.6727744023832622e-05, "loss": 1.0655, "step": 54765 }, { "epoch": 0.79, "grad_norm": 0.58984375, "learning_rate": 2.6710708497208625e-05, "loss": 1.0258, "step": 54770 }, { "epoch": 0.79, "grad_norm": 0.66015625, "learning_rate": 2.66936775644409e-05, "loss": 1.1597, "step": 54775 }, { "epoch": 0.79, "grad_norm": 0.474609375, "learning_rate": 2.667665122659697e-05, "loss": 0.8485, "step": 54780 }, { "epoch": 0.79, "grad_norm": 0.453125, "learning_rate": 2.665962948474403e-05, "loss": 0.8748, "step": 54785 }, { "epoch": 0.79, "grad_norm": 0.5390625, "learning_rate": 2.6642612339949037e-05, "loss": 1.0183, "step": 54790 }, { "epoch": 0.79, "grad_norm": 0.58203125, "learning_rate": 2.6625599793278676e-05, "loss": 0.9696, "step": 54795 }, { "epoch": 0.79, "grad_norm": 0.62109375, "learning_rate": 2.6608591845799268e-05, "loss": 0.9695, "step": 54800 }, { "epoch": 0.79, "grad_norm": 0.5234375, "learning_rate": 2.6591588498576903e-05, "loss": 0.9984, "step": 54805 }, { "epoch": 0.79, "grad_norm": 0.7890625, "learning_rate": 2.6574589752677405e-05, "loss": 0.927, "step": 54810 }, { "epoch": 0.79, "grad_norm": 0.546875, "learning_rate": 2.6557595609166242e-05, "loss": 0.9817, "step": 54815 }, { "epoch": 0.79, "grad_norm": 0.49609375, "learning_rate": 2.65406060691086e-05, "loss": 0.7142, "step": 54820 }, { "epoch": 0.79, "grad_norm": 0.52734375, "learning_rate": 2.6523621133569464e-05, "loss": 0.8814, "step": 54825 }, { "epoch": 0.79, "grad_norm": 0.51953125, "learning_rate": 2.6506640803613412e-05, "loss": 1.0073, "step": 54830 }, { "epoch": 0.79, "grad_norm": 0.55078125, "learning_rate": 2.6489665080304814e-05, "loss": 0.9157, "step": 54835 }, { "epoch": 0.79, "grad_norm": 0.59375, "learning_rate": 2.647269396470776e-05, "loss": 0.9138, "step": 54840 }, { "epoch": 0.79, "grad_norm": 0.5390625, "learning_rate": 2.645572745788597e-05, "loss": 0.8308, "step": 54845 }, { "epoch": 0.79, "grad_norm": 0.59375, "learning_rate": 2.6438765560902933e-05, "loss": 0.9832, "step": 54850 }, { "epoch": 0.79, "grad_norm": 0.47265625, "learning_rate": 2.642180827482188e-05, "loss": 0.8653, "step": 54855 }, { "epoch": 0.79, "grad_norm": 0.5, "learning_rate": 2.6404855600705648e-05, "loss": 0.8943, "step": 54860 }, { "epoch": 0.79, "grad_norm": 0.56640625, "learning_rate": 2.638790753961692e-05, "loss": 0.835, "step": 54865 }, { "epoch": 0.79, "grad_norm": 0.6484375, "learning_rate": 2.6370964092617957e-05, "loss": 0.9696, "step": 54870 }, { "epoch": 0.79, "grad_norm": 0.62890625, "learning_rate": 2.6354025260770843e-05, "loss": 0.9426, "step": 54875 }, { "epoch": 0.79, "grad_norm": 0.462890625, "learning_rate": 2.6337091045137275e-05, "loss": 0.9313, "step": 54880 }, { "epoch": 0.79, "grad_norm": 0.56640625, "learning_rate": 2.6320161446778735e-05, "loss": 1.0948, "step": 54885 }, { "epoch": 0.79, "grad_norm": 0.6015625, "learning_rate": 2.630323646675642e-05, "loss": 1.0268, "step": 54890 }, { "epoch": 0.79, "grad_norm": 0.5390625, "learning_rate": 2.6286316106131148e-05, "loss": 0.7687, "step": 54895 }, { "epoch": 0.79, "grad_norm": 0.578125, "learning_rate": 2.6269400365963536e-05, "loss": 0.9385, "step": 54900 }, { "epoch": 0.79, "grad_norm": 0.55859375, "learning_rate": 2.6252489247313915e-05, "loss": 0.915, "step": 54905 }, { "epoch": 0.79, "grad_norm": 0.53515625, "learning_rate": 2.6235582751242226e-05, "loss": 0.8307, "step": 54910 }, { "epoch": 0.79, "grad_norm": 0.5703125, "learning_rate": 2.621868087880822e-05, "loss": 0.8782, "step": 54915 }, { "epoch": 0.79, "grad_norm": 0.62890625, "learning_rate": 2.6201783631071385e-05, "loss": 1.0213, "step": 54920 }, { "epoch": 0.79, "grad_norm": 0.48046875, "learning_rate": 2.618489100909074e-05, "loss": 0.9641, "step": 54925 }, { "epoch": 0.79, "grad_norm": 0.59765625, "learning_rate": 2.61680030139252e-05, "loss": 0.8366, "step": 54930 }, { "epoch": 0.79, "grad_norm": 0.57421875, "learning_rate": 2.6151119646633347e-05, "loss": 1.034, "step": 54935 }, { "epoch": 0.79, "grad_norm": 0.5390625, "learning_rate": 2.613424090827339e-05, "loss": 0.9292, "step": 54940 }, { "epoch": 0.79, "grad_norm": 0.59765625, "learning_rate": 2.611736679990334e-05, "loss": 0.8975, "step": 54945 }, { "epoch": 0.79, "grad_norm": 0.55859375, "learning_rate": 2.6100497322580908e-05, "loss": 0.9267, "step": 54950 }, { "epoch": 0.79, "grad_norm": 0.578125, "learning_rate": 2.6083632477363452e-05, "loss": 1.0463, "step": 54955 }, { "epoch": 0.79, "grad_norm": 0.53125, "learning_rate": 2.6066772265308083e-05, "loss": 0.9473, "step": 54960 }, { "epoch": 0.79, "grad_norm": 0.5703125, "learning_rate": 2.6049916687471666e-05, "loss": 0.8829, "step": 54965 }, { "epoch": 0.79, "grad_norm": 0.625, "learning_rate": 2.603306574491069e-05, "loss": 1.0004, "step": 54970 }, { "epoch": 0.79, "grad_norm": 0.58984375, "learning_rate": 2.6016219438681367e-05, "loss": 0.9185, "step": 54975 }, { "epoch": 0.79, "grad_norm": 0.5625, "learning_rate": 2.599937776983967e-05, "loss": 1.0157, "step": 54980 }, { "epoch": 0.79, "grad_norm": 0.578125, "learning_rate": 2.5982540739441284e-05, "loss": 1.0383, "step": 54985 }, { "epoch": 0.79, "grad_norm": 0.578125, "learning_rate": 2.5965708348541518e-05, "loss": 1.072, "step": 54990 }, { "epoch": 0.79, "grad_norm": 0.52734375, "learning_rate": 2.5948880598195467e-05, "loss": 1.0174, "step": 54995 }, { "epoch": 0.79, "grad_norm": 0.5078125, "learning_rate": 2.593205748945795e-05, "loss": 1.1266, "step": 55000 }, { "epoch": 0.79, "grad_norm": 0.62890625, "learning_rate": 2.5915239023383387e-05, "loss": 0.9785, "step": 55005 }, { "epoch": 0.79, "grad_norm": 0.515625, "learning_rate": 2.5898425201026022e-05, "loss": 0.8967, "step": 55010 }, { "epoch": 0.79, "grad_norm": 0.53515625, "learning_rate": 2.588161602343979e-05, "loss": 0.9996, "step": 55015 }, { "epoch": 0.79, "grad_norm": 0.5234375, "learning_rate": 2.5864811491678255e-05, "loss": 0.9827, "step": 55020 }, { "epoch": 0.79, "grad_norm": 0.6484375, "learning_rate": 2.5848011606794797e-05, "loss": 0.9682, "step": 55025 }, { "epoch": 0.79, "grad_norm": 0.55859375, "learning_rate": 2.5831216369842416e-05, "loss": 0.803, "step": 55030 }, { "epoch": 0.79, "grad_norm": 0.5703125, "learning_rate": 2.581442578187384e-05, "loss": 1.03, "step": 55035 }, { "epoch": 0.79, "grad_norm": 0.57421875, "learning_rate": 2.5797639843941547e-05, "loss": 1.0129, "step": 55040 }, { "epoch": 0.79, "grad_norm": 0.5078125, "learning_rate": 2.578085855709773e-05, "loss": 0.8681, "step": 55045 }, { "epoch": 0.79, "grad_norm": 0.54296875, "learning_rate": 2.576408192239419e-05, "loss": 0.8666, "step": 55050 }, { "epoch": 0.79, "grad_norm": 0.56640625, "learning_rate": 2.5747309940882546e-05, "loss": 0.995, "step": 55055 }, { "epoch": 0.79, "grad_norm": 0.5, "learning_rate": 2.5730542613614118e-05, "loss": 0.904, "step": 55060 }, { "epoch": 0.79, "grad_norm": 0.58203125, "learning_rate": 2.5713779941639826e-05, "loss": 0.9581, "step": 55065 }, { "epoch": 0.79, "grad_norm": 0.625, "learning_rate": 2.5697021926010413e-05, "loss": 0.8722, "step": 55070 }, { "epoch": 0.79, "grad_norm": 0.52734375, "learning_rate": 2.5680268567776323e-05, "loss": 1.021, "step": 55075 }, { "epoch": 0.79, "grad_norm": 0.62890625, "learning_rate": 2.566351986798764e-05, "loss": 0.8893, "step": 55080 }, { "epoch": 0.79, "grad_norm": 0.65234375, "learning_rate": 2.564677582769416e-05, "loss": 1.1019, "step": 55085 }, { "epoch": 0.79, "grad_norm": 0.65234375, "learning_rate": 2.563003644794546e-05, "loss": 1.16, "step": 55090 }, { "epoch": 0.79, "grad_norm": 0.59765625, "learning_rate": 2.5613301729790794e-05, "loss": 1.0489, "step": 55095 }, { "epoch": 0.79, "grad_norm": 0.7109375, "learning_rate": 2.559657167427908e-05, "loss": 0.954, "step": 55100 }, { "epoch": 0.79, "grad_norm": 0.50390625, "learning_rate": 2.5579846282458987e-05, "loss": 0.9067, "step": 55105 }, { "epoch": 0.79, "grad_norm": 0.5390625, "learning_rate": 2.5563125555378908e-05, "loss": 0.8201, "step": 55110 }, { "epoch": 0.79, "grad_norm": 0.62890625, "learning_rate": 2.5546409494086876e-05, "loss": 0.9355, "step": 55115 }, { "epoch": 0.79, "grad_norm": 0.51953125, "learning_rate": 2.5529698099630694e-05, "loss": 0.951, "step": 55120 }, { "epoch": 0.79, "grad_norm": 0.59765625, "learning_rate": 2.551299137305787e-05, "loss": 0.9268, "step": 55125 }, { "epoch": 0.79, "grad_norm": 0.5546875, "learning_rate": 2.549628931541559e-05, "loss": 0.9385, "step": 55130 }, { "epoch": 0.79, "grad_norm": 0.59375, "learning_rate": 2.5479591927750713e-05, "loss": 0.9118, "step": 55135 }, { "epoch": 0.79, "grad_norm": 0.58203125, "learning_rate": 2.5462899211109915e-05, "loss": 0.9354, "step": 55140 }, { "epoch": 0.79, "grad_norm": 0.55078125, "learning_rate": 2.5446211166539448e-05, "loss": 0.993, "step": 55145 }, { "epoch": 0.79, "grad_norm": 0.57421875, "learning_rate": 2.5429527795085384e-05, "loss": 1.1194, "step": 55150 }, { "epoch": 0.79, "grad_norm": 0.58203125, "learning_rate": 2.541284909779348e-05, "loss": 0.9855, "step": 55155 }, { "epoch": 0.79, "grad_norm": 0.6015625, "learning_rate": 2.5396175075709107e-05, "loss": 1.1419, "step": 55160 }, { "epoch": 0.79, "grad_norm": 0.56640625, "learning_rate": 2.537950572987744e-05, "loss": 1.0015, "step": 55165 }, { "epoch": 0.79, "grad_norm": 0.515625, "learning_rate": 2.536284106134338e-05, "loss": 0.985, "step": 55170 }, { "epoch": 0.79, "grad_norm": 0.52734375, "learning_rate": 2.534618107115141e-05, "loss": 0.9552, "step": 55175 }, { "epoch": 0.79, "grad_norm": 0.6015625, "learning_rate": 2.5329525760345875e-05, "loss": 0.9298, "step": 55180 }, { "epoch": 0.79, "grad_norm": 0.56640625, "learning_rate": 2.531287512997067e-05, "loss": 0.8846, "step": 55185 }, { "epoch": 0.79, "grad_norm": 0.66015625, "learning_rate": 2.529622918106954e-05, "loss": 0.9645, "step": 55190 }, { "epoch": 0.79, "grad_norm": 0.58203125, "learning_rate": 2.527958791468582e-05, "loss": 0.9765, "step": 55195 }, { "epoch": 0.79, "grad_norm": 0.63671875, "learning_rate": 2.5262951331862628e-05, "loss": 1.0036, "step": 55200 }, { "epoch": 0.79, "grad_norm": 0.55859375, "learning_rate": 2.5246319433642794e-05, "loss": 1.0886, "step": 55205 }, { "epoch": 0.79, "grad_norm": 0.6328125, "learning_rate": 2.5229692221068767e-05, "loss": 0.9325, "step": 55210 }, { "epoch": 0.79, "grad_norm": 0.5234375, "learning_rate": 2.521306969518279e-05, "loss": 1.0222, "step": 55215 }, { "epoch": 0.79, "grad_norm": 0.5546875, "learning_rate": 2.5196451857026805e-05, "loss": 1.0714, "step": 55220 }, { "epoch": 0.79, "grad_norm": 0.578125, "learning_rate": 2.517983870764238e-05, "loss": 0.9616, "step": 55225 }, { "epoch": 0.79, "grad_norm": 0.59375, "learning_rate": 2.516323024807088e-05, "loss": 0.9596, "step": 55230 }, { "epoch": 0.79, "grad_norm": 0.5546875, "learning_rate": 2.5146626479353375e-05, "loss": 0.8699, "step": 55235 }, { "epoch": 0.79, "grad_norm": 0.578125, "learning_rate": 2.5130027402530564e-05, "loss": 0.8693, "step": 55240 }, { "epoch": 0.79, "grad_norm": 0.55859375, "learning_rate": 2.5113433018642883e-05, "loss": 0.921, "step": 55245 }, { "epoch": 0.79, "grad_norm": 0.66015625, "learning_rate": 2.5096843328730503e-05, "loss": 1.1023, "step": 55250 }, { "epoch": 0.79, "grad_norm": 0.52734375, "learning_rate": 2.508025833383333e-05, "loss": 0.9598, "step": 55255 }, { "epoch": 0.79, "grad_norm": 0.51953125, "learning_rate": 2.5063678034990855e-05, "loss": 0.9467, "step": 55260 }, { "epoch": 0.79, "grad_norm": 0.57421875, "learning_rate": 2.5047102433242385e-05, "loss": 0.8873, "step": 55265 }, { "epoch": 0.79, "grad_norm": 0.60546875, "learning_rate": 2.5030531529626932e-05, "loss": 0.9983, "step": 55270 }, { "epoch": 0.79, "grad_norm": 0.52734375, "learning_rate": 2.501396532518311e-05, "loss": 0.9313, "step": 55275 }, { "epoch": 0.79, "grad_norm": 0.53515625, "learning_rate": 2.499740382094937e-05, "loss": 1.0263, "step": 55280 }, { "epoch": 0.79, "grad_norm": 0.52734375, "learning_rate": 2.4980847017963792e-05, "loss": 1.1104, "step": 55285 }, { "epoch": 0.79, "grad_norm": 0.458984375, "learning_rate": 2.496429491726413e-05, "loss": 1.0691, "step": 55290 }, { "epoch": 0.79, "grad_norm": 0.55078125, "learning_rate": 2.4947747519887922e-05, "loss": 1.0045, "step": 55295 }, { "epoch": 0.79, "grad_norm": 0.54296875, "learning_rate": 2.49312048268724e-05, "loss": 0.7354, "step": 55300 }, { "epoch": 0.79, "grad_norm": 0.6015625, "learning_rate": 2.491466683925443e-05, "loss": 0.9239, "step": 55305 }, { "epoch": 0.79, "grad_norm": 0.51953125, "learning_rate": 2.4898133558070647e-05, "loss": 1.1663, "step": 55310 }, { "epoch": 0.79, "grad_norm": 0.5859375, "learning_rate": 2.4881604984357432e-05, "loss": 0.8713, "step": 55315 }, { "epoch": 0.79, "grad_norm": 0.53125, "learning_rate": 2.4865081119150734e-05, "loss": 0.8766, "step": 55320 }, { "epoch": 0.79, "grad_norm": 0.49609375, "learning_rate": 2.4848561963486318e-05, "loss": 0.9103, "step": 55325 }, { "epoch": 0.79, "grad_norm": 0.5703125, "learning_rate": 2.483204751839966e-05, "loss": 0.9963, "step": 55330 }, { "epoch": 0.79, "grad_norm": 0.53515625, "learning_rate": 2.4815537784925846e-05, "loss": 0.9386, "step": 55335 }, { "epoch": 0.79, "grad_norm": 0.54296875, "learning_rate": 2.4799032764099784e-05, "loss": 0.9169, "step": 55340 }, { "epoch": 0.79, "grad_norm": 0.59375, "learning_rate": 2.4782532456955955e-05, "loss": 0.9657, "step": 55345 }, { "epoch": 0.79, "grad_norm": 0.64453125, "learning_rate": 2.4766036864528696e-05, "loss": 0.8921, "step": 55350 }, { "epoch": 0.79, "grad_norm": 0.484375, "learning_rate": 2.4749545987851897e-05, "loss": 0.8551, "step": 55355 }, { "epoch": 0.79, "grad_norm": 0.5859375, "learning_rate": 2.473305982795926e-05, "loss": 0.9152, "step": 55360 }, { "epoch": 0.79, "grad_norm": 0.6015625, "learning_rate": 2.4716578385884182e-05, "loss": 1.1017, "step": 55365 }, { "epoch": 0.79, "grad_norm": 0.51953125, "learning_rate": 2.4700101662659692e-05, "loss": 1.047, "step": 55370 }, { "epoch": 0.79, "grad_norm": 0.56640625, "learning_rate": 2.4683629659318575e-05, "loss": 0.8259, "step": 55375 }, { "epoch": 0.79, "grad_norm": 0.53125, "learning_rate": 2.466716237689337e-05, "loss": 0.8812, "step": 55380 }, { "epoch": 0.79, "grad_norm": 0.5546875, "learning_rate": 2.465069981641619e-05, "loss": 0.9734, "step": 55385 }, { "epoch": 0.79, "grad_norm": 0.56640625, "learning_rate": 2.463424197891898e-05, "loss": 0.9448, "step": 55390 }, { "epoch": 0.79, "grad_norm": 0.609375, "learning_rate": 2.4617788865433332e-05, "loss": 1.0034, "step": 55395 }, { "epoch": 0.79, "grad_norm": 0.546875, "learning_rate": 2.4601340476990498e-05, "loss": 0.9685, "step": 55400 }, { "epoch": 0.79, "grad_norm": 0.52734375, "learning_rate": 2.4584896814621518e-05, "loss": 0.979, "step": 55405 }, { "epoch": 0.79, "grad_norm": 0.609375, "learning_rate": 2.456845787935712e-05, "loss": 0.9138, "step": 55410 }, { "epoch": 0.79, "grad_norm": 0.5234375, "learning_rate": 2.4552023672227666e-05, "loss": 1.0635, "step": 55415 }, { "epoch": 0.79, "grad_norm": 0.55078125, "learning_rate": 2.453559419426329e-05, "loss": 0.9942, "step": 55420 }, { "epoch": 0.8, "grad_norm": 0.48828125, "learning_rate": 2.451916944649385e-05, "loss": 0.8659, "step": 55425 }, { "epoch": 0.8, "grad_norm": 0.5390625, "learning_rate": 2.45027494299488e-05, "loss": 0.8897, "step": 55430 }, { "epoch": 0.8, "grad_norm": 0.51171875, "learning_rate": 2.4486334145657408e-05, "loss": 1.0097, "step": 55435 }, { "epoch": 0.8, "grad_norm": 0.60546875, "learning_rate": 2.4469923594648626e-05, "loss": 0.8444, "step": 55440 }, { "epoch": 0.8, "grad_norm": 0.69140625, "learning_rate": 2.445351777795104e-05, "loss": 0.9892, "step": 55445 }, { "epoch": 0.8, "grad_norm": 0.59765625, "learning_rate": 2.4437116696592988e-05, "loss": 0.8611, "step": 55450 }, { "epoch": 0.8, "grad_norm": 0.62109375, "learning_rate": 2.4420720351602512e-05, "loss": 0.912, "step": 55455 }, { "epoch": 0.8, "grad_norm": 0.51171875, "learning_rate": 2.4404328744007387e-05, "loss": 0.8703, "step": 55460 }, { "epoch": 0.8, "grad_norm": 0.578125, "learning_rate": 2.4387941874835008e-05, "loss": 0.9272, "step": 55465 }, { "epoch": 0.8, "grad_norm": 0.5546875, "learning_rate": 2.437155974511255e-05, "loss": 0.8864, "step": 55470 }, { "epoch": 0.8, "grad_norm": 0.52734375, "learning_rate": 2.435518235586688e-05, "loss": 0.9701, "step": 55475 }, { "epoch": 0.8, "grad_norm": 0.484375, "learning_rate": 2.433880970812451e-05, "loss": 0.9989, "step": 55480 }, { "epoch": 0.8, "grad_norm": 0.578125, "learning_rate": 2.432244180291172e-05, "loss": 0.9397, "step": 55485 }, { "epoch": 0.8, "grad_norm": 0.6015625, "learning_rate": 2.4306078641254493e-05, "loss": 0.9598, "step": 55490 }, { "epoch": 0.8, "grad_norm": 0.5625, "learning_rate": 2.428972022417847e-05, "loss": 1.0931, "step": 55495 }, { "epoch": 0.8, "grad_norm": 0.67578125, "learning_rate": 2.4273366552708975e-05, "loss": 1.1147, "step": 55500 }, { "epoch": 0.8, "grad_norm": 0.609375, "learning_rate": 2.425701762787115e-05, "loss": 0.9353, "step": 55505 }, { "epoch": 0.8, "grad_norm": 0.57421875, "learning_rate": 2.4240673450689676e-05, "loss": 0.9107, "step": 55510 }, { "epoch": 0.8, "grad_norm": 0.53125, "learning_rate": 2.422433402218909e-05, "loss": 0.9921, "step": 55515 }, { "epoch": 0.8, "grad_norm": 0.56640625, "learning_rate": 2.4207999343393574e-05, "loss": 0.9116, "step": 55520 }, { "epoch": 0.8, "grad_norm": 0.54296875, "learning_rate": 2.4191669415326945e-05, "loss": 0.9435, "step": 55525 }, { "epoch": 0.8, "grad_norm": 0.58203125, "learning_rate": 2.4175344239012822e-05, "loss": 0.9898, "step": 55530 }, { "epoch": 0.8, "grad_norm": 0.671875, "learning_rate": 2.4159023815474502e-05, "loss": 0.932, "step": 55535 }, { "epoch": 0.8, "grad_norm": 0.5078125, "learning_rate": 2.4142708145734916e-05, "loss": 0.8182, "step": 55540 }, { "epoch": 0.8, "grad_norm": 0.546875, "learning_rate": 2.4126397230816778e-05, "loss": 0.8407, "step": 55545 }, { "epoch": 0.8, "grad_norm": 0.5546875, "learning_rate": 2.41100910717425e-05, "loss": 1.0069, "step": 55550 }, { "epoch": 0.8, "grad_norm": 0.515625, "learning_rate": 2.409378966953415e-05, "loss": 0.9041, "step": 55555 }, { "epoch": 0.8, "grad_norm": 0.58984375, "learning_rate": 2.4077493025213483e-05, "loss": 0.893, "step": 55560 }, { "epoch": 0.8, "grad_norm": 0.578125, "learning_rate": 2.4061201139802024e-05, "loss": 1.0359, "step": 55565 }, { "epoch": 0.8, "grad_norm": 0.48046875, "learning_rate": 2.4044914014320995e-05, "loss": 0.8965, "step": 55570 }, { "epoch": 0.8, "grad_norm": 0.51953125, "learning_rate": 2.4028631649791233e-05, "loss": 0.8205, "step": 55575 }, { "epoch": 0.8, "grad_norm": 0.63671875, "learning_rate": 2.4012354047233354e-05, "loss": 0.8566, "step": 55580 }, { "epoch": 0.8, "grad_norm": 0.5859375, "learning_rate": 2.3996081207667698e-05, "loss": 0.8672, "step": 55585 }, { "epoch": 0.8, "grad_norm": 0.57421875, "learning_rate": 2.3979813132114215e-05, "loss": 0.9229, "step": 55590 }, { "epoch": 0.8, "grad_norm": 0.6015625, "learning_rate": 2.3963549821592613e-05, "loss": 1.1278, "step": 55595 }, { "epoch": 0.8, "grad_norm": 0.69921875, "learning_rate": 2.3947291277122362e-05, "loss": 1.1218, "step": 55600 }, { "epoch": 0.8, "grad_norm": 0.62109375, "learning_rate": 2.3931037499722466e-05, "loss": 1.045, "step": 55605 }, { "epoch": 0.8, "grad_norm": 0.4609375, "learning_rate": 2.3914788490411765e-05, "loss": 0.9761, "step": 55610 }, { "epoch": 0.8, "grad_norm": 0.625, "learning_rate": 2.3898544250208808e-05, "loss": 0.9598, "step": 55615 }, { "epoch": 0.8, "grad_norm": 0.61328125, "learning_rate": 2.3882304780131738e-05, "loss": 0.9488, "step": 55620 }, { "epoch": 0.8, "grad_norm": 0.58203125, "learning_rate": 2.38660700811985e-05, "loss": 0.888, "step": 55625 }, { "epoch": 0.8, "grad_norm": 0.625, "learning_rate": 2.3849840154426716e-05, "loss": 1.1086, "step": 55630 }, { "epoch": 0.8, "grad_norm": 0.55859375, "learning_rate": 2.3833615000833666e-05, "loss": 0.9319, "step": 55635 }, { "epoch": 0.8, "grad_norm": 0.5859375, "learning_rate": 2.381739462143636e-05, "loss": 0.9932, "step": 55640 }, { "epoch": 0.8, "grad_norm": 0.5546875, "learning_rate": 2.380117901725156e-05, "loss": 0.7757, "step": 55645 }, { "epoch": 0.8, "grad_norm": 0.57421875, "learning_rate": 2.378496818929561e-05, "loss": 1.0451, "step": 55650 }, { "epoch": 0.8, "grad_norm": 0.6171875, "learning_rate": 2.3768762138584688e-05, "loss": 0.9445, "step": 55655 }, { "epoch": 0.8, "grad_norm": 0.51953125, "learning_rate": 2.3752560866134544e-05, "loss": 0.8609, "step": 55660 }, { "epoch": 0.8, "grad_norm": 0.53515625, "learning_rate": 2.3736364372960763e-05, "loss": 0.9798, "step": 55665 }, { "epoch": 0.8, "grad_norm": 0.765625, "learning_rate": 2.3720172660078478e-05, "loss": 1.0855, "step": 55670 }, { "epoch": 0.8, "grad_norm": 0.63671875, "learning_rate": 2.370398572850264e-05, "loss": 1.0424, "step": 55675 }, { "epoch": 0.8, "grad_norm": 0.474609375, "learning_rate": 2.3687803579247915e-05, "loss": 0.9406, "step": 55680 }, { "epoch": 0.8, "grad_norm": 0.5078125, "learning_rate": 2.3671626213328534e-05, "loss": 1.0556, "step": 55685 }, { "epoch": 0.8, "grad_norm": 0.5390625, "learning_rate": 2.365545363175856e-05, "loss": 1.0199, "step": 55690 }, { "epoch": 0.8, "grad_norm": 0.6484375, "learning_rate": 2.363928583555173e-05, "loss": 0.9645, "step": 55695 }, { "epoch": 0.8, "grad_norm": 0.6015625, "learning_rate": 2.36231228257214e-05, "loss": 0.9919, "step": 55700 }, { "epoch": 0.8, "grad_norm": 0.55859375, "learning_rate": 2.3606964603280746e-05, "loss": 0.9985, "step": 55705 }, { "epoch": 0.8, "grad_norm": 0.6484375, "learning_rate": 2.3590811169242554e-05, "loss": 0.9128, "step": 55710 }, { "epoch": 0.8, "grad_norm": 0.51953125, "learning_rate": 2.3574662524619318e-05, "loss": 1.0251, "step": 55715 }, { "epoch": 0.8, "grad_norm": 0.546875, "learning_rate": 2.3558518670423268e-05, "loss": 0.9368, "step": 55720 }, { "epoch": 0.8, "grad_norm": 0.5234375, "learning_rate": 2.3542379607666365e-05, "loss": 1.0604, "step": 55725 }, { "epoch": 0.8, "grad_norm": 0.4921875, "learning_rate": 2.3526245337360153e-05, "loss": 1.0838, "step": 55730 }, { "epoch": 0.8, "grad_norm": 0.56640625, "learning_rate": 2.351011586051598e-05, "loss": 0.9904, "step": 55735 }, { "epoch": 0.8, "grad_norm": 0.546875, "learning_rate": 2.349399117814488e-05, "loss": 1.0388, "step": 55740 }, { "epoch": 0.8, "grad_norm": 0.60546875, "learning_rate": 2.3477871291257525e-05, "loss": 1.089, "step": 55745 }, { "epoch": 0.8, "grad_norm": 0.51171875, "learning_rate": 2.346175620086435e-05, "loss": 0.7987, "step": 55750 }, { "epoch": 0.8, "grad_norm": 0.6015625, "learning_rate": 2.3445645907975488e-05, "loss": 1.0289, "step": 55755 }, { "epoch": 0.8, "grad_norm": 0.625, "learning_rate": 2.3429540413600736e-05, "loss": 0.9907, "step": 55760 }, { "epoch": 0.8, "grad_norm": 0.57421875, "learning_rate": 2.3413439718749562e-05, "loss": 0.9355, "step": 55765 }, { "epoch": 0.8, "grad_norm": 0.5546875, "learning_rate": 2.3397343824431216e-05, "loss": 0.9169, "step": 55770 }, { "epoch": 0.8, "grad_norm": 0.53515625, "learning_rate": 2.3381252731654633e-05, "loss": 0.97, "step": 55775 }, { "epoch": 0.8, "grad_norm": 0.474609375, "learning_rate": 2.3365166441428366e-05, "loss": 0.9656, "step": 55780 }, { "epoch": 0.8, "grad_norm": 0.546875, "learning_rate": 2.3349084954760735e-05, "loss": 1.0667, "step": 55785 }, { "epoch": 0.8, "grad_norm": 0.5390625, "learning_rate": 2.3333008272659784e-05, "loss": 0.8738, "step": 55790 }, { "epoch": 0.8, "grad_norm": 0.54296875, "learning_rate": 2.331693639613317e-05, "loss": 1.0558, "step": 55795 }, { "epoch": 0.8, "grad_norm": 0.59375, "learning_rate": 2.3300869326188313e-05, "loss": 0.9356, "step": 55800 }, { "epoch": 0.8, "grad_norm": 0.53125, "learning_rate": 2.328480706383236e-05, "loss": 0.8565, "step": 55805 }, { "epoch": 0.8, "grad_norm": 0.52734375, "learning_rate": 2.3268749610072062e-05, "loss": 0.884, "step": 55810 }, { "epoch": 0.8, "grad_norm": 0.5625, "learning_rate": 2.325269696591389e-05, "loss": 0.8756, "step": 55815 }, { "epoch": 0.8, "grad_norm": 0.48046875, "learning_rate": 2.323664913236412e-05, "loss": 0.7658, "step": 55820 }, { "epoch": 0.8, "grad_norm": 0.5703125, "learning_rate": 2.322060611042858e-05, "loss": 0.882, "step": 55825 }, { "epoch": 0.8, "grad_norm": 0.50390625, "learning_rate": 2.3204567901112895e-05, "loss": 0.9241, "step": 55830 }, { "epoch": 0.8, "grad_norm": 0.65234375, "learning_rate": 2.3188534505422377e-05, "loss": 1.0328, "step": 55835 }, { "epoch": 0.8, "grad_norm": 0.54296875, "learning_rate": 2.3172505924361976e-05, "loss": 0.8019, "step": 55840 }, { "epoch": 0.8, "grad_norm": 0.4921875, "learning_rate": 2.31564821589364e-05, "loss": 0.9576, "step": 55845 }, { "epoch": 0.8, "grad_norm": 0.54296875, "learning_rate": 2.3140463210150065e-05, "loss": 1.0144, "step": 55850 }, { "epoch": 0.8, "grad_norm": 0.55859375, "learning_rate": 2.3124449079007004e-05, "loss": 0.8785, "step": 55855 }, { "epoch": 0.8, "grad_norm": 0.59765625, "learning_rate": 2.3108439766511038e-05, "loss": 1.0206, "step": 55860 }, { "epoch": 0.8, "grad_norm": 0.54296875, "learning_rate": 2.3092435273665657e-05, "loss": 0.9757, "step": 55865 }, { "epoch": 0.8, "grad_norm": 0.51953125, "learning_rate": 2.3076435601474024e-05, "loss": 0.9286, "step": 55870 }, { "epoch": 0.8, "grad_norm": 0.49609375, "learning_rate": 2.3060440750938994e-05, "loss": 0.9818, "step": 55875 }, { "epoch": 0.8, "grad_norm": 0.61328125, "learning_rate": 2.304445072306316e-05, "loss": 0.8368, "step": 55880 }, { "epoch": 0.8, "grad_norm": 0.54296875, "learning_rate": 2.3028465518848828e-05, "loss": 0.9012, "step": 55885 }, { "epoch": 0.8, "grad_norm": 0.54296875, "learning_rate": 2.301248513929791e-05, "loss": 1.0057, "step": 55890 }, { "epoch": 0.8, "grad_norm": 0.5859375, "learning_rate": 2.2996509585412117e-05, "loss": 0.8797, "step": 55895 }, { "epoch": 0.8, "grad_norm": 0.484375, "learning_rate": 2.2980538858192825e-05, "loss": 0.9028, "step": 55900 }, { "epoch": 0.8, "grad_norm": 0.56640625, "learning_rate": 2.296457295864104e-05, "loss": 0.9417, "step": 55905 }, { "epoch": 0.8, "grad_norm": 0.58984375, "learning_rate": 2.2948611887757566e-05, "loss": 0.9962, "step": 55910 }, { "epoch": 0.8, "grad_norm": 0.5625, "learning_rate": 2.2932655646542876e-05, "loss": 0.9032, "step": 55915 }, { "epoch": 0.8, "grad_norm": 0.6484375, "learning_rate": 2.29167042359971e-05, "loss": 1.0349, "step": 55920 }, { "epoch": 0.8, "grad_norm": 0.5, "learning_rate": 2.2900757657120075e-05, "loss": 0.9714, "step": 55925 }, { "epoch": 0.8, "grad_norm": 0.6953125, "learning_rate": 2.288481591091136e-05, "loss": 1.1025, "step": 55930 }, { "epoch": 0.8, "grad_norm": 0.5078125, "learning_rate": 2.2868878998370247e-05, "loss": 0.916, "step": 55935 }, { "epoch": 0.8, "grad_norm": 0.578125, "learning_rate": 2.2852946920495612e-05, "loss": 0.92, "step": 55940 }, { "epoch": 0.8, "grad_norm": 0.6015625, "learning_rate": 2.2837019678286143e-05, "loss": 0.9217, "step": 55945 }, { "epoch": 0.8, "grad_norm": 0.63671875, "learning_rate": 2.2821097272740143e-05, "loss": 0.9212, "step": 55950 }, { "epoch": 0.8, "grad_norm": 0.55078125, "learning_rate": 2.2805179704855674e-05, "loss": 0.8215, "step": 55955 }, { "epoch": 0.8, "grad_norm": 0.5546875, "learning_rate": 2.2789266975630474e-05, "loss": 0.967, "step": 55960 }, { "epoch": 0.8, "grad_norm": 0.62890625, "learning_rate": 2.2773359086061928e-05, "loss": 0.8872, "step": 55965 }, { "epoch": 0.8, "grad_norm": 0.57421875, "learning_rate": 2.275745603714723e-05, "loss": 0.9856, "step": 55970 }, { "epoch": 0.8, "grad_norm": 0.52734375, "learning_rate": 2.274155782988312e-05, "loss": 0.8701, "step": 55975 }, { "epoch": 0.8, "grad_norm": 0.54296875, "learning_rate": 2.2725664465266182e-05, "loss": 0.9997, "step": 55980 }, { "epoch": 0.8, "grad_norm": 0.55859375, "learning_rate": 2.2709775944292576e-05, "loss": 1.0167, "step": 55985 }, { "epoch": 0.8, "grad_norm": 0.640625, "learning_rate": 2.269389226795825e-05, "loss": 0.924, "step": 55990 }, { "epoch": 0.8, "grad_norm": 0.515625, "learning_rate": 2.2678013437258815e-05, "loss": 1.0758, "step": 55995 }, { "epoch": 0.8, "grad_norm": 0.55859375, "learning_rate": 2.2662139453189547e-05, "loss": 0.9348, "step": 56000 }, { "epoch": 0.8, "grad_norm": 0.55078125, "learning_rate": 2.2646270316745445e-05, "loss": 0.8797, "step": 56005 }, { "epoch": 0.8, "grad_norm": 0.53515625, "learning_rate": 2.2630406028921258e-05, "loss": 0.9258, "step": 56010 }, { "epoch": 0.8, "grad_norm": 0.90625, "learning_rate": 2.2614546590711295e-05, "loss": 1.0569, "step": 56015 }, { "epoch": 0.8, "grad_norm": 0.54296875, "learning_rate": 2.259869200310972e-05, "loss": 1.0872, "step": 56020 }, { "epoch": 0.8, "grad_norm": 0.53125, "learning_rate": 2.258284226711026e-05, "loss": 0.9533, "step": 56025 }, { "epoch": 0.8, "grad_norm": 0.58203125, "learning_rate": 2.2566997383706445e-05, "loss": 0.9205, "step": 56030 }, { "epoch": 0.8, "grad_norm": 0.51953125, "learning_rate": 2.2551157353891393e-05, "loss": 1.0329, "step": 56035 }, { "epoch": 0.8, "grad_norm": 0.58203125, "learning_rate": 2.2535322178658003e-05, "loss": 0.7919, "step": 56040 }, { "epoch": 0.8, "grad_norm": 0.52734375, "learning_rate": 2.2519491858998875e-05, "loss": 0.9221, "step": 56045 }, { "epoch": 0.8, "grad_norm": 0.53515625, "learning_rate": 2.2503666395906208e-05, "loss": 0.9291, "step": 56050 }, { "epoch": 0.8, "grad_norm": 0.52734375, "learning_rate": 2.2487845790371998e-05, "loss": 0.9941, "step": 56055 }, { "epoch": 0.8, "grad_norm": 0.59765625, "learning_rate": 2.2472030043387914e-05, "loss": 0.9139, "step": 56060 }, { "epoch": 0.8, "grad_norm": 0.490234375, "learning_rate": 2.2456219155945256e-05, "loss": 0.9235, "step": 56065 }, { "epoch": 0.8, "grad_norm": 0.6015625, "learning_rate": 2.244041312903511e-05, "loss": 0.9313, "step": 56070 }, { "epoch": 0.8, "grad_norm": 0.59765625, "learning_rate": 2.2424611963648244e-05, "loss": 0.9421, "step": 56075 }, { "epoch": 0.8, "grad_norm": 0.5625, "learning_rate": 2.2408815660774995e-05, "loss": 0.9214, "step": 56080 }, { "epoch": 0.8, "grad_norm": 0.52734375, "learning_rate": 2.2393024221405555e-05, "loss": 0.999, "step": 56085 }, { "epoch": 0.8, "grad_norm": 0.5546875, "learning_rate": 2.237723764652977e-05, "loss": 0.8612, "step": 56090 }, { "epoch": 0.8, "grad_norm": 0.56640625, "learning_rate": 2.236145593713711e-05, "loss": 0.9955, "step": 56095 }, { "epoch": 0.8, "grad_norm": 0.455078125, "learning_rate": 2.234567909421681e-05, "loss": 0.8391, "step": 56100 }, { "epoch": 0.8, "grad_norm": 0.546875, "learning_rate": 2.2329907118757807e-05, "loss": 0.9794, "step": 56105 }, { "epoch": 0.8, "grad_norm": 0.55859375, "learning_rate": 2.2314140011748662e-05, "loss": 1.0065, "step": 56110 }, { "epoch": 0.8, "grad_norm": 0.59375, "learning_rate": 2.2298377774177702e-05, "loss": 1.0236, "step": 56115 }, { "epoch": 0.81, "grad_norm": 0.486328125, "learning_rate": 2.228262040703294e-05, "loss": 1.0006, "step": 56120 }, { "epoch": 0.81, "grad_norm": 0.546875, "learning_rate": 2.2266867911302048e-05, "loss": 0.7434, "step": 56125 }, { "epoch": 0.81, "grad_norm": 0.5234375, "learning_rate": 2.2251120287972384e-05, "loss": 0.8261, "step": 56130 }, { "epoch": 0.81, "grad_norm": 0.5546875, "learning_rate": 2.2235377538031033e-05, "loss": 0.8794, "step": 56135 }, { "epoch": 0.81, "grad_norm": 0.59375, "learning_rate": 2.221963966246482e-05, "loss": 0.9186, "step": 56140 }, { "epoch": 0.81, "grad_norm": 0.58203125, "learning_rate": 2.2203906662260156e-05, "loss": 0.6777, "step": 56145 }, { "epoch": 0.81, "grad_norm": 0.52734375, "learning_rate": 2.2188178538403213e-05, "loss": 0.9449, "step": 56150 }, { "epoch": 0.81, "grad_norm": 0.52734375, "learning_rate": 2.21724552918799e-05, "loss": 0.9514, "step": 56155 }, { "epoch": 0.81, "grad_norm": 0.53515625, "learning_rate": 2.2156736923675693e-05, "loss": 0.8412, "step": 56160 }, { "epoch": 0.81, "grad_norm": 0.48828125, "learning_rate": 2.2141023434775866e-05, "loss": 0.9219, "step": 56165 }, { "epoch": 0.81, "grad_norm": 0.5390625, "learning_rate": 2.21253148261654e-05, "loss": 0.8796, "step": 56170 }, { "epoch": 0.81, "grad_norm": 0.498046875, "learning_rate": 2.2109611098828863e-05, "loss": 0.9103, "step": 56175 }, { "epoch": 0.81, "grad_norm": 0.48828125, "learning_rate": 2.209391225375064e-05, "loss": 0.8296, "step": 56180 }, { "epoch": 0.81, "grad_norm": 0.609375, "learning_rate": 2.207821829191472e-05, "loss": 1.0703, "step": 56185 }, { "epoch": 0.81, "grad_norm": 0.5390625, "learning_rate": 2.2062529214304804e-05, "loss": 1.0274, "step": 56190 }, { "epoch": 0.81, "grad_norm": 0.5546875, "learning_rate": 2.2046845021904316e-05, "loss": 1.0084, "step": 56195 }, { "epoch": 0.81, "grad_norm": 0.478515625, "learning_rate": 2.203116571569639e-05, "loss": 0.9722, "step": 56200 }, { "epoch": 0.81, "grad_norm": 0.5546875, "learning_rate": 2.201549129666377e-05, "loss": 0.8741, "step": 56205 }, { "epoch": 0.81, "grad_norm": 0.5078125, "learning_rate": 2.1999821765788965e-05, "loss": 1.1016, "step": 56210 }, { "epoch": 0.81, "grad_norm": 0.55078125, "learning_rate": 2.1984157124054207e-05, "loss": 0.8361, "step": 56215 }, { "epoch": 0.81, "grad_norm": 0.5703125, "learning_rate": 2.1968497372441288e-05, "loss": 0.8822, "step": 56220 }, { "epoch": 0.81, "grad_norm": 0.4765625, "learning_rate": 2.195284251193184e-05, "loss": 0.9323, "step": 56225 }, { "epoch": 0.81, "grad_norm": 0.59765625, "learning_rate": 2.1937192543507136e-05, "loss": 0.9843, "step": 56230 }, { "epoch": 0.81, "grad_norm": 0.5078125, "learning_rate": 2.1921547468148105e-05, "loss": 0.8911, "step": 56235 }, { "epoch": 0.81, "grad_norm": 0.6328125, "learning_rate": 2.190590728683537e-05, "loss": 1.0064, "step": 56240 }, { "epoch": 0.81, "grad_norm": 0.57421875, "learning_rate": 2.1890272000549317e-05, "loss": 1.0378, "step": 56245 }, { "epoch": 0.81, "grad_norm": 0.5859375, "learning_rate": 2.1874641610270008e-05, "loss": 0.9608, "step": 56250 }, { "epoch": 0.81, "grad_norm": 0.484375, "learning_rate": 2.1859016116977106e-05, "loss": 0.8337, "step": 56255 }, { "epoch": 0.81, "grad_norm": 0.515625, "learning_rate": 2.1843395521650056e-05, "loss": 0.9049, "step": 56260 }, { "epoch": 0.81, "grad_norm": 0.546875, "learning_rate": 2.1827779825268036e-05, "loss": 0.9015, "step": 56265 }, { "epoch": 0.81, "grad_norm": 0.53515625, "learning_rate": 2.181216902880977e-05, "loss": 0.9828, "step": 56270 }, { "epoch": 0.81, "grad_norm": 0.6171875, "learning_rate": 2.179656313325379e-05, "loss": 0.9996, "step": 56275 }, { "epoch": 0.81, "grad_norm": 0.51953125, "learning_rate": 2.178096213957834e-05, "loss": 1.0021, "step": 56280 }, { "epoch": 0.81, "grad_norm": 0.5703125, "learning_rate": 2.176536604876126e-05, "loss": 1.0434, "step": 56285 }, { "epoch": 0.81, "grad_norm": 0.47265625, "learning_rate": 2.1749774861780115e-05, "loss": 1.0075, "step": 56290 }, { "epoch": 0.81, "grad_norm": 0.515625, "learning_rate": 2.1734188579612225e-05, "loss": 0.8832, "step": 56295 }, { "epoch": 0.81, "grad_norm": 0.4765625, "learning_rate": 2.171860720323451e-05, "loss": 0.857, "step": 56300 }, { "epoch": 0.81, "grad_norm": 0.515625, "learning_rate": 2.1703030733623642e-05, "loss": 0.8859, "step": 56305 }, { "epoch": 0.81, "grad_norm": 0.515625, "learning_rate": 2.1687459171756008e-05, "loss": 0.8828, "step": 56310 }, { "epoch": 0.81, "grad_norm": 0.53125, "learning_rate": 2.1671892518607607e-05, "loss": 0.9719, "step": 56315 }, { "epoch": 0.81, "grad_norm": 0.55859375, "learning_rate": 2.1656330775154175e-05, "loss": 0.9589, "step": 56320 }, { "epoch": 0.81, "grad_norm": 0.625, "learning_rate": 2.1640773942371195e-05, "loss": 1.0611, "step": 56325 }, { "epoch": 0.81, "grad_norm": 0.53515625, "learning_rate": 2.1625222021233714e-05, "loss": 0.9626, "step": 56330 }, { "epoch": 0.81, "grad_norm": 0.5703125, "learning_rate": 2.1609675012716613e-05, "loss": 0.9591, "step": 56335 }, { "epoch": 0.81, "grad_norm": 0.53125, "learning_rate": 2.159413291779433e-05, "loss": 0.918, "step": 56340 }, { "epoch": 0.81, "grad_norm": 0.6484375, "learning_rate": 2.157859573744112e-05, "loss": 1.0512, "step": 56345 }, { "epoch": 0.81, "grad_norm": 0.609375, "learning_rate": 2.1563063472630818e-05, "loss": 1.0652, "step": 56350 }, { "epoch": 0.81, "grad_norm": 0.5, "learning_rate": 2.1547536124337032e-05, "loss": 0.9323, "step": 56355 }, { "epoch": 0.81, "grad_norm": 0.5546875, "learning_rate": 2.153201369353306e-05, "loss": 0.9307, "step": 56360 }, { "epoch": 0.81, "grad_norm": 0.53125, "learning_rate": 2.15164961811918e-05, "loss": 0.9425, "step": 56365 }, { "epoch": 0.81, "grad_norm": 0.58203125, "learning_rate": 2.150098358828595e-05, "loss": 0.9132, "step": 56370 }, { "epoch": 0.81, "grad_norm": 0.58203125, "learning_rate": 2.148547591578788e-05, "loss": 0.8788, "step": 56375 }, { "epoch": 0.81, "grad_norm": 0.52734375, "learning_rate": 2.1469973164669567e-05, "loss": 0.8095, "step": 56380 }, { "epoch": 0.81, "grad_norm": 0.54296875, "learning_rate": 2.1454475335902778e-05, "loss": 1.05, "step": 56385 }, { "epoch": 0.81, "grad_norm": 0.6015625, "learning_rate": 2.1438982430458986e-05, "loss": 0.9308, "step": 56390 }, { "epoch": 0.81, "grad_norm": 0.58984375, "learning_rate": 2.14234944493092e-05, "loss": 1.1513, "step": 56395 }, { "epoch": 0.81, "grad_norm": 0.703125, "learning_rate": 2.1408011393424265e-05, "loss": 1.1842, "step": 56400 }, { "epoch": 0.81, "grad_norm": 0.494140625, "learning_rate": 2.1392533263774716e-05, "loss": 1.0473, "step": 56405 }, { "epoch": 0.81, "grad_norm": 0.6328125, "learning_rate": 2.1377060061330677e-05, "loss": 1.0914, "step": 56410 }, { "epoch": 0.81, "grad_norm": 0.53515625, "learning_rate": 2.1361591787062064e-05, "loss": 0.9997, "step": 56415 }, { "epoch": 0.81, "grad_norm": 0.5, "learning_rate": 2.1346128441938463e-05, "loss": 0.8518, "step": 56420 }, { "epoch": 0.81, "grad_norm": 0.62890625, "learning_rate": 2.133067002692908e-05, "loss": 0.9078, "step": 56425 }, { "epoch": 0.81, "grad_norm": 0.6328125, "learning_rate": 2.13152165430029e-05, "loss": 1.0808, "step": 56430 }, { "epoch": 0.81, "grad_norm": 0.56640625, "learning_rate": 2.129976799112858e-05, "loss": 0.9562, "step": 56435 }, { "epoch": 0.81, "grad_norm": 0.68359375, "learning_rate": 2.1284324372274454e-05, "loss": 1.0333, "step": 56440 }, { "epoch": 0.81, "grad_norm": 0.5546875, "learning_rate": 2.1268885687408478e-05, "loss": 0.9312, "step": 56445 }, { "epoch": 0.81, "grad_norm": 0.4765625, "learning_rate": 2.1253451937498426e-05, "loss": 0.8903, "step": 56450 }, { "epoch": 0.81, "grad_norm": 0.51171875, "learning_rate": 2.123802312351172e-05, "loss": 1.0886, "step": 56455 }, { "epoch": 0.81, "grad_norm": 0.57421875, "learning_rate": 2.12225992464154e-05, "loss": 0.9818, "step": 56460 }, { "epoch": 0.81, "grad_norm": 0.51171875, "learning_rate": 2.1207180307176266e-05, "loss": 0.9473, "step": 56465 }, { "epoch": 0.81, "grad_norm": 0.56640625, "learning_rate": 2.1191766306760852e-05, "loss": 0.9522, "step": 56470 }, { "epoch": 0.81, "grad_norm": 0.5390625, "learning_rate": 2.1176357246135247e-05, "loss": 0.819, "step": 56475 }, { "epoch": 0.81, "grad_norm": 0.5234375, "learning_rate": 2.1160953126265336e-05, "loss": 1.0077, "step": 56480 }, { "epoch": 0.81, "grad_norm": 0.5, "learning_rate": 2.11455539481167e-05, "loss": 0.9595, "step": 56485 }, { "epoch": 0.81, "grad_norm": 0.55859375, "learning_rate": 2.113015971265454e-05, "loss": 0.9976, "step": 56490 }, { "epoch": 0.81, "grad_norm": 0.66015625, "learning_rate": 2.111477042084381e-05, "loss": 1.0494, "step": 56495 }, { "epoch": 0.81, "grad_norm": 0.48828125, "learning_rate": 2.1099386073649106e-05, "loss": 0.9497, "step": 56500 }, { "epoch": 0.81, "grad_norm": 0.578125, "learning_rate": 2.1084006672034727e-05, "loss": 1.1865, "step": 56505 }, { "epoch": 0.81, "grad_norm": 0.51953125, "learning_rate": 2.106863221696468e-05, "loss": 0.9653, "step": 56510 }, { "epoch": 0.81, "grad_norm": 0.51953125, "learning_rate": 2.10532627094027e-05, "loss": 0.9337, "step": 56515 }, { "epoch": 0.81, "grad_norm": 0.6328125, "learning_rate": 2.1037898150312087e-05, "loss": 0.8808, "step": 56520 }, { "epoch": 0.81, "grad_norm": 0.478515625, "learning_rate": 2.1022538540655955e-05, "loss": 0.9955, "step": 56525 }, { "epoch": 0.81, "grad_norm": 0.56640625, "learning_rate": 2.1007183881397075e-05, "loss": 1.0113, "step": 56530 }, { "epoch": 0.81, "grad_norm": 0.51953125, "learning_rate": 2.0991834173497848e-05, "loss": 0.9069, "step": 56535 }, { "epoch": 0.81, "grad_norm": 0.51171875, "learning_rate": 2.0976489417920443e-05, "loss": 0.8756, "step": 56540 }, { "epoch": 0.81, "grad_norm": 0.64453125, "learning_rate": 2.0961149615626706e-05, "loss": 0.9911, "step": 56545 }, { "epoch": 0.81, "grad_norm": 0.6171875, "learning_rate": 2.094581476757813e-05, "loss": 0.896, "step": 56550 }, { "epoch": 0.81, "grad_norm": 0.49609375, "learning_rate": 2.09304848747359e-05, "loss": 0.87, "step": 56555 }, { "epoch": 0.81, "grad_norm": 0.48828125, "learning_rate": 2.0915159938060926e-05, "loss": 0.9616, "step": 56560 }, { "epoch": 0.81, "grad_norm": 0.58984375, "learning_rate": 2.0899839958513812e-05, "loss": 0.8914, "step": 56565 }, { "epoch": 0.81, "grad_norm": 0.6015625, "learning_rate": 2.0884524937054805e-05, "loss": 0.8992, "step": 56570 }, { "epoch": 0.81, "grad_norm": 0.55078125, "learning_rate": 2.086921487464387e-05, "loss": 0.9412, "step": 56575 }, { "epoch": 0.81, "grad_norm": 0.578125, "learning_rate": 2.08539097722407e-05, "loss": 1.0388, "step": 56580 }, { "epoch": 0.81, "grad_norm": 0.5546875, "learning_rate": 2.0838609630804584e-05, "loss": 1.0003, "step": 56585 }, { "epoch": 0.81, "grad_norm": 1.046875, "learning_rate": 2.0823314451294563e-05, "loss": 0.969, "step": 56590 }, { "epoch": 0.81, "grad_norm": 0.50390625, "learning_rate": 2.0808024234669398e-05, "loss": 0.9585, "step": 56595 }, { "epoch": 0.81, "grad_norm": 0.6640625, "learning_rate": 2.0792738981887473e-05, "loss": 0.9976, "step": 56600 }, { "epoch": 0.81, "grad_norm": 0.53515625, "learning_rate": 2.0777458693906837e-05, "loss": 0.8886, "step": 56605 }, { "epoch": 0.81, "grad_norm": 0.6640625, "learning_rate": 2.0762183371685328e-05, "loss": 1.0712, "step": 56610 }, { "epoch": 0.81, "grad_norm": 0.55859375, "learning_rate": 2.0746913016180435e-05, "loss": 0.9209, "step": 56615 }, { "epoch": 0.81, "grad_norm": 0.5390625, "learning_rate": 2.0731647628349273e-05, "loss": 0.9952, "step": 56620 }, { "epoch": 0.81, "grad_norm": 0.62890625, "learning_rate": 2.0716387209148737e-05, "loss": 0.9298, "step": 56625 }, { "epoch": 0.81, "grad_norm": 0.53515625, "learning_rate": 2.070113175953532e-05, "loss": 0.9394, "step": 56630 }, { "epoch": 0.81, "grad_norm": 0.58984375, "learning_rate": 2.068588128046528e-05, "loss": 1.0072, "step": 56635 }, { "epoch": 0.81, "grad_norm": 0.5234375, "learning_rate": 2.0670635772894553e-05, "loss": 0.9542, "step": 56640 }, { "epoch": 0.81, "grad_norm": 0.546875, "learning_rate": 2.0655395237778708e-05, "loss": 0.9364, "step": 56645 }, { "epoch": 0.81, "grad_norm": 0.53125, "learning_rate": 2.064015967607308e-05, "loss": 0.9768, "step": 56650 }, { "epoch": 0.81, "grad_norm": 0.6015625, "learning_rate": 2.0624929088732592e-05, "loss": 1.021, "step": 56655 }, { "epoch": 0.81, "grad_norm": 0.7265625, "learning_rate": 2.0609703476711984e-05, "loss": 1.0188, "step": 56660 }, { "epoch": 0.81, "grad_norm": 0.53125, "learning_rate": 2.0594482840965547e-05, "loss": 0.791, "step": 56665 }, { "epoch": 0.81, "grad_norm": 0.58984375, "learning_rate": 2.057926718244737e-05, "loss": 1.0041, "step": 56670 }, { "epoch": 0.81, "grad_norm": 0.53515625, "learning_rate": 2.0564056502111195e-05, "loss": 1.1466, "step": 56675 }, { "epoch": 0.81, "grad_norm": 0.68359375, "learning_rate": 2.0548850800910413e-05, "loss": 1.0553, "step": 56680 }, { "epoch": 0.81, "grad_norm": 0.59375, "learning_rate": 2.053365007979814e-05, "loss": 0.9049, "step": 56685 }, { "epoch": 0.81, "grad_norm": 0.50390625, "learning_rate": 2.051845433972721e-05, "loss": 0.8279, "step": 56690 }, { "epoch": 0.81, "grad_norm": 0.63671875, "learning_rate": 2.0503263581650067e-05, "loss": 1.0301, "step": 56695 }, { "epoch": 0.81, "grad_norm": 0.57421875, "learning_rate": 2.0488077806518902e-05, "loss": 1.1201, "step": 56700 }, { "epoch": 0.81, "grad_norm": 0.54296875, "learning_rate": 2.0472897015285597e-05, "loss": 0.9476, "step": 56705 }, { "epoch": 0.81, "grad_norm": 0.5078125, "learning_rate": 2.0457721208901682e-05, "loss": 0.9026, "step": 56710 }, { "epoch": 0.81, "grad_norm": 0.5234375, "learning_rate": 2.044255038831837e-05, "loss": 0.8703, "step": 56715 }, { "epoch": 0.81, "grad_norm": 0.515625, "learning_rate": 2.0427384554486595e-05, "loss": 1.0137, "step": 56720 }, { "epoch": 0.81, "grad_norm": 0.5390625, "learning_rate": 2.0412223708357025e-05, "loss": 0.9533, "step": 56725 }, { "epoch": 0.81, "grad_norm": 0.58984375, "learning_rate": 2.039706785087988e-05, "loss": 0.9821, "step": 56730 }, { "epoch": 0.81, "grad_norm": 0.546875, "learning_rate": 2.038191698300519e-05, "loss": 0.9189, "step": 56735 }, { "epoch": 0.81, "grad_norm": 0.62109375, "learning_rate": 2.0366771105682637e-05, "loss": 1.097, "step": 56740 }, { "epoch": 0.81, "grad_norm": 0.53515625, "learning_rate": 2.035163021986154e-05, "loss": 0.8491, "step": 56745 }, { "epoch": 0.81, "grad_norm": 0.5078125, "learning_rate": 2.0336494326490985e-05, "loss": 0.8807, "step": 56750 }, { "epoch": 0.81, "grad_norm": 0.54296875, "learning_rate": 2.0321363426519734e-05, "loss": 0.9252, "step": 56755 }, { "epoch": 0.81, "grad_norm": 0.54296875, "learning_rate": 2.030623752089612e-05, "loss": 0.9657, "step": 56760 }, { "epoch": 0.81, "grad_norm": 0.53515625, "learning_rate": 2.0291116610568304e-05, "loss": 0.9351, "step": 56765 }, { "epoch": 0.81, "grad_norm": 0.65625, "learning_rate": 2.027600069648411e-05, "loss": 1.0192, "step": 56770 }, { "epoch": 0.81, "grad_norm": 0.55859375, "learning_rate": 2.0260889779590962e-05, "loss": 0.8669, "step": 56775 }, { "epoch": 0.81, "grad_norm": 0.54296875, "learning_rate": 2.0245783860836053e-05, "loss": 0.7708, "step": 56780 }, { "epoch": 0.81, "grad_norm": 0.6015625, "learning_rate": 2.023068294116628e-05, "loss": 0.9595, "step": 56785 }, { "epoch": 0.81, "grad_norm": 0.5234375, "learning_rate": 2.0215587021528116e-05, "loss": 0.996, "step": 56790 }, { "epoch": 0.81, "grad_norm": 0.53515625, "learning_rate": 2.0200496102867818e-05, "loss": 0.9928, "step": 56795 }, { "epoch": 0.81, "grad_norm": 0.58984375, "learning_rate": 2.018541018613135e-05, "loss": 0.956, "step": 56800 }, { "epoch": 0.81, "grad_norm": 0.53515625, "learning_rate": 2.017032927226423e-05, "loss": 0.994, "step": 56805 }, { "epoch": 0.81, "grad_norm": 0.5234375, "learning_rate": 2.0155253362211822e-05, "loss": 1.0376, "step": 56810 }, { "epoch": 0.81, "grad_norm": 0.53125, "learning_rate": 2.0140182456919053e-05, "loss": 1.0575, "step": 56815 }, { "epoch": 0.82, "grad_norm": 0.59765625, "learning_rate": 2.0125116557330615e-05, "loss": 0.8795, "step": 56820 }, { "epoch": 0.82, "grad_norm": 0.478515625, "learning_rate": 2.0110055664390813e-05, "loss": 0.876, "step": 56825 }, { "epoch": 0.82, "grad_norm": 0.55078125, "learning_rate": 2.009499977904372e-05, "loss": 1.074, "step": 56830 }, { "epoch": 0.82, "grad_norm": 0.61328125, "learning_rate": 2.007994890223306e-05, "loss": 1.0949, "step": 56835 }, { "epoch": 0.82, "grad_norm": 0.58203125, "learning_rate": 2.0064903034902206e-05, "loss": 0.8713, "step": 56840 }, { "epoch": 0.82, "grad_norm": 0.671875, "learning_rate": 2.0049862177994262e-05, "loss": 0.8938, "step": 56845 }, { "epoch": 0.82, "grad_norm": 0.59765625, "learning_rate": 2.003482633245205e-05, "loss": 0.9963, "step": 56850 }, { "epoch": 0.82, "grad_norm": 0.65234375, "learning_rate": 2.0019795499217953e-05, "loss": 0.9014, "step": 56855 }, { "epoch": 0.82, "grad_norm": 0.546875, "learning_rate": 2.0004769679234203e-05, "loss": 0.8635, "step": 56860 }, { "epoch": 0.82, "grad_norm": 0.55078125, "learning_rate": 1.998974887344259e-05, "loss": 1.0966, "step": 56865 }, { "epoch": 0.82, "grad_norm": 0.54296875, "learning_rate": 1.9974733082784623e-05, "loss": 0.8467, "step": 56870 }, { "epoch": 0.82, "grad_norm": 0.55078125, "learning_rate": 1.995972230820152e-05, "loss": 0.8615, "step": 56875 }, { "epoch": 0.82, "grad_norm": 0.5625, "learning_rate": 1.9944716550634214e-05, "loss": 0.8607, "step": 56880 }, { "epoch": 0.82, "grad_norm": 0.6640625, "learning_rate": 1.992971581102322e-05, "loss": 1.0601, "step": 56885 }, { "epoch": 0.82, "grad_norm": 0.60546875, "learning_rate": 1.9914720090308826e-05, "loss": 0.981, "step": 56890 }, { "epoch": 0.82, "grad_norm": 0.63671875, "learning_rate": 1.9899729389431022e-05, "loss": 0.9211, "step": 56895 }, { "epoch": 0.82, "grad_norm": 0.59375, "learning_rate": 1.988474370932937e-05, "loss": 0.9536, "step": 56900 }, { "epoch": 0.82, "grad_norm": 0.63671875, "learning_rate": 1.9869763050943234e-05, "loss": 1.1827, "step": 56905 }, { "epoch": 0.82, "grad_norm": 0.5234375, "learning_rate": 1.985478741521163e-05, "loss": 1.0555, "step": 56910 }, { "epoch": 0.82, "grad_norm": 0.494140625, "learning_rate": 1.983981680307323e-05, "loss": 1.0235, "step": 56915 }, { "epoch": 0.82, "grad_norm": 0.56640625, "learning_rate": 1.9824851215466388e-05, "loss": 0.9067, "step": 56920 }, { "epoch": 0.82, "grad_norm": 0.51953125, "learning_rate": 1.9809890653329178e-05, "loss": 0.8933, "step": 56925 }, { "epoch": 0.82, "grad_norm": 0.5703125, "learning_rate": 1.979493511759938e-05, "loss": 0.9839, "step": 56930 }, { "epoch": 0.82, "grad_norm": 0.56640625, "learning_rate": 1.9779984609214363e-05, "loss": 0.934, "step": 56935 }, { "epoch": 0.82, "grad_norm": 0.57421875, "learning_rate": 1.9765039129111285e-05, "loss": 1.0783, "step": 56940 }, { "epoch": 0.82, "grad_norm": 0.50390625, "learning_rate": 1.975009867822695e-05, "loss": 0.9056, "step": 56945 }, { "epoch": 0.82, "grad_norm": 0.5390625, "learning_rate": 1.97351632574978e-05, "loss": 0.9473, "step": 56950 }, { "epoch": 0.82, "grad_norm": 0.5546875, "learning_rate": 1.9720232867860033e-05, "loss": 1.1167, "step": 56955 }, { "epoch": 0.82, "grad_norm": 0.52734375, "learning_rate": 1.970530751024954e-05, "loss": 1.1607, "step": 56960 }, { "epoch": 0.82, "grad_norm": 0.5546875, "learning_rate": 1.96903871856018e-05, "loss": 0.8377, "step": 56965 }, { "epoch": 0.82, "grad_norm": 0.5546875, "learning_rate": 1.967547189485204e-05, "loss": 0.9362, "step": 56970 }, { "epoch": 0.82, "grad_norm": 0.56640625, "learning_rate": 1.9660561638935217e-05, "loss": 0.9683, "step": 56975 }, { "epoch": 0.82, "grad_norm": 0.58203125, "learning_rate": 1.9645656418785852e-05, "loss": 0.8118, "step": 56980 }, { "epoch": 0.82, "grad_norm": 0.578125, "learning_rate": 1.9630756235338253e-05, "loss": 0.824, "step": 56985 }, { "epoch": 0.82, "grad_norm": 0.61328125, "learning_rate": 1.9615861089526422e-05, "loss": 0.8573, "step": 56990 }, { "epoch": 0.82, "grad_norm": 0.56640625, "learning_rate": 1.9600970982283952e-05, "loss": 0.9875, "step": 56995 }, { "epoch": 0.82, "grad_norm": 0.67578125, "learning_rate": 1.958608591454417e-05, "loss": 0.9674, "step": 57000 }, { "epoch": 0.82, "grad_norm": 0.52734375, "learning_rate": 1.957120588724014e-05, "loss": 0.9661, "step": 57005 }, { "epoch": 0.82, "grad_norm": 0.48828125, "learning_rate": 1.95563309013045e-05, "loss": 0.8551, "step": 57010 }, { "epoch": 0.82, "grad_norm": 0.55078125, "learning_rate": 1.9541460957669656e-05, "loss": 0.8241, "step": 57015 }, { "epoch": 0.82, "grad_norm": 0.66015625, "learning_rate": 1.9526596057267697e-05, "loss": 1.1419, "step": 57020 }, { "epoch": 0.82, "grad_norm": 0.546875, "learning_rate": 1.951173620103035e-05, "loss": 0.9015, "step": 57025 }, { "epoch": 0.82, "grad_norm": 0.55859375, "learning_rate": 1.9496881389889023e-05, "loss": 0.8603, "step": 57030 }, { "epoch": 0.82, "grad_norm": 0.5078125, "learning_rate": 1.9482031624774855e-05, "loss": 0.9216, "step": 57035 }, { "epoch": 0.82, "grad_norm": 0.6015625, "learning_rate": 1.9467186906618673e-05, "loss": 0.9155, "step": 57040 }, { "epoch": 0.82, "grad_norm": 0.625, "learning_rate": 1.94523472363509e-05, "loss": 0.9163, "step": 57045 }, { "epoch": 0.82, "grad_norm": 0.55859375, "learning_rate": 1.9437512614901753e-05, "loss": 0.95, "step": 57050 }, { "epoch": 0.82, "grad_norm": 0.546875, "learning_rate": 1.9422683043201086e-05, "loss": 0.854, "step": 57055 }, { "epoch": 0.82, "grad_norm": 0.515625, "learning_rate": 1.940785852217839e-05, "loss": 1.1324, "step": 57060 }, { "epoch": 0.82, "grad_norm": 0.546875, "learning_rate": 1.9393039052762908e-05, "loss": 0.9357, "step": 57065 }, { "epoch": 0.82, "grad_norm": 0.8125, "learning_rate": 1.93782246358836e-05, "loss": 1.1118, "step": 57070 }, { "epoch": 0.82, "grad_norm": 0.6171875, "learning_rate": 1.9363415272468933e-05, "loss": 1.1094, "step": 57075 }, { "epoch": 0.82, "grad_norm": 0.5390625, "learning_rate": 1.9348610963447235e-05, "loss": 0.9506, "step": 57080 }, { "epoch": 0.82, "grad_norm": 0.5546875, "learning_rate": 1.9333811709746485e-05, "loss": 0.9978, "step": 57085 }, { "epoch": 0.82, "grad_norm": 0.53125, "learning_rate": 1.9319017512294257e-05, "loss": 0.9095, "step": 57090 }, { "epoch": 0.82, "grad_norm": 0.640625, "learning_rate": 1.9304228372017908e-05, "loss": 1.0865, "step": 57095 }, { "epoch": 0.82, "grad_norm": 0.546875, "learning_rate": 1.9289444289844448e-05, "loss": 0.9566, "step": 57100 }, { "epoch": 0.82, "grad_norm": 0.5703125, "learning_rate": 1.927466526670052e-05, "loss": 0.9192, "step": 57105 }, { "epoch": 0.82, "grad_norm": 0.6484375, "learning_rate": 1.9259891303512512e-05, "loss": 1.0266, "step": 57110 }, { "epoch": 0.82, "grad_norm": 0.5234375, "learning_rate": 1.9245122401206493e-05, "loss": 0.923, "step": 57115 }, { "epoch": 0.82, "grad_norm": 0.61328125, "learning_rate": 1.923035856070815e-05, "loss": 0.9628, "step": 57120 }, { "epoch": 0.82, "grad_norm": 0.6328125, "learning_rate": 1.921559978294295e-05, "loss": 1.0032, "step": 57125 }, { "epoch": 0.82, "grad_norm": 0.58203125, "learning_rate": 1.920084606883593e-05, "loss": 1.112, "step": 57130 }, { "epoch": 0.82, "grad_norm": 0.7578125, "learning_rate": 1.9186097419311932e-05, "loss": 0.9425, "step": 57135 }, { "epoch": 0.82, "grad_norm": 0.69140625, "learning_rate": 1.917135383529537e-05, "loss": 1.1636, "step": 57140 }, { "epoch": 0.82, "grad_norm": 0.515625, "learning_rate": 1.915661531771039e-05, "loss": 0.9695, "step": 57145 }, { "epoch": 0.82, "grad_norm": 0.5390625, "learning_rate": 1.914188186748087e-05, "loss": 0.8936, "step": 57150 }, { "epoch": 0.82, "grad_norm": 0.5625, "learning_rate": 1.9127153485530246e-05, "loss": 1.08, "step": 57155 }, { "epoch": 0.82, "grad_norm": 0.52734375, "learning_rate": 1.911243017278176e-05, "loss": 0.9949, "step": 57160 }, { "epoch": 0.82, "grad_norm": 0.5546875, "learning_rate": 1.9097711930158303e-05, "loss": 0.9987, "step": 57165 }, { "epoch": 0.82, "grad_norm": 0.53125, "learning_rate": 1.908299875858237e-05, "loss": 0.8954, "step": 57170 }, { "epoch": 0.82, "grad_norm": 0.62890625, "learning_rate": 1.9068290658976252e-05, "loss": 1.0304, "step": 57175 }, { "epoch": 0.82, "grad_norm": 0.57421875, "learning_rate": 1.905358763226186e-05, "loss": 0.8348, "step": 57180 }, { "epoch": 0.82, "grad_norm": 0.6015625, "learning_rate": 1.903888967936075e-05, "loss": 0.9374, "step": 57185 }, { "epoch": 0.82, "grad_norm": 0.578125, "learning_rate": 1.902419680119425e-05, "loss": 0.998, "step": 57190 }, { "epoch": 0.82, "grad_norm": 0.51953125, "learning_rate": 1.9009508998683334e-05, "loss": 0.9541, "step": 57195 }, { "epoch": 0.82, "grad_norm": 0.609375, "learning_rate": 1.899482627274861e-05, "loss": 0.9615, "step": 57200 }, { "epoch": 0.82, "grad_norm": 0.57421875, "learning_rate": 1.8980148624310444e-05, "loss": 0.9093, "step": 57205 }, { "epoch": 0.82, "grad_norm": 0.58984375, "learning_rate": 1.8965476054288857e-05, "loss": 0.8685, "step": 57210 }, { "epoch": 0.82, "grad_norm": 0.59375, "learning_rate": 1.8950808563603485e-05, "loss": 0.963, "step": 57215 }, { "epoch": 0.82, "grad_norm": 0.54296875, "learning_rate": 1.893614615317375e-05, "loss": 0.8768, "step": 57220 }, { "epoch": 0.82, "grad_norm": 0.5703125, "learning_rate": 1.892148882391872e-05, "loss": 1.043, "step": 57225 }, { "epoch": 0.82, "grad_norm": 0.5234375, "learning_rate": 1.8906836576757116e-05, "loss": 0.9636, "step": 57230 }, { "epoch": 0.82, "grad_norm": 0.52734375, "learning_rate": 1.889218941260732e-05, "loss": 1.0872, "step": 57235 }, { "epoch": 0.82, "grad_norm": 0.6171875, "learning_rate": 1.8877547332387467e-05, "loss": 1.0531, "step": 57240 }, { "epoch": 0.82, "grad_norm": 0.453125, "learning_rate": 1.886291033701537e-05, "loss": 0.8688, "step": 57245 }, { "epoch": 0.82, "grad_norm": 0.6328125, "learning_rate": 1.8848278427408438e-05, "loss": 1.0525, "step": 57250 }, { "epoch": 0.82, "grad_norm": 0.546875, "learning_rate": 1.8833651604483828e-05, "loss": 0.8974, "step": 57255 }, { "epoch": 0.82, "grad_norm": 0.5703125, "learning_rate": 1.88190298691584e-05, "loss": 0.9064, "step": 57260 }, { "epoch": 0.82, "grad_norm": 0.578125, "learning_rate": 1.8804413222348617e-05, "loss": 1.0215, "step": 57265 }, { "epoch": 0.82, "grad_norm": 0.5625, "learning_rate": 1.8789801664970686e-05, "loss": 0.9896, "step": 57270 }, { "epoch": 0.82, "grad_norm": 0.57421875, "learning_rate": 1.877519519794051e-05, "loss": 0.9436, "step": 57275 }, { "epoch": 0.82, "grad_norm": 0.62890625, "learning_rate": 1.87605938221736e-05, "loss": 1.0005, "step": 57280 }, { "epoch": 0.82, "grad_norm": 0.52734375, "learning_rate": 1.874599753858517e-05, "loss": 0.8156, "step": 57285 }, { "epoch": 0.82, "grad_norm": 0.58984375, "learning_rate": 1.8731406348090153e-05, "loss": 1.0979, "step": 57290 }, { "epoch": 0.82, "grad_norm": 0.53125, "learning_rate": 1.8716820251603163e-05, "loss": 0.9682, "step": 57295 }, { "epoch": 0.82, "grad_norm": 0.5625, "learning_rate": 1.8702239250038433e-05, "loss": 0.917, "step": 57300 }, { "epoch": 0.82, "grad_norm": 0.51171875, "learning_rate": 1.8687663344309958e-05, "loss": 1.0256, "step": 57305 }, { "epoch": 0.82, "grad_norm": 0.5234375, "learning_rate": 1.867309253533134e-05, "loss": 0.8435, "step": 57310 }, { "epoch": 0.82, "grad_norm": 0.51171875, "learning_rate": 1.8658526824015886e-05, "loss": 0.9523, "step": 57315 }, { "epoch": 0.82, "grad_norm": 0.5703125, "learning_rate": 1.8643966211276655e-05, "loss": 1.0054, "step": 57320 }, { "epoch": 0.82, "grad_norm": 0.609375, "learning_rate": 1.862941069802624e-05, "loss": 1.081, "step": 57325 }, { "epoch": 0.82, "grad_norm": 0.578125, "learning_rate": 1.8614860285177038e-05, "loss": 0.965, "step": 57330 }, { "epoch": 0.82, "grad_norm": 0.58203125, "learning_rate": 1.860031497364112e-05, "loss": 0.9633, "step": 57335 }, { "epoch": 0.82, "grad_norm": 0.5234375, "learning_rate": 1.8585774764330154e-05, "loss": 0.9439, "step": 57340 }, { "epoch": 0.82, "grad_norm": 0.66015625, "learning_rate": 1.8571239658155526e-05, "loss": 1.0464, "step": 57345 }, { "epoch": 0.82, "grad_norm": 0.58984375, "learning_rate": 1.855670965602834e-05, "loss": 0.9419, "step": 57350 }, { "epoch": 0.82, "grad_norm": 0.53515625, "learning_rate": 1.8542184758859372e-05, "loss": 0.9403, "step": 57355 }, { "epoch": 0.82, "grad_norm": 0.44921875, "learning_rate": 1.8527664967559e-05, "loss": 0.8075, "step": 57360 }, { "epoch": 0.82, "grad_norm": 0.62109375, "learning_rate": 1.8513150283037395e-05, "loss": 1.0399, "step": 57365 }, { "epoch": 0.82, "grad_norm": 0.65234375, "learning_rate": 1.849864070620435e-05, "loss": 0.9282, "step": 57370 }, { "epoch": 0.82, "grad_norm": 0.478515625, "learning_rate": 1.848413623796931e-05, "loss": 0.8879, "step": 57375 }, { "epoch": 0.82, "grad_norm": 0.66015625, "learning_rate": 1.8469636879241438e-05, "loss": 1.0933, "step": 57380 }, { "epoch": 0.82, "grad_norm": 0.61328125, "learning_rate": 1.845514263092961e-05, "loss": 0.9499, "step": 57385 }, { "epoch": 0.82, "grad_norm": 0.61328125, "learning_rate": 1.8440653493942316e-05, "loss": 0.9202, "step": 57390 }, { "epoch": 0.82, "grad_norm": 0.58203125, "learning_rate": 1.8426169469187726e-05, "loss": 0.9033, "step": 57395 }, { "epoch": 0.82, "grad_norm": 0.51953125, "learning_rate": 1.8411690557573747e-05, "loss": 1.0041, "step": 57400 }, { "epoch": 0.82, "grad_norm": 0.5859375, "learning_rate": 1.839721676000794e-05, "loss": 0.9258, "step": 57405 }, { "epoch": 0.82, "grad_norm": 0.59375, "learning_rate": 1.8382748077397494e-05, "loss": 0.9638, "step": 57410 }, { "epoch": 0.82, "grad_norm": 0.5078125, "learning_rate": 1.8368284510649358e-05, "loss": 0.8681, "step": 57415 }, { "epoch": 0.82, "grad_norm": 0.55078125, "learning_rate": 1.8353826060670153e-05, "loss": 1.0304, "step": 57420 }, { "epoch": 0.82, "grad_norm": 0.55078125, "learning_rate": 1.8339372728366077e-05, "loss": 0.7835, "step": 57425 }, { "epoch": 0.82, "grad_norm": 0.6640625, "learning_rate": 1.8324924514643138e-05, "loss": 0.8806, "step": 57430 }, { "epoch": 0.82, "grad_norm": 0.5703125, "learning_rate": 1.8310481420406967e-05, "loss": 0.9688, "step": 57435 }, { "epoch": 0.82, "grad_norm": 0.5234375, "learning_rate": 1.829604344656286e-05, "loss": 1.1028, "step": 57440 }, { "epoch": 0.82, "grad_norm": 0.515625, "learning_rate": 1.8281610594015775e-05, "loss": 0.8831, "step": 57445 }, { "epoch": 0.82, "grad_norm": 0.60546875, "learning_rate": 1.826718286367043e-05, "loss": 1.0611, "step": 57450 }, { "epoch": 0.82, "grad_norm": 0.5078125, "learning_rate": 1.8252760256431123e-05, "loss": 0.9923, "step": 57455 }, { "epoch": 0.82, "grad_norm": 0.54296875, "learning_rate": 1.823834277320191e-05, "loss": 0.8988, "step": 57460 }, { "epoch": 0.82, "grad_norm": 0.6015625, "learning_rate": 1.8223930414886515e-05, "loss": 1.0085, "step": 57465 }, { "epoch": 0.82, "grad_norm": 0.65625, "learning_rate": 1.8209523182388276e-05, "loss": 0.9161, "step": 57470 }, { "epoch": 0.82, "grad_norm": 0.6796875, "learning_rate": 1.8195121076610266e-05, "loss": 1.0192, "step": 57475 }, { "epoch": 0.82, "grad_norm": 0.6171875, "learning_rate": 1.818072409845527e-05, "loss": 0.8755, "step": 57480 }, { "epoch": 0.82, "grad_norm": 0.48828125, "learning_rate": 1.8166332248825645e-05, "loss": 0.8561, "step": 57485 }, { "epoch": 0.82, "grad_norm": 0.5859375, "learning_rate": 1.8151945528623536e-05, "loss": 1.0005, "step": 57490 }, { "epoch": 0.82, "grad_norm": 0.51953125, "learning_rate": 1.8137563938750667e-05, "loss": 0.9342, "step": 57495 }, { "epoch": 0.82, "grad_norm": 0.54296875, "learning_rate": 1.812318748010856e-05, "loss": 0.9835, "step": 57500 }, { "epoch": 0.82, "grad_norm": 0.56640625, "learning_rate": 1.8108816153598284e-05, "loss": 1.023, "step": 57505 }, { "epoch": 0.82, "grad_norm": 0.61328125, "learning_rate": 1.8094449960120673e-05, "loss": 0.9161, "step": 57510 }, { "epoch": 0.83, "grad_norm": 0.57421875, "learning_rate": 1.808008890057625e-05, "loss": 0.8931, "step": 57515 }, { "epoch": 0.83, "grad_norm": 0.466796875, "learning_rate": 1.8065732975865134e-05, "loss": 0.8917, "step": 57520 }, { "epoch": 0.83, "grad_norm": 0.59765625, "learning_rate": 1.805138218688719e-05, "loss": 1.1648, "step": 57525 }, { "epoch": 0.83, "grad_norm": 0.5234375, "learning_rate": 1.8037036534541963e-05, "loss": 0.8815, "step": 57530 }, { "epoch": 0.83, "grad_norm": 0.66015625, "learning_rate": 1.8022696019728602e-05, "loss": 1.0268, "step": 57535 }, { "epoch": 0.83, "grad_norm": 0.52734375, "learning_rate": 1.8008360643346022e-05, "loss": 0.901, "step": 57540 }, { "epoch": 0.83, "grad_norm": 0.5546875, "learning_rate": 1.7994030406292837e-05, "loss": 1.0224, "step": 57545 }, { "epoch": 0.83, "grad_norm": 0.64453125, "learning_rate": 1.7979705309467164e-05, "loss": 1.0273, "step": 57550 }, { "epoch": 0.83, "grad_norm": 0.5234375, "learning_rate": 1.7965385353766985e-05, "loss": 0.8829, "step": 57555 }, { "epoch": 0.83, "grad_norm": 0.53515625, "learning_rate": 1.7951070540089898e-05, "loss": 1.1264, "step": 57560 }, { "epoch": 0.83, "grad_norm": 0.52734375, "learning_rate": 1.7936760869333137e-05, "loss": 0.9873, "step": 57565 }, { "epoch": 0.83, "grad_norm": 0.515625, "learning_rate": 1.7922456342393658e-05, "loss": 0.9088, "step": 57570 }, { "epoch": 0.83, "grad_norm": 0.5546875, "learning_rate": 1.790815696016812e-05, "loss": 0.9321, "step": 57575 }, { "epoch": 0.83, "grad_norm": 0.52734375, "learning_rate": 1.789386272355278e-05, "loss": 1.0373, "step": 57580 }, { "epoch": 0.83, "grad_norm": 0.6015625, "learning_rate": 1.7879573633443625e-05, "loss": 1.0655, "step": 57585 }, { "epoch": 0.83, "grad_norm": 0.53125, "learning_rate": 1.7865289690736364e-05, "loss": 0.8181, "step": 57590 }, { "epoch": 0.83, "grad_norm": 0.61328125, "learning_rate": 1.785101089632627e-05, "loss": 0.8864, "step": 57595 }, { "epoch": 0.83, "grad_norm": 0.59375, "learning_rate": 1.783673725110836e-05, "loss": 0.9482, "step": 57600 }, { "epoch": 0.83, "grad_norm": 0.5625, "learning_rate": 1.782246875597733e-05, "loss": 0.834, "step": 57605 }, { "epoch": 0.83, "grad_norm": 0.58203125, "learning_rate": 1.7808205411827582e-05, "loss": 0.8415, "step": 57610 }, { "epoch": 0.83, "grad_norm": 0.65625, "learning_rate": 1.77939472195531e-05, "loss": 0.8928, "step": 57615 }, { "epoch": 0.83, "grad_norm": 0.54296875, "learning_rate": 1.7779694180047623e-05, "loss": 0.9688, "step": 57620 }, { "epoch": 0.83, "grad_norm": 0.5, "learning_rate": 1.7765446294204592e-05, "loss": 1.0195, "step": 57625 }, { "epoch": 0.83, "grad_norm": 0.6640625, "learning_rate": 1.7751203562917018e-05, "loss": 0.9816, "step": 57630 }, { "epoch": 0.83, "grad_norm": 0.5, "learning_rate": 1.773696598707767e-05, "loss": 0.9331, "step": 57635 }, { "epoch": 0.83, "grad_norm": 0.609375, "learning_rate": 1.772273356757902e-05, "loss": 0.8634, "step": 57640 }, { "epoch": 0.83, "grad_norm": 0.64453125, "learning_rate": 1.77085063053131e-05, "loss": 0.9065, "step": 57645 }, { "epoch": 0.83, "grad_norm": 0.83984375, "learning_rate": 1.7694284201171752e-05, "loss": 0.9567, "step": 57650 }, { "epoch": 0.83, "grad_norm": 0.59375, "learning_rate": 1.768006725604642e-05, "loss": 1.1036, "step": 57655 }, { "epoch": 0.83, "grad_norm": 0.54296875, "learning_rate": 1.7665855470828197e-05, "loss": 1.0417, "step": 57660 }, { "epoch": 0.83, "grad_norm": 0.625, "learning_rate": 1.765164884640792e-05, "loss": 0.9475, "step": 57665 }, { "epoch": 0.83, "grad_norm": 0.51953125, "learning_rate": 1.763744738367611e-05, "loss": 0.8683, "step": 57670 }, { "epoch": 0.83, "grad_norm": 0.55078125, "learning_rate": 1.7623251083522863e-05, "loss": 1.0274, "step": 57675 }, { "epoch": 0.83, "grad_norm": 0.6171875, "learning_rate": 1.760905994683807e-05, "loss": 0.9602, "step": 57680 }, { "epoch": 0.83, "grad_norm": 0.64453125, "learning_rate": 1.7594873974511263e-05, "loss": 0.9219, "step": 57685 }, { "epoch": 0.83, "grad_norm": 0.6328125, "learning_rate": 1.7580693167431573e-05, "loss": 0.9376, "step": 57690 }, { "epoch": 0.83, "grad_norm": 0.52734375, "learning_rate": 1.7566517526487903e-05, "loss": 1.0304, "step": 57695 }, { "epoch": 0.83, "grad_norm": 0.5390625, "learning_rate": 1.755234705256883e-05, "loss": 0.9291, "step": 57700 }, { "epoch": 0.83, "grad_norm": 0.59375, "learning_rate": 1.7538181746562543e-05, "loss": 1.069, "step": 57705 }, { "epoch": 0.83, "grad_norm": 0.4609375, "learning_rate": 1.752402160935691e-05, "loss": 1.0669, "step": 57710 }, { "epoch": 0.83, "grad_norm": 0.5859375, "learning_rate": 1.7509866641839534e-05, "loss": 1.0158, "step": 57715 }, { "epoch": 0.83, "grad_norm": 0.5234375, "learning_rate": 1.7495716844897692e-05, "loss": 0.8566, "step": 57720 }, { "epoch": 0.83, "grad_norm": 0.53125, "learning_rate": 1.7481572219418263e-05, "loss": 0.9092, "step": 57725 }, { "epoch": 0.83, "grad_norm": 0.5703125, "learning_rate": 1.746743276628786e-05, "loss": 0.9913, "step": 57730 }, { "epoch": 0.83, "grad_norm": 0.52734375, "learning_rate": 1.74532984863928e-05, "loss": 0.8428, "step": 57735 }, { "epoch": 0.83, "grad_norm": 0.48828125, "learning_rate": 1.743916938061898e-05, "loss": 0.8582, "step": 57740 }, { "epoch": 0.83, "grad_norm": 0.5703125, "learning_rate": 1.7425045449852053e-05, "loss": 1.0616, "step": 57745 }, { "epoch": 0.83, "grad_norm": 0.6015625, "learning_rate": 1.7410926694977337e-05, "loss": 0.8835, "step": 57750 }, { "epoch": 0.83, "grad_norm": 0.56640625, "learning_rate": 1.7396813116879794e-05, "loss": 1.0282, "step": 57755 }, { "epoch": 0.83, "grad_norm": 0.51171875, "learning_rate": 1.7382704716444075e-05, "loss": 0.9069, "step": 57760 }, { "epoch": 0.83, "grad_norm": 0.62890625, "learning_rate": 1.7368601494554526e-05, "loss": 0.9493, "step": 57765 }, { "epoch": 0.83, "grad_norm": 0.57421875, "learning_rate": 1.7354503452095128e-05, "loss": 0.9655, "step": 57770 }, { "epoch": 0.83, "grad_norm": 0.5234375, "learning_rate": 1.7340410589949572e-05, "loss": 0.9934, "step": 57775 }, { "epoch": 0.83, "grad_norm": 0.498046875, "learning_rate": 1.7326322909001258e-05, "loss": 0.9102, "step": 57780 }, { "epoch": 0.83, "grad_norm": 0.59375, "learning_rate": 1.7312240410133153e-05, "loss": 1.1948, "step": 57785 }, { "epoch": 0.83, "grad_norm": 0.546875, "learning_rate": 1.7298163094227982e-05, "loss": 0.8848, "step": 57790 }, { "epoch": 0.83, "grad_norm": 0.59375, "learning_rate": 1.7284090962168176e-05, "loss": 0.8271, "step": 57795 }, { "epoch": 0.83, "grad_norm": 0.53125, "learning_rate": 1.7270024014835716e-05, "loss": 1.039, "step": 57800 }, { "epoch": 0.83, "grad_norm": 0.478515625, "learning_rate": 1.7255962253112413e-05, "loss": 0.8683, "step": 57805 }, { "epoch": 0.83, "grad_norm": 0.53125, "learning_rate": 1.72419056778796e-05, "loss": 1.0126, "step": 57810 }, { "epoch": 0.83, "grad_norm": 0.59765625, "learning_rate": 1.722785429001842e-05, "loss": 0.933, "step": 57815 }, { "epoch": 0.83, "grad_norm": 0.6171875, "learning_rate": 1.7213808090409566e-05, "loss": 0.963, "step": 57820 }, { "epoch": 0.83, "grad_norm": 0.59375, "learning_rate": 1.7199767079933527e-05, "loss": 0.8474, "step": 57825 }, { "epoch": 0.83, "grad_norm": 0.65234375, "learning_rate": 1.7185731259470395e-05, "loss": 1.1649, "step": 57830 }, { "epoch": 0.83, "grad_norm": 0.4921875, "learning_rate": 1.7171700629899934e-05, "loss": 0.9088, "step": 57835 }, { "epoch": 0.83, "grad_norm": 0.59765625, "learning_rate": 1.715767519210161e-05, "loss": 1.0726, "step": 57840 }, { "epoch": 0.83, "grad_norm": 0.546875, "learning_rate": 1.714365494695457e-05, "loss": 1.0465, "step": 57845 }, { "epoch": 0.83, "grad_norm": 0.5546875, "learning_rate": 1.712963989533758e-05, "loss": 0.9767, "step": 57850 }, { "epoch": 0.83, "grad_norm": 0.484375, "learning_rate": 1.711563003812915e-05, "loss": 0.9515, "step": 57855 }, { "epoch": 0.83, "grad_norm": 0.546875, "learning_rate": 1.7101625376207465e-05, "loss": 0.9817, "step": 57860 }, { "epoch": 0.83, "grad_norm": 0.498046875, "learning_rate": 1.7087625910450277e-05, "loss": 1.0111, "step": 57865 }, { "epoch": 0.83, "grad_norm": 0.53515625, "learning_rate": 1.7073631641735122e-05, "loss": 0.9127, "step": 57870 }, { "epoch": 0.83, "grad_norm": 0.431640625, "learning_rate": 1.7059642570939204e-05, "loss": 0.89, "step": 57875 }, { "epoch": 0.83, "grad_norm": 0.87109375, "learning_rate": 1.7045658698939327e-05, "loss": 0.9156, "step": 57880 }, { "epoch": 0.83, "grad_norm": 0.66796875, "learning_rate": 1.703168002661204e-05, "loss": 0.839, "step": 57885 }, { "epoch": 0.83, "grad_norm": 0.50390625, "learning_rate": 1.7017706554833568e-05, "loss": 0.8972, "step": 57890 }, { "epoch": 0.83, "grad_norm": 0.58984375, "learning_rate": 1.700373828447973e-05, "loss": 0.909, "step": 57895 }, { "epoch": 0.83, "grad_norm": 0.56640625, "learning_rate": 1.6989775216426106e-05, "loss": 1.077, "step": 57900 }, { "epoch": 0.83, "grad_norm": 0.67578125, "learning_rate": 1.697581735154793e-05, "loss": 1.0366, "step": 57905 }, { "epoch": 0.83, "grad_norm": 0.58203125, "learning_rate": 1.6961864690720087e-05, "loss": 1.0014, "step": 57910 }, { "epoch": 0.83, "grad_norm": 0.57421875, "learning_rate": 1.6947917234817114e-05, "loss": 1.0623, "step": 57915 }, { "epoch": 0.83, "grad_norm": 0.48046875, "learning_rate": 1.6933974984713263e-05, "loss": 0.9165, "step": 57920 }, { "epoch": 0.83, "grad_norm": 0.51171875, "learning_rate": 1.692003794128251e-05, "loss": 0.8876, "step": 57925 }, { "epoch": 0.83, "grad_norm": 0.6171875, "learning_rate": 1.6906106105398356e-05, "loss": 0.8691, "step": 57930 }, { "epoch": 0.83, "grad_norm": 0.55859375, "learning_rate": 1.6892179477934112e-05, "loss": 0.9261, "step": 57935 }, { "epoch": 0.83, "grad_norm": 0.5703125, "learning_rate": 1.687825805976274e-05, "loss": 0.9677, "step": 57940 }, { "epoch": 0.83, "grad_norm": 0.5703125, "learning_rate": 1.686434185175679e-05, "loss": 0.9245, "step": 57945 }, { "epoch": 0.83, "grad_norm": 0.515625, "learning_rate": 1.685043085478858e-05, "loss": 0.9576, "step": 57950 }, { "epoch": 0.83, "grad_norm": 0.5625, "learning_rate": 1.683652506973008e-05, "loss": 0.9034, "step": 57955 }, { "epoch": 0.83, "grad_norm": 0.6171875, "learning_rate": 1.6822624497452888e-05, "loss": 0.967, "step": 57960 }, { "epoch": 0.83, "grad_norm": 0.609375, "learning_rate": 1.680872913882835e-05, "loss": 0.9726, "step": 57965 }, { "epoch": 0.83, "grad_norm": 0.44921875, "learning_rate": 1.679483899472739e-05, "loss": 1.0131, "step": 57970 }, { "epoch": 0.83, "grad_norm": 0.56640625, "learning_rate": 1.6780954066020704e-05, "loss": 1.1013, "step": 57975 }, { "epoch": 0.83, "grad_norm": 0.53125, "learning_rate": 1.6767074353578572e-05, "loss": 0.9656, "step": 57980 }, { "epoch": 0.83, "grad_norm": 0.5078125, "learning_rate": 1.675319985827104e-05, "loss": 0.8297, "step": 57985 }, { "epoch": 0.83, "grad_norm": 0.51953125, "learning_rate": 1.6739330580967728e-05, "loss": 0.969, "step": 57990 }, { "epoch": 0.83, "grad_norm": 0.671875, "learning_rate": 1.6725466522538e-05, "loss": 0.8789, "step": 57995 }, { "epoch": 0.83, "grad_norm": 0.55859375, "learning_rate": 1.6711607683850905e-05, "loss": 0.8702, "step": 58000 }, { "epoch": 0.83, "grad_norm": 0.6875, "learning_rate": 1.669775406577506e-05, "loss": 0.9958, "step": 58005 }, { "epoch": 0.83, "grad_norm": 0.640625, "learning_rate": 1.6683905669178866e-05, "loss": 1.0337, "step": 58010 }, { "epoch": 0.83, "grad_norm": 0.53125, "learning_rate": 1.667006249493038e-05, "loss": 0.9877, "step": 58015 }, { "epoch": 0.83, "grad_norm": 0.5078125, "learning_rate": 1.665622454389729e-05, "loss": 1.002, "step": 58020 }, { "epoch": 0.83, "grad_norm": 0.50390625, "learning_rate": 1.6642391816946934e-05, "loss": 0.9392, "step": 58025 }, { "epoch": 0.83, "grad_norm": 0.4453125, "learning_rate": 1.6628564314946393e-05, "loss": 0.8782, "step": 58030 }, { "epoch": 0.83, "grad_norm": 0.53515625, "learning_rate": 1.661474203876242e-05, "loss": 0.9426, "step": 58035 }, { "epoch": 0.83, "grad_norm": 0.55859375, "learning_rate": 1.6600924989261368e-05, "loss": 1.0012, "step": 58040 }, { "epoch": 0.83, "grad_norm": 0.56640625, "learning_rate": 1.658711316730931e-05, "loss": 1.0189, "step": 58045 }, { "epoch": 0.83, "grad_norm": 0.56640625, "learning_rate": 1.657330657377202e-05, "loss": 0.9001, "step": 58050 }, { "epoch": 0.83, "grad_norm": 0.5703125, "learning_rate": 1.655950520951487e-05, "loss": 0.9475, "step": 58055 }, { "epoch": 0.83, "grad_norm": 0.54296875, "learning_rate": 1.6545709075402972e-05, "loss": 0.8767, "step": 58060 }, { "epoch": 0.83, "grad_norm": 0.50390625, "learning_rate": 1.6531918172301087e-05, "loss": 0.8723, "step": 58065 }, { "epoch": 0.83, "grad_norm": 0.51953125, "learning_rate": 1.6518132501073634e-05, "loss": 1.0125, "step": 58070 }, { "epoch": 0.83, "grad_norm": 0.8203125, "learning_rate": 1.6504352062584692e-05, "loss": 0.9769, "step": 58075 }, { "epoch": 0.83, "grad_norm": 0.64453125, "learning_rate": 1.649057685769806e-05, "loss": 0.991, "step": 58080 }, { "epoch": 0.83, "grad_norm": 0.5859375, "learning_rate": 1.6476806887277208e-05, "loss": 0.9772, "step": 58085 }, { "epoch": 0.83, "grad_norm": 0.5859375, "learning_rate": 1.6463042152185193e-05, "loss": 1.0976, "step": 58090 }, { "epoch": 0.83, "grad_norm": 0.6953125, "learning_rate": 1.6449282653284836e-05, "loss": 1.1015, "step": 58095 }, { "epoch": 0.83, "grad_norm": 0.546875, "learning_rate": 1.6435528391438626e-05, "loss": 0.9803, "step": 58100 }, { "epoch": 0.83, "grad_norm": 0.59375, "learning_rate": 1.642177936750865e-05, "loss": 1.1817, "step": 58105 }, { "epoch": 0.83, "grad_norm": 0.52734375, "learning_rate": 1.640803558235672e-05, "loss": 0.9719, "step": 58110 }, { "epoch": 0.83, "grad_norm": 0.6484375, "learning_rate": 1.6394297036844353e-05, "loss": 1.0251, "step": 58115 }, { "epoch": 0.83, "grad_norm": 0.5, "learning_rate": 1.6380563731832664e-05, "loss": 1.0194, "step": 58120 }, { "epoch": 0.83, "grad_norm": 0.486328125, "learning_rate": 1.6366835668182455e-05, "loss": 1.0195, "step": 58125 }, { "epoch": 0.83, "grad_norm": 0.5703125, "learning_rate": 1.6353112846754247e-05, "loss": 0.8529, "step": 58130 }, { "epoch": 0.83, "grad_norm": 0.58203125, "learning_rate": 1.6339395268408188e-05, "loss": 0.9441, "step": 58135 }, { "epoch": 0.83, "grad_norm": 0.5546875, "learning_rate": 1.6325682934004104e-05, "loss": 0.9558, "step": 58140 }, { "epoch": 0.83, "grad_norm": 0.58984375, "learning_rate": 1.6311975844401528e-05, "loss": 0.9949, "step": 58145 }, { "epoch": 0.83, "grad_norm": 0.55078125, "learning_rate": 1.6298274000459612e-05, "loss": 0.9409, "step": 58150 }, { "epoch": 0.83, "grad_norm": 0.443359375, "learning_rate": 1.6284577403037193e-05, "loss": 0.9242, "step": 58155 }, { "epoch": 0.83, "grad_norm": 0.62109375, "learning_rate": 1.627088605299284e-05, "loss": 1.1643, "step": 58160 }, { "epoch": 0.83, "grad_norm": 0.5703125, "learning_rate": 1.6257199951184686e-05, "loss": 1.0059, "step": 58165 }, { "epoch": 0.83, "grad_norm": 0.55078125, "learning_rate": 1.6243519098470606e-05, "loss": 0.9065, "step": 58170 }, { "epoch": 0.83, "grad_norm": 0.53125, "learning_rate": 1.6229843495708163e-05, "loss": 0.9551, "step": 58175 }, { "epoch": 0.83, "grad_norm": 0.54296875, "learning_rate": 1.6216173143754544e-05, "loss": 1.0303, "step": 58180 }, { "epoch": 0.83, "grad_norm": 0.443359375, "learning_rate": 1.620250804346659e-05, "loss": 0.7746, "step": 58185 }, { "epoch": 0.83, "grad_norm": 0.54296875, "learning_rate": 1.618884819570087e-05, "loss": 0.8097, "step": 58190 }, { "epoch": 0.83, "grad_norm": 0.4921875, "learning_rate": 1.617519360131361e-05, "loss": 1.0209, "step": 58195 }, { "epoch": 0.83, "grad_norm": 0.53125, "learning_rate": 1.6161544261160676e-05, "loss": 0.8574, "step": 58200 }, { "epoch": 0.83, "grad_norm": 0.5, "learning_rate": 1.614790017609762e-05, "loss": 0.9302, "step": 58205 }, { "epoch": 0.83, "grad_norm": 0.6171875, "learning_rate": 1.6134261346979707e-05, "loss": 1.0741, "step": 58210 }, { "epoch": 0.84, "grad_norm": 0.51953125, "learning_rate": 1.6120627774661788e-05, "loss": 0.9656, "step": 58215 }, { "epoch": 0.84, "grad_norm": 0.53125, "learning_rate": 1.610699945999844e-05, "loss": 0.8366, "step": 58220 }, { "epoch": 0.84, "grad_norm": 0.55078125, "learning_rate": 1.6093376403843973e-05, "loss": 0.9303, "step": 58225 }, { "epoch": 0.84, "grad_norm": 0.765625, "learning_rate": 1.6079758607052176e-05, "loss": 0.9636, "step": 58230 }, { "epoch": 0.84, "grad_norm": 0.5625, "learning_rate": 1.6066146070476696e-05, "loss": 0.9858, "step": 58235 }, { "epoch": 0.84, "grad_norm": 0.451171875, "learning_rate": 1.6052538794970795e-05, "loss": 0.8403, "step": 58240 }, { "epoch": 0.84, "grad_norm": 0.5546875, "learning_rate": 1.6038936781387337e-05, "loss": 0.9222, "step": 58245 }, { "epoch": 0.84, "grad_norm": 0.50390625, "learning_rate": 1.602534003057895e-05, "loss": 0.7898, "step": 58250 }, { "epoch": 0.84, "grad_norm": 0.54296875, "learning_rate": 1.601174854339792e-05, "loss": 1.1416, "step": 58255 }, { "epoch": 0.84, "grad_norm": 0.55859375, "learning_rate": 1.5998162320696118e-05, "loss": 0.9055, "step": 58260 }, { "epoch": 0.84, "grad_norm": 0.6171875, "learning_rate": 1.5984581363325168e-05, "loss": 0.9214, "step": 58265 }, { "epoch": 0.84, "grad_norm": 0.54296875, "learning_rate": 1.5971005672136375e-05, "loss": 0.9064, "step": 58270 }, { "epoch": 0.84, "grad_norm": 0.5390625, "learning_rate": 1.595743524798061e-05, "loss": 1.0478, "step": 58275 }, { "epoch": 0.84, "grad_norm": 0.578125, "learning_rate": 1.5943870091708558e-05, "loss": 1.0135, "step": 58280 }, { "epoch": 0.84, "grad_norm": 0.70703125, "learning_rate": 1.5930310204170427e-05, "loss": 0.9129, "step": 58285 }, { "epoch": 0.84, "grad_norm": 0.50390625, "learning_rate": 1.5916755586216236e-05, "loss": 0.782, "step": 58290 }, { "epoch": 0.84, "grad_norm": 0.5078125, "learning_rate": 1.5903206238695535e-05, "loss": 0.8294, "step": 58295 }, { "epoch": 0.84, "grad_norm": 0.5703125, "learning_rate": 1.588966216245764e-05, "loss": 0.9915, "step": 58300 }, { "epoch": 0.84, "grad_norm": 0.65234375, "learning_rate": 1.5876123358351547e-05, "loss": 0.9133, "step": 58305 }, { "epoch": 0.84, "grad_norm": 0.59375, "learning_rate": 1.5862589827225827e-05, "loss": 1.0046, "step": 58310 }, { "epoch": 0.84, "grad_norm": 0.52734375, "learning_rate": 1.5849061569928802e-05, "loss": 1.0208, "step": 58315 }, { "epoch": 0.84, "grad_norm": 0.455078125, "learning_rate": 1.5835538587308453e-05, "loss": 0.836, "step": 58320 }, { "epoch": 0.84, "grad_norm": 0.578125, "learning_rate": 1.5822020880212383e-05, "loss": 0.9109, "step": 58325 }, { "epoch": 0.84, "grad_norm": 0.56640625, "learning_rate": 1.5808508449487935e-05, "loss": 0.8614, "step": 58330 }, { "epoch": 0.84, "grad_norm": 0.5390625, "learning_rate": 1.579500129598207e-05, "loss": 0.9432, "step": 58335 }, { "epoch": 0.84, "grad_norm": 0.515625, "learning_rate": 1.57814994205414e-05, "loss": 0.956, "step": 58340 }, { "epoch": 0.84, "grad_norm": 0.55078125, "learning_rate": 1.5768002824012263e-05, "loss": 1.0127, "step": 58345 }, { "epoch": 0.84, "grad_norm": 0.546875, "learning_rate": 1.5754511507240666e-05, "loss": 0.9024, "step": 58350 }, { "epoch": 0.84, "grad_norm": 0.58984375, "learning_rate": 1.5741025471072202e-05, "loss": 1.0308, "step": 58355 }, { "epoch": 0.84, "grad_norm": 0.546875, "learning_rate": 1.5727544716352228e-05, "loss": 1.0369, "step": 58360 }, { "epoch": 0.84, "grad_norm": 0.5546875, "learning_rate": 1.5714069243925754e-05, "loss": 0.8761, "step": 58365 }, { "epoch": 0.84, "grad_norm": 0.52734375, "learning_rate": 1.57005990546374e-05, "loss": 0.9829, "step": 58370 }, { "epoch": 0.84, "grad_norm": 0.5703125, "learning_rate": 1.5687134149331496e-05, "loss": 0.9662, "step": 58375 }, { "epoch": 0.84, "grad_norm": 0.54296875, "learning_rate": 1.5673674528852065e-05, "loss": 1.0646, "step": 58380 }, { "epoch": 0.84, "grad_norm": 0.5078125, "learning_rate": 1.5660220194042774e-05, "loss": 0.855, "step": 58385 }, { "epoch": 0.84, "grad_norm": 0.55859375, "learning_rate": 1.5646771145746897e-05, "loss": 0.9553, "step": 58390 }, { "epoch": 0.84, "grad_norm": 0.5546875, "learning_rate": 1.5633327384807473e-05, "loss": 0.9051, "step": 58395 }, { "epoch": 0.84, "grad_norm": 0.56640625, "learning_rate": 1.5619888912067205e-05, "loss": 1.0056, "step": 58400 }, { "epoch": 0.84, "grad_norm": 0.58203125, "learning_rate": 1.5606455728368375e-05, "loss": 0.9086, "step": 58405 }, { "epoch": 0.84, "grad_norm": 0.68359375, "learning_rate": 1.559302783455302e-05, "loss": 0.9692, "step": 58410 }, { "epoch": 0.84, "grad_norm": 0.54296875, "learning_rate": 1.5579605231462835e-05, "loss": 1.0364, "step": 58415 }, { "epoch": 0.84, "grad_norm": 0.5390625, "learning_rate": 1.5566187919939114e-05, "loss": 0.8547, "step": 58420 }, { "epoch": 0.84, "grad_norm": 0.51953125, "learning_rate": 1.555277590082289e-05, "loss": 0.9497, "step": 58425 }, { "epoch": 0.84, "grad_norm": 0.6171875, "learning_rate": 1.5539369174954887e-05, "loss": 0.955, "step": 58430 }, { "epoch": 0.84, "grad_norm": 0.53515625, "learning_rate": 1.552596774317541e-05, "loss": 0.9568, "step": 58435 }, { "epoch": 0.84, "grad_norm": 0.5625, "learning_rate": 1.5512571606324454e-05, "loss": 0.8585, "step": 58440 }, { "epoch": 0.84, "grad_norm": 0.486328125, "learning_rate": 1.549918076524176e-05, "loss": 0.9049, "step": 58445 }, { "epoch": 0.84, "grad_norm": 0.46484375, "learning_rate": 1.5485795220766642e-05, "loss": 0.9563, "step": 58450 }, { "epoch": 0.84, "grad_norm": 0.58203125, "learning_rate": 1.5472414973738115e-05, "loss": 0.9304, "step": 58455 }, { "epoch": 0.84, "grad_norm": 0.63671875, "learning_rate": 1.545904002499492e-05, "loss": 0.9151, "step": 58460 }, { "epoch": 0.84, "grad_norm": 0.53515625, "learning_rate": 1.5445670375375354e-05, "loss": 0.8703, "step": 58465 }, { "epoch": 0.84, "grad_norm": 0.55078125, "learning_rate": 1.543230602571747e-05, "loss": 0.9275, "step": 58470 }, { "epoch": 0.84, "grad_norm": 0.59375, "learning_rate": 1.5418946976858983e-05, "loss": 0.8973, "step": 58475 }, { "epoch": 0.84, "grad_norm": 0.63671875, "learning_rate": 1.5405593229637206e-05, "loss": 0.9844, "step": 58480 }, { "epoch": 0.84, "grad_norm": 0.734375, "learning_rate": 1.5392244784889188e-05, "loss": 0.8972, "step": 58485 }, { "epoch": 0.84, "grad_norm": 0.55859375, "learning_rate": 1.537890164345165e-05, "loss": 0.8179, "step": 58490 }, { "epoch": 0.84, "grad_norm": 0.546875, "learning_rate": 1.536556380616093e-05, "loss": 0.9699, "step": 58495 }, { "epoch": 0.84, "grad_norm": 0.48046875, "learning_rate": 1.5352231273853046e-05, "loss": 0.9691, "step": 58500 }, { "epoch": 0.84, "grad_norm": 0.56640625, "learning_rate": 1.53389040473637e-05, "loss": 0.9526, "step": 58505 }, { "epoch": 0.84, "grad_norm": 0.53125, "learning_rate": 1.5325582127528303e-05, "loss": 0.9293, "step": 58510 }, { "epoch": 0.84, "grad_norm": 0.67578125, "learning_rate": 1.5312265515181824e-05, "loss": 0.9441, "step": 58515 }, { "epoch": 0.84, "grad_norm": 0.57421875, "learning_rate": 1.5298954211159e-05, "loss": 0.9324, "step": 58520 }, { "epoch": 0.84, "grad_norm": 0.515625, "learning_rate": 1.5285648216294213e-05, "loss": 0.8834, "step": 58525 }, { "epoch": 0.84, "grad_norm": 0.51171875, "learning_rate": 1.527234753142145e-05, "loss": 0.9087, "step": 58530 }, { "epoch": 0.84, "grad_norm": 0.4921875, "learning_rate": 1.5259052157374442e-05, "loss": 0.9629, "step": 58535 }, { "epoch": 0.84, "grad_norm": 0.6015625, "learning_rate": 1.5245762094986581e-05, "loss": 0.9918, "step": 58540 }, { "epoch": 0.84, "grad_norm": 0.5703125, "learning_rate": 1.523247734509089e-05, "loss": 1.027, "step": 58545 }, { "epoch": 0.84, "grad_norm": 0.474609375, "learning_rate": 1.5219197908520023e-05, "loss": 1.0379, "step": 58550 }, { "epoch": 0.84, "grad_norm": 0.50390625, "learning_rate": 1.5205923786106414e-05, "loss": 1.0219, "step": 58555 }, { "epoch": 0.84, "grad_norm": 0.55078125, "learning_rate": 1.5192654978682052e-05, "loss": 1.1855, "step": 58560 }, { "epoch": 0.84, "grad_norm": 0.51953125, "learning_rate": 1.5179391487078664e-05, "loss": 0.8573, "step": 58565 }, { "epoch": 0.84, "grad_norm": 0.58203125, "learning_rate": 1.516613331212765e-05, "loss": 1.0287, "step": 58570 }, { "epoch": 0.84, "grad_norm": 0.51953125, "learning_rate": 1.515288045465999e-05, "loss": 0.9057, "step": 58575 }, { "epoch": 0.84, "grad_norm": 0.5859375, "learning_rate": 1.5139632915506407e-05, "loss": 0.9238, "step": 58580 }, { "epoch": 0.84, "grad_norm": 0.546875, "learning_rate": 1.5126390695497317e-05, "loss": 0.8945, "step": 58585 }, { "epoch": 0.84, "grad_norm": 0.5859375, "learning_rate": 1.5113153795462687e-05, "loss": 0.9769, "step": 58590 }, { "epoch": 0.84, "grad_norm": 0.494140625, "learning_rate": 1.5099922216232288e-05, "loss": 0.8387, "step": 58595 }, { "epoch": 0.84, "grad_norm": 0.52734375, "learning_rate": 1.5086695958635432e-05, "loss": 0.8441, "step": 58600 }, { "epoch": 0.84, "grad_norm": 0.6328125, "learning_rate": 1.5073475023501204e-05, "loss": 1.0256, "step": 58605 }, { "epoch": 0.84, "grad_norm": 0.61328125, "learning_rate": 1.5060259411658261e-05, "loss": 1.1083, "step": 58610 }, { "epoch": 0.84, "grad_norm": 0.5234375, "learning_rate": 1.5047049123934987e-05, "loss": 1.0134, "step": 58615 }, { "epoch": 0.84, "grad_norm": 0.5859375, "learning_rate": 1.503384416115946e-05, "loss": 0.8517, "step": 58620 }, { "epoch": 0.84, "grad_norm": 0.5625, "learning_rate": 1.5020644524159333e-05, "loss": 0.8647, "step": 58625 }, { "epoch": 0.84, "grad_norm": 0.5703125, "learning_rate": 1.5007450213761976e-05, "loss": 0.953, "step": 58630 }, { "epoch": 0.84, "grad_norm": 0.546875, "learning_rate": 1.499426123079447e-05, "loss": 1.1321, "step": 58635 }, { "epoch": 0.84, "grad_norm": 0.53125, "learning_rate": 1.4981077576083457e-05, "loss": 1.0327, "step": 58640 }, { "epoch": 0.84, "grad_norm": 0.5, "learning_rate": 1.496789925045533e-05, "loss": 1.0104, "step": 58645 }, { "epoch": 0.84, "grad_norm": 0.53515625, "learning_rate": 1.4954726254736174e-05, "loss": 0.9281, "step": 58650 }, { "epoch": 0.84, "grad_norm": 0.462890625, "learning_rate": 1.4941558589751581e-05, "loss": 0.9496, "step": 58655 }, { "epoch": 0.84, "grad_norm": 0.609375, "learning_rate": 1.4928396256326971e-05, "loss": 0.9333, "step": 58660 }, { "epoch": 0.84, "grad_norm": 0.53125, "learning_rate": 1.4915239255287395e-05, "loss": 0.9894, "step": 58665 }, { "epoch": 0.84, "grad_norm": 0.52734375, "learning_rate": 1.4902087587457502e-05, "loss": 0.9979, "step": 58670 }, { "epoch": 0.84, "grad_norm": 0.58203125, "learning_rate": 1.4888941253661681e-05, "loss": 0.957, "step": 58675 }, { "epoch": 0.84, "grad_norm": 0.6015625, "learning_rate": 1.4875800254723982e-05, "loss": 0.9186, "step": 58680 }, { "epoch": 0.84, "grad_norm": 0.546875, "learning_rate": 1.4862664591468034e-05, "loss": 0.7844, "step": 58685 }, { "epoch": 0.84, "grad_norm": 0.58984375, "learning_rate": 1.4849534264717246e-05, "loss": 0.8935, "step": 58690 }, { "epoch": 0.84, "grad_norm": 0.5546875, "learning_rate": 1.483640927529465e-05, "loss": 1.0858, "step": 58695 }, { "epoch": 0.84, "grad_norm": 0.609375, "learning_rate": 1.4823289624022907e-05, "loss": 1.0458, "step": 58700 }, { "epoch": 0.84, "grad_norm": 0.59765625, "learning_rate": 1.4810175311724361e-05, "loss": 1.0108, "step": 58705 }, { "epoch": 0.84, "grad_norm": 0.609375, "learning_rate": 1.4797066339221044e-05, "loss": 0.938, "step": 58710 }, { "epoch": 0.84, "grad_norm": 0.6171875, "learning_rate": 1.4783962707334675e-05, "loss": 1.0258, "step": 58715 }, { "epoch": 0.84, "grad_norm": 0.54296875, "learning_rate": 1.4770864416886555e-05, "loss": 0.9121, "step": 58720 }, { "epoch": 0.84, "grad_norm": 0.5, "learning_rate": 1.4757771468697713e-05, "loss": 1.0701, "step": 58725 }, { "epoch": 0.84, "grad_norm": 0.51171875, "learning_rate": 1.474468386358887e-05, "loss": 0.9293, "step": 58730 }, { "epoch": 0.84, "grad_norm": 0.52734375, "learning_rate": 1.4731601602380307e-05, "loss": 0.9501, "step": 58735 }, { "epoch": 0.84, "grad_norm": 0.5703125, "learning_rate": 1.4718524685892077e-05, "loss": 0.9337, "step": 58740 }, { "epoch": 0.84, "grad_norm": 0.7890625, "learning_rate": 1.4705453114943868e-05, "loss": 0.9834, "step": 58745 }, { "epoch": 0.84, "grad_norm": 0.52734375, "learning_rate": 1.4692386890354981e-05, "loss": 0.9413, "step": 58750 }, { "epoch": 0.84, "grad_norm": 0.6015625, "learning_rate": 1.467932601294446e-05, "loss": 1.1198, "step": 58755 }, { "epoch": 0.84, "grad_norm": 0.52734375, "learning_rate": 1.466627048353093e-05, "loss": 0.9578, "step": 58760 }, { "epoch": 0.84, "grad_norm": 0.55859375, "learning_rate": 1.465322030293278e-05, "loss": 0.9431, "step": 58765 }, { "epoch": 0.84, "grad_norm": 0.53515625, "learning_rate": 1.4640175471967965e-05, "loss": 0.964, "step": 58770 }, { "epoch": 0.84, "grad_norm": 0.62109375, "learning_rate": 1.4627135991454155e-05, "loss": 0.9486, "step": 58775 }, { "epoch": 0.84, "grad_norm": 0.57421875, "learning_rate": 1.4614101862208729e-05, "loss": 1.0664, "step": 58780 }, { "epoch": 0.84, "grad_norm": 0.546875, "learning_rate": 1.460107308504861e-05, "loss": 0.9023, "step": 58785 }, { "epoch": 0.84, "grad_norm": 0.62109375, "learning_rate": 1.4588049660790527e-05, "loss": 1.0412, "step": 58790 }, { "epoch": 0.84, "grad_norm": 0.49609375, "learning_rate": 1.4575031590250732e-05, "loss": 0.9061, "step": 58795 }, { "epoch": 0.84, "grad_norm": 0.49609375, "learning_rate": 1.4562018874245254e-05, "loss": 0.939, "step": 58800 }, { "epoch": 0.84, "grad_norm": 0.5546875, "learning_rate": 1.4549011513589762e-05, "loss": 0.8717, "step": 58805 }, { "epoch": 0.84, "grad_norm": 0.5546875, "learning_rate": 1.4536009509099547e-05, "loss": 0.9185, "step": 58810 }, { "epoch": 0.84, "grad_norm": 0.55859375, "learning_rate": 1.4523012861589568e-05, "loss": 0.9918, "step": 58815 }, { "epoch": 0.84, "grad_norm": 0.54296875, "learning_rate": 1.4510021571874498e-05, "loss": 0.9828, "step": 58820 }, { "epoch": 0.84, "grad_norm": 0.6171875, "learning_rate": 1.4497035640768664e-05, "loss": 0.8807, "step": 58825 }, { "epoch": 0.84, "grad_norm": 0.55078125, "learning_rate": 1.4484055069085989e-05, "loss": 0.9587, "step": 58830 }, { "epoch": 0.84, "grad_norm": 0.51171875, "learning_rate": 1.4471079857640134e-05, "loss": 0.9532, "step": 58835 }, { "epoch": 0.84, "grad_norm": 0.5234375, "learning_rate": 1.445811000724443e-05, "loss": 0.8421, "step": 58840 }, { "epoch": 0.84, "grad_norm": 0.64453125, "learning_rate": 1.4445145518711789e-05, "loss": 0.9329, "step": 58845 }, { "epoch": 0.84, "grad_norm": 0.49609375, "learning_rate": 1.4432186392854862e-05, "loss": 0.9504, "step": 58850 }, { "epoch": 0.84, "grad_norm": 0.65234375, "learning_rate": 1.441923263048598e-05, "loss": 0.8915, "step": 58855 }, { "epoch": 0.84, "grad_norm": 0.58203125, "learning_rate": 1.4406284232417056e-05, "loss": 1.0894, "step": 58860 }, { "epoch": 0.84, "grad_norm": 0.68359375, "learning_rate": 1.4393341199459698e-05, "loss": 0.9599, "step": 58865 }, { "epoch": 0.84, "grad_norm": 0.57421875, "learning_rate": 1.4380403532425212e-05, "loss": 0.8874, "step": 58870 }, { "epoch": 0.84, "grad_norm": 0.5703125, "learning_rate": 1.4367471232124575e-05, "loss": 1.1097, "step": 58875 }, { "epoch": 0.84, "grad_norm": 0.6640625, "learning_rate": 1.4354544299368333e-05, "loss": 0.9817, "step": 58880 }, { "epoch": 0.84, "grad_norm": 0.494140625, "learning_rate": 1.4341622734966797e-05, "loss": 0.9076, "step": 58885 }, { "epoch": 0.84, "grad_norm": 0.49609375, "learning_rate": 1.4328706539729941e-05, "loss": 0.8973, "step": 58890 }, { "epoch": 0.84, "grad_norm": 0.5859375, "learning_rate": 1.4315795714467294e-05, "loss": 1.0058, "step": 58895 }, { "epoch": 0.84, "grad_norm": 0.56640625, "learning_rate": 1.4302890259988167e-05, "loss": 0.8475, "step": 58900 }, { "epoch": 0.84, "grad_norm": 0.515625, "learning_rate": 1.4289990177101497e-05, "loss": 1.0302, "step": 58905 }, { "epoch": 0.85, "grad_norm": 0.431640625, "learning_rate": 1.4277095466615864e-05, "loss": 0.9015, "step": 58910 }, { "epoch": 0.85, "grad_norm": 0.5546875, "learning_rate": 1.4264206129339486e-05, "loss": 0.984, "step": 58915 }, { "epoch": 0.85, "grad_norm": 0.50390625, "learning_rate": 1.4251322166080339e-05, "loss": 1.0828, "step": 58920 }, { "epoch": 0.85, "grad_norm": 0.59765625, "learning_rate": 1.4238443577645955e-05, "loss": 0.8563, "step": 58925 }, { "epoch": 0.85, "grad_norm": 0.60546875, "learning_rate": 1.4225570364843599e-05, "loss": 1.044, "step": 58930 }, { "epoch": 0.85, "grad_norm": 0.66796875, "learning_rate": 1.4212702528480214e-05, "loss": 1.1616, "step": 58935 }, { "epoch": 0.85, "grad_norm": 0.5703125, "learning_rate": 1.4199840069362302e-05, "loss": 0.9844, "step": 58940 }, { "epoch": 0.85, "grad_norm": 0.59765625, "learning_rate": 1.418698298829615e-05, "loss": 0.9174, "step": 58945 }, { "epoch": 0.85, "grad_norm": 0.478515625, "learning_rate": 1.4174131286087645e-05, "loss": 1.0788, "step": 58950 }, { "epoch": 0.85, "grad_norm": 0.6171875, "learning_rate": 1.4161284963542321e-05, "loss": 1.0316, "step": 58955 }, { "epoch": 0.85, "grad_norm": 0.59375, "learning_rate": 1.4148444021465413e-05, "loss": 0.859, "step": 58960 }, { "epoch": 0.85, "grad_norm": 0.625, "learning_rate": 1.4135608460661842e-05, "loss": 0.9537, "step": 58965 }, { "epoch": 0.85, "grad_norm": 0.5, "learning_rate": 1.412277828193611e-05, "loss": 0.9823, "step": 58970 }, { "epoch": 0.85, "grad_norm": 0.5390625, "learning_rate": 1.410995348609243e-05, "loss": 0.8578, "step": 58975 }, { "epoch": 0.85, "grad_norm": 0.51953125, "learning_rate": 1.4097134073934693e-05, "loss": 1.0997, "step": 58980 }, { "epoch": 0.85, "grad_norm": 0.5625, "learning_rate": 1.408432004626643e-05, "loss": 1.0181, "step": 58985 }, { "epoch": 0.85, "grad_norm": 0.5390625, "learning_rate": 1.4071511403890824e-05, "loss": 1.0344, "step": 58990 }, { "epoch": 0.85, "grad_norm": 0.6171875, "learning_rate": 1.4058708147610755e-05, "loss": 0.9378, "step": 58995 }, { "epoch": 0.85, "grad_norm": 0.45703125, "learning_rate": 1.4045910278228757e-05, "loss": 0.7372, "step": 59000 }, { "epoch": 0.85, "grad_norm": 0.5078125, "learning_rate": 1.4033117796546969e-05, "loss": 0.9252, "step": 59005 }, { "epoch": 0.85, "grad_norm": 0.65625, "learning_rate": 1.4020330703367268e-05, "loss": 0.9874, "step": 59010 }, { "epoch": 0.85, "grad_norm": 0.53515625, "learning_rate": 1.4007548999491204e-05, "loss": 0.9918, "step": 59015 }, { "epoch": 0.85, "grad_norm": 0.5234375, "learning_rate": 1.399477268571986e-05, "loss": 0.9145, "step": 59020 }, { "epoch": 0.85, "grad_norm": 0.5625, "learning_rate": 1.3982001762854114e-05, "loss": 0.9774, "step": 59025 }, { "epoch": 0.85, "grad_norm": 0.5078125, "learning_rate": 1.3969236231694494e-05, "loss": 0.9343, "step": 59030 }, { "epoch": 0.85, "grad_norm": 0.54296875, "learning_rate": 1.3956476093041105e-05, "loss": 0.8558, "step": 59035 }, { "epoch": 0.85, "grad_norm": 0.6015625, "learning_rate": 1.3943721347693783e-05, "loss": 0.8362, "step": 59040 }, { "epoch": 0.85, "grad_norm": 0.57421875, "learning_rate": 1.3930971996452048e-05, "loss": 0.8882, "step": 59045 }, { "epoch": 0.85, "grad_norm": 0.58984375, "learning_rate": 1.3918228040114977e-05, "loss": 0.8783, "step": 59050 }, { "epoch": 0.85, "grad_norm": 0.578125, "learning_rate": 1.3905489479481425e-05, "loss": 0.8499, "step": 59055 }, { "epoch": 0.85, "grad_norm": 0.5390625, "learning_rate": 1.3892756315349853e-05, "loss": 0.9287, "step": 59060 }, { "epoch": 0.85, "grad_norm": 0.56640625, "learning_rate": 1.3880028548518376e-05, "loss": 1.0537, "step": 59065 }, { "epoch": 0.85, "grad_norm": 0.52734375, "learning_rate": 1.3867306179784811e-05, "loss": 1.0329, "step": 59070 }, { "epoch": 0.85, "grad_norm": 0.73046875, "learning_rate": 1.3854589209946567e-05, "loss": 0.9988, "step": 59075 }, { "epoch": 0.85, "grad_norm": 0.5078125, "learning_rate": 1.3841877639800805e-05, "loss": 0.9891, "step": 59080 }, { "epoch": 0.85, "grad_norm": 0.55859375, "learning_rate": 1.3829171470144254e-05, "loss": 0.893, "step": 59085 }, { "epoch": 0.85, "grad_norm": 0.5078125, "learning_rate": 1.3816470701773377e-05, "loss": 1.0603, "step": 59090 }, { "epoch": 0.85, "grad_norm": 0.703125, "learning_rate": 1.3803775335484292e-05, "loss": 0.7809, "step": 59095 }, { "epoch": 0.85, "grad_norm": 0.53125, "learning_rate": 1.379108537207272e-05, "loss": 0.9883, "step": 59100 }, { "epoch": 0.85, "grad_norm": 0.64453125, "learning_rate": 1.3778400812334102e-05, "loss": 1.0164, "step": 59105 }, { "epoch": 0.85, "grad_norm": 0.6015625, "learning_rate": 1.3765721657063535e-05, "loss": 0.9677, "step": 59110 }, { "epoch": 0.85, "grad_norm": 1.140625, "learning_rate": 1.3753047907055727e-05, "loss": 0.9704, "step": 59115 }, { "epoch": 0.85, "grad_norm": 0.515625, "learning_rate": 1.3740379563105122e-05, "loss": 0.8938, "step": 59120 }, { "epoch": 0.85, "grad_norm": 0.55078125, "learning_rate": 1.372771662600577e-05, "loss": 1.0333, "step": 59125 }, { "epoch": 0.85, "grad_norm": 0.640625, "learning_rate": 1.3715059096551386e-05, "loss": 0.9952, "step": 59130 }, { "epoch": 0.85, "grad_norm": 0.5625, "learning_rate": 1.3702406975535353e-05, "loss": 0.8836, "step": 59135 }, { "epoch": 0.85, "grad_norm": 0.6171875, "learning_rate": 1.3689760263750762e-05, "loss": 0.8332, "step": 59140 }, { "epoch": 0.85, "grad_norm": 0.5390625, "learning_rate": 1.3677118961990276e-05, "loss": 0.9347, "step": 59145 }, { "epoch": 0.85, "grad_norm": 0.5, "learning_rate": 1.3664483071046286e-05, "loss": 0.8312, "step": 59150 }, { "epoch": 0.85, "grad_norm": 0.474609375, "learning_rate": 1.3651852591710845e-05, "loss": 0.9411, "step": 59155 }, { "epoch": 0.85, "grad_norm": 0.515625, "learning_rate": 1.363922752477561e-05, "loss": 0.9272, "step": 59160 }, { "epoch": 0.85, "grad_norm": 0.56640625, "learning_rate": 1.3626607871031939e-05, "loss": 0.9742, "step": 59165 }, { "epoch": 0.85, "grad_norm": 0.62109375, "learning_rate": 1.3613993631270894e-05, "loss": 0.9807, "step": 59170 }, { "epoch": 0.85, "grad_norm": 0.58203125, "learning_rate": 1.3601384806283101e-05, "loss": 0.7687, "step": 59175 }, { "epoch": 0.85, "grad_norm": 0.55078125, "learning_rate": 1.3588781396858896e-05, "loss": 1.0959, "step": 59180 }, { "epoch": 0.85, "grad_norm": 0.44140625, "learning_rate": 1.3576183403788279e-05, "loss": 1.0129, "step": 59185 }, { "epoch": 0.85, "grad_norm": 0.5234375, "learning_rate": 1.3563590827860939e-05, "loss": 0.9644, "step": 59190 }, { "epoch": 0.85, "grad_norm": 0.578125, "learning_rate": 1.3551003669866146e-05, "loss": 0.9295, "step": 59195 }, { "epoch": 0.85, "grad_norm": 0.55078125, "learning_rate": 1.3538421930592903e-05, "loss": 0.9419, "step": 59200 }, { "epoch": 0.85, "grad_norm": 0.50390625, "learning_rate": 1.3525845610829868e-05, "loss": 1.0253, "step": 59205 }, { "epoch": 0.85, "grad_norm": 0.484375, "learning_rate": 1.3513274711365298e-05, "loss": 0.8688, "step": 59210 }, { "epoch": 0.85, "grad_norm": 0.546875, "learning_rate": 1.3500709232987163e-05, "loss": 0.9537, "step": 59215 }, { "epoch": 0.85, "grad_norm": 0.60546875, "learning_rate": 1.3488149176483112e-05, "loss": 0.9867, "step": 59220 }, { "epoch": 0.85, "grad_norm": 0.5703125, "learning_rate": 1.3475594542640391e-05, "loss": 0.9123, "step": 59225 }, { "epoch": 0.85, "grad_norm": 0.5, "learning_rate": 1.3463045332245939e-05, "loss": 1.1345, "step": 59230 }, { "epoch": 0.85, "grad_norm": 0.578125, "learning_rate": 1.345050154608637e-05, "loss": 0.9323, "step": 59235 }, { "epoch": 0.85, "grad_norm": 0.498046875, "learning_rate": 1.343796318494792e-05, "loss": 0.78, "step": 59240 }, { "epoch": 0.85, "grad_norm": 0.515625, "learning_rate": 1.3425430249616522e-05, "loss": 0.9499, "step": 59245 }, { "epoch": 0.85, "grad_norm": 0.49609375, "learning_rate": 1.3412902740877786e-05, "loss": 0.9186, "step": 59250 }, { "epoch": 0.85, "grad_norm": 0.52734375, "learning_rate": 1.3400380659516898e-05, "loss": 1.0809, "step": 59255 }, { "epoch": 0.85, "grad_norm": 0.57421875, "learning_rate": 1.3387864006318773e-05, "loss": 0.9911, "step": 59260 }, { "epoch": 0.85, "grad_norm": 0.515625, "learning_rate": 1.3375352782067996e-05, "loss": 0.9153, "step": 59265 }, { "epoch": 0.85, "grad_norm": 0.5546875, "learning_rate": 1.3362846987548738e-05, "loss": 0.9067, "step": 59270 }, { "epoch": 0.85, "grad_norm": 0.53125, "learning_rate": 1.3350346623544918e-05, "loss": 0.9464, "step": 59275 }, { "epoch": 0.85, "grad_norm": 0.61328125, "learning_rate": 1.3337851690840064e-05, "loss": 0.998, "step": 59280 }, { "epoch": 0.85, "grad_norm": 0.498046875, "learning_rate": 1.3325362190217371e-05, "loss": 0.8617, "step": 59285 }, { "epoch": 0.85, "grad_norm": 0.609375, "learning_rate": 1.331287812245967e-05, "loss": 0.9716, "step": 59290 }, { "epoch": 0.85, "grad_norm": 0.546875, "learning_rate": 1.3300399488349491e-05, "loss": 0.8445, "step": 59295 }, { "epoch": 0.85, "grad_norm": 0.5234375, "learning_rate": 1.328792628866905e-05, "loss": 0.9536, "step": 59300 }, { "epoch": 0.85, "grad_norm": 0.53125, "learning_rate": 1.3275458524200112e-05, "loss": 0.9947, "step": 59305 }, { "epoch": 0.85, "grad_norm": 0.56640625, "learning_rate": 1.3262996195724209e-05, "loss": 1.0093, "step": 59310 }, { "epoch": 0.85, "grad_norm": 0.63671875, "learning_rate": 1.3250539304022503e-05, "loss": 0.9308, "step": 59315 }, { "epoch": 0.85, "grad_norm": 0.5859375, "learning_rate": 1.323808784987578e-05, "loss": 0.9925, "step": 59320 }, { "epoch": 0.85, "grad_norm": 0.546875, "learning_rate": 1.3225641834064529e-05, "loss": 1.017, "step": 59325 }, { "epoch": 0.85, "grad_norm": 0.84765625, "learning_rate": 1.3213201257368913e-05, "loss": 0.7214, "step": 59330 }, { "epoch": 0.85, "grad_norm": 0.58203125, "learning_rate": 1.320076612056863e-05, "loss": 0.9199, "step": 59335 }, { "epoch": 0.85, "grad_norm": 0.5625, "learning_rate": 1.3188336424443193e-05, "loss": 0.9226, "step": 59340 }, { "epoch": 0.85, "grad_norm": 0.5390625, "learning_rate": 1.317591216977172e-05, "loss": 0.8979, "step": 59345 }, { "epoch": 0.85, "grad_norm": 0.57421875, "learning_rate": 1.3163493357332934e-05, "loss": 1.0728, "step": 59350 }, { "epoch": 0.85, "grad_norm": 0.59765625, "learning_rate": 1.315107998790529e-05, "loss": 0.7297, "step": 59355 }, { "epoch": 0.85, "grad_norm": 0.5390625, "learning_rate": 1.3138672062266876e-05, "loss": 0.9474, "step": 59360 }, { "epoch": 0.85, "grad_norm": 0.63671875, "learning_rate": 1.3126269581195416e-05, "loss": 0.9479, "step": 59365 }, { "epoch": 0.85, "grad_norm": 0.5859375, "learning_rate": 1.3113872545468308e-05, "loss": 0.8623, "step": 59370 }, { "epoch": 0.85, "grad_norm": 0.58984375, "learning_rate": 1.3101480955862644e-05, "loss": 0.8414, "step": 59375 }, { "epoch": 0.85, "grad_norm": 0.6328125, "learning_rate": 1.3089094813155112e-05, "loss": 0.9636, "step": 59380 }, { "epoch": 0.85, "grad_norm": 0.53125, "learning_rate": 1.3076714118122114e-05, "loss": 0.9034, "step": 59385 }, { "epoch": 0.85, "grad_norm": 0.5546875, "learning_rate": 1.306433887153965e-05, "loss": 0.8748, "step": 59390 }, { "epoch": 0.85, "grad_norm": 0.5625, "learning_rate": 1.3051969074183457e-05, "loss": 1.0434, "step": 59395 }, { "epoch": 0.85, "grad_norm": 0.63671875, "learning_rate": 1.3039604726828847e-05, "loss": 0.9884, "step": 59400 }, { "epoch": 0.85, "grad_norm": 0.5703125, "learning_rate": 1.3027245830250856e-05, "loss": 0.8497, "step": 59405 }, { "epoch": 0.85, "grad_norm": 0.578125, "learning_rate": 1.3014892385224175e-05, "loss": 0.9093, "step": 59410 }, { "epoch": 0.85, "grad_norm": 0.58203125, "learning_rate": 1.3002544392523076e-05, "loss": 1.004, "step": 59415 }, { "epoch": 0.85, "grad_norm": 0.50390625, "learning_rate": 1.299020185292158e-05, "loss": 0.9488, "step": 59420 }, { "epoch": 0.85, "grad_norm": 0.5703125, "learning_rate": 1.297786476719336e-05, "loss": 0.7838, "step": 59425 }, { "epoch": 0.85, "grad_norm": 0.51953125, "learning_rate": 1.2965533136111652e-05, "loss": 0.9595, "step": 59430 }, { "epoch": 0.85, "grad_norm": 0.55859375, "learning_rate": 1.2953206960449471e-05, "loss": 1.0603, "step": 59435 }, { "epoch": 0.85, "grad_norm": 0.61328125, "learning_rate": 1.29408862409794e-05, "loss": 0.9718, "step": 59440 }, { "epoch": 0.85, "grad_norm": 0.5390625, "learning_rate": 1.2928570978473753e-05, "loss": 1.0246, "step": 59445 }, { "epoch": 0.85, "grad_norm": 0.6171875, "learning_rate": 1.2916261173704425e-05, "loss": 0.9285, "step": 59450 }, { "epoch": 0.85, "grad_norm": 0.55078125, "learning_rate": 1.290395682744302e-05, "loss": 0.8797, "step": 59455 }, { "epoch": 0.85, "grad_norm": 0.5, "learning_rate": 1.2891657940460822e-05, "loss": 0.8116, "step": 59460 }, { "epoch": 0.85, "grad_norm": 0.451171875, "learning_rate": 1.287936451352868e-05, "loss": 1.02, "step": 59465 }, { "epoch": 0.85, "grad_norm": 0.51953125, "learning_rate": 1.2867076547417223e-05, "loss": 0.8766, "step": 59470 }, { "epoch": 0.85, "grad_norm": 0.69140625, "learning_rate": 1.2854794042896623e-05, "loss": 1.0155, "step": 59475 }, { "epoch": 0.85, "grad_norm": 0.65234375, "learning_rate": 1.2842517000736776e-05, "loss": 0.9822, "step": 59480 }, { "epoch": 0.85, "grad_norm": 0.46484375, "learning_rate": 1.2830245421707255e-05, "loss": 0.8895, "step": 59485 }, { "epoch": 0.85, "grad_norm": 0.609375, "learning_rate": 1.2817979306577232e-05, "loss": 1.0055, "step": 59490 }, { "epoch": 0.85, "grad_norm": 0.55859375, "learning_rate": 1.2805718656115528e-05, "loss": 0.9778, "step": 59495 }, { "epoch": 0.85, "grad_norm": 0.703125, "learning_rate": 1.2793463471090695e-05, "loss": 1.0549, "step": 59500 }, { "epoch": 0.85, "grad_norm": 0.58984375, "learning_rate": 1.2781213752270904e-05, "loss": 1.0148, "step": 59505 }, { "epoch": 0.85, "grad_norm": 1.09375, "learning_rate": 1.2768969500423954e-05, "loss": 0.9994, "step": 59510 }, { "epoch": 0.85, "grad_norm": 0.546875, "learning_rate": 1.2756730716317333e-05, "loss": 1.0061, "step": 59515 }, { "epoch": 0.85, "grad_norm": 0.5703125, "learning_rate": 1.2744497400718224e-05, "loss": 0.9772, "step": 59520 }, { "epoch": 0.85, "grad_norm": 0.59765625, "learning_rate": 1.2732269554393373e-05, "loss": 0.9542, "step": 59525 }, { "epoch": 0.85, "grad_norm": 0.5546875, "learning_rate": 1.2720047178109252e-05, "loss": 1.0453, "step": 59530 }, { "epoch": 0.85, "grad_norm": 0.5234375, "learning_rate": 1.2707830272631994e-05, "loss": 1.0213, "step": 59535 }, { "epoch": 0.85, "grad_norm": 0.5546875, "learning_rate": 1.2695618838727352e-05, "loss": 0.9817, "step": 59540 }, { "epoch": 0.85, "grad_norm": 0.52734375, "learning_rate": 1.268341287716074e-05, "loss": 1.0595, "step": 59545 }, { "epoch": 0.85, "grad_norm": 0.6171875, "learning_rate": 1.267121238869725e-05, "loss": 0.9901, "step": 59550 }, { "epoch": 0.85, "grad_norm": 0.55859375, "learning_rate": 1.2659017374101646e-05, "loss": 0.9364, "step": 59555 }, { "epoch": 0.85, "grad_norm": 0.5546875, "learning_rate": 1.2646827834138286e-05, "loss": 0.8643, "step": 59560 }, { "epoch": 0.85, "grad_norm": 0.5703125, "learning_rate": 1.2634643769571242e-05, "loss": 0.9698, "step": 59565 }, { "epoch": 0.85, "grad_norm": 0.59375, "learning_rate": 1.2622465181164256e-05, "loss": 0.8793, "step": 59570 }, { "epoch": 0.85, "grad_norm": 0.60546875, "learning_rate": 1.261029206968064e-05, "loss": 0.8717, "step": 59575 }, { "epoch": 0.85, "grad_norm": 0.51953125, "learning_rate": 1.2598124435883452e-05, "loss": 0.7989, "step": 59580 }, { "epoch": 0.85, "grad_norm": 0.53125, "learning_rate": 1.258596228053538e-05, "loss": 0.9763, "step": 59585 }, { "epoch": 0.85, "grad_norm": 0.58984375, "learning_rate": 1.2573805604398725e-05, "loss": 0.8852, "step": 59590 }, { "epoch": 0.85, "grad_norm": 0.462890625, "learning_rate": 1.2561654408235535e-05, "loss": 1.0915, "step": 59595 }, { "epoch": 0.85, "grad_norm": 0.5, "learning_rate": 1.2549508692807422e-05, "loss": 0.9351, "step": 59600 }, { "epoch": 0.86, "grad_norm": 0.48828125, "learning_rate": 1.2537368458875687e-05, "loss": 0.9376, "step": 59605 }, { "epoch": 0.86, "grad_norm": 0.55078125, "learning_rate": 1.2525233707201311e-05, "loss": 0.9712, "step": 59610 }, { "epoch": 0.86, "grad_norm": 0.66015625, "learning_rate": 1.2513104438544932e-05, "loss": 1.1597, "step": 59615 }, { "epoch": 0.86, "grad_norm": 0.5546875, "learning_rate": 1.2500980653666782e-05, "loss": 0.7914, "step": 59620 }, { "epoch": 0.86, "grad_norm": 0.64453125, "learning_rate": 1.2488862353326813e-05, "loss": 0.9986, "step": 59625 }, { "epoch": 0.86, "grad_norm": 0.6953125, "learning_rate": 1.2476749538284648e-05, "loss": 1.0384, "step": 59630 }, { "epoch": 0.86, "grad_norm": 0.55078125, "learning_rate": 1.246464220929947e-05, "loss": 0.9533, "step": 59635 }, { "epoch": 0.86, "grad_norm": 0.546875, "learning_rate": 1.2452540367130228e-05, "loss": 0.9425, "step": 59640 }, { "epoch": 0.86, "grad_norm": 0.55859375, "learning_rate": 1.2440444012535468e-05, "loss": 0.8041, "step": 59645 }, { "epoch": 0.86, "grad_norm": 0.66796875, "learning_rate": 1.2428353146273407e-05, "loss": 1.0403, "step": 59650 }, { "epoch": 0.86, "grad_norm": 0.625, "learning_rate": 1.2416267769101898e-05, "loss": 1.0474, "step": 59655 }, { "epoch": 0.86, "grad_norm": 0.59375, "learning_rate": 1.2404187881778462e-05, "loss": 1.2028, "step": 59660 }, { "epoch": 0.86, "grad_norm": 0.59375, "learning_rate": 1.2392113485060309e-05, "loss": 1.0045, "step": 59665 }, { "epoch": 0.86, "grad_norm": 0.5546875, "learning_rate": 1.2380044579704254e-05, "loss": 1.0195, "step": 59670 }, { "epoch": 0.86, "grad_norm": 0.55859375, "learning_rate": 1.236798116646678e-05, "loss": 0.9197, "step": 59675 }, { "epoch": 0.86, "grad_norm": 0.51953125, "learning_rate": 1.2355923246104084e-05, "loss": 0.9245, "step": 59680 }, { "epoch": 0.86, "grad_norm": 0.66015625, "learning_rate": 1.2343870819371906e-05, "loss": 1.0074, "step": 59685 }, { "epoch": 0.86, "grad_norm": 0.5234375, "learning_rate": 1.2331823887025739e-05, "loss": 0.9121, "step": 59690 }, { "epoch": 0.86, "grad_norm": 0.53515625, "learning_rate": 1.2319782449820727e-05, "loss": 0.8504, "step": 59695 }, { "epoch": 0.86, "grad_norm": 0.478515625, "learning_rate": 1.2307746508511597e-05, "loss": 1.005, "step": 59700 }, { "epoch": 0.86, "grad_norm": 0.66796875, "learning_rate": 1.229571606385278e-05, "loss": 0.9193, "step": 59705 }, { "epoch": 0.86, "grad_norm": 0.5234375, "learning_rate": 1.2283691116598384e-05, "loss": 0.8849, "step": 59710 }, { "epoch": 0.86, "grad_norm": 0.58984375, "learning_rate": 1.2271671667502105e-05, "loss": 0.9522, "step": 59715 }, { "epoch": 0.86, "grad_norm": 0.5703125, "learning_rate": 1.2259657717317362e-05, "loss": 0.9858, "step": 59720 }, { "epoch": 0.86, "grad_norm": 0.5, "learning_rate": 1.2247649266797223e-05, "loss": 0.9662, "step": 59725 }, { "epoch": 0.86, "grad_norm": 0.6484375, "learning_rate": 1.223564631669435e-05, "loss": 0.9701, "step": 59730 }, { "epoch": 0.86, "grad_norm": 0.56640625, "learning_rate": 1.2223648867761116e-05, "loss": 1.1265, "step": 59735 }, { "epoch": 0.86, "grad_norm": 0.58984375, "learning_rate": 1.2211656920749559e-05, "loss": 0.9027, "step": 59740 }, { "epoch": 0.86, "grad_norm": 0.59765625, "learning_rate": 1.2199670476411319e-05, "loss": 0.9315, "step": 59745 }, { "epoch": 0.86, "grad_norm": 0.5390625, "learning_rate": 1.2187689535497738e-05, "loss": 1.036, "step": 59750 }, { "epoch": 0.86, "grad_norm": 0.57421875, "learning_rate": 1.2175714098759772e-05, "loss": 0.9804, "step": 59755 }, { "epoch": 0.86, "grad_norm": 0.51171875, "learning_rate": 1.2163744166948099e-05, "loss": 0.8822, "step": 59760 }, { "epoch": 0.86, "grad_norm": 0.6015625, "learning_rate": 1.215177974081294e-05, "loss": 1.0182, "step": 59765 }, { "epoch": 0.86, "grad_norm": 0.515625, "learning_rate": 1.2139820821104286e-05, "loss": 0.82, "step": 59770 }, { "epoch": 0.86, "grad_norm": 0.60546875, "learning_rate": 1.2127867408571757e-05, "loss": 0.862, "step": 59775 }, { "epoch": 0.86, "grad_norm": 0.55078125, "learning_rate": 1.2115919503964545e-05, "loss": 0.9314, "step": 59780 }, { "epoch": 0.86, "grad_norm": 0.60546875, "learning_rate": 1.2103977108031595e-05, "loss": 0.9286, "step": 59785 }, { "epoch": 0.86, "grad_norm": 0.56640625, "learning_rate": 1.2092040221521484e-05, "loss": 0.8684, "step": 59790 }, { "epoch": 0.86, "grad_norm": 0.54296875, "learning_rate": 1.2080108845182392e-05, "loss": 0.9665, "step": 59795 }, { "epoch": 0.86, "grad_norm": 0.54296875, "learning_rate": 1.2068182979762198e-05, "loss": 0.8151, "step": 59800 }, { "epoch": 0.86, "grad_norm": 0.53125, "learning_rate": 1.2056262626008496e-05, "loss": 0.8913, "step": 59805 }, { "epoch": 0.86, "grad_norm": 0.53125, "learning_rate": 1.2044347784668364e-05, "loss": 0.9524, "step": 59810 }, { "epoch": 0.86, "grad_norm": 0.55078125, "learning_rate": 1.2032438456488692e-05, "loss": 0.9238, "step": 59815 }, { "epoch": 0.86, "grad_norm": 0.56640625, "learning_rate": 1.2020534642215985e-05, "loss": 0.9864, "step": 59820 }, { "epoch": 0.86, "grad_norm": 0.50390625, "learning_rate": 1.2008636342596347e-05, "loss": 0.9358, "step": 59825 }, { "epoch": 0.86, "grad_norm": 0.62890625, "learning_rate": 1.199674355837559e-05, "loss": 1.0017, "step": 59830 }, { "epoch": 0.86, "grad_norm": 0.53515625, "learning_rate": 1.1984856290299196e-05, "loss": 0.9784, "step": 59835 }, { "epoch": 0.86, "grad_norm": 0.5703125, "learning_rate": 1.1972974539112225e-05, "loss": 0.9955, "step": 59840 }, { "epoch": 0.86, "grad_norm": 0.578125, "learning_rate": 1.1961098305559471e-05, "loss": 1.0199, "step": 59845 }, { "epoch": 0.86, "grad_norm": 0.515625, "learning_rate": 1.194922759038537e-05, "loss": 0.8711, "step": 59850 }, { "epoch": 0.86, "grad_norm": 0.515625, "learning_rate": 1.193736239433395e-05, "loss": 1.0284, "step": 59855 }, { "epoch": 0.86, "grad_norm": 0.5625, "learning_rate": 1.192550271814894e-05, "loss": 0.8833, "step": 59860 }, { "epoch": 0.86, "grad_norm": 0.54296875, "learning_rate": 1.1913648562573732e-05, "loss": 0.8689, "step": 59865 }, { "epoch": 0.86, "grad_norm": 0.51953125, "learning_rate": 1.1901799928351365e-05, "loss": 0.926, "step": 59870 }, { "epoch": 0.86, "grad_norm": 0.5625, "learning_rate": 1.1889956816224502e-05, "loss": 0.9446, "step": 59875 }, { "epoch": 0.86, "grad_norm": 0.6015625, "learning_rate": 1.1878119226935481e-05, "loss": 1.0387, "step": 59880 }, { "epoch": 0.86, "grad_norm": 0.49609375, "learning_rate": 1.1866287161226341e-05, "loss": 0.9219, "step": 59885 }, { "epoch": 0.86, "grad_norm": 0.5625, "learning_rate": 1.185446061983867e-05, "loss": 0.9404, "step": 59890 }, { "epoch": 0.86, "grad_norm": 0.48828125, "learning_rate": 1.1842639603513805e-05, "loss": 0.9646, "step": 59895 }, { "epoch": 0.86, "grad_norm": 0.65234375, "learning_rate": 1.183082411299271e-05, "loss": 1.0031, "step": 59900 }, { "epoch": 0.86, "grad_norm": 0.47265625, "learning_rate": 1.181901414901595e-05, "loss": 1.0052, "step": 59905 }, { "epoch": 0.86, "grad_norm": 0.5390625, "learning_rate": 1.1807209712323842e-05, "loss": 0.9562, "step": 59910 }, { "epoch": 0.86, "grad_norm": 0.478515625, "learning_rate": 1.1795410803656271e-05, "loss": 0.8217, "step": 59915 }, { "epoch": 0.86, "grad_norm": 0.515625, "learning_rate": 1.1783617423752779e-05, "loss": 0.9394, "step": 59920 }, { "epoch": 0.86, "grad_norm": 0.5078125, "learning_rate": 1.1771829573352622e-05, "loss": 1.0048, "step": 59925 }, { "epoch": 0.86, "grad_norm": 0.5703125, "learning_rate": 1.1760047253194695e-05, "loss": 1.036, "step": 59930 }, { "epoch": 0.86, "grad_norm": 0.56640625, "learning_rate": 1.1748270464017475e-05, "loss": 0.9587, "step": 59935 }, { "epoch": 0.86, "grad_norm": 0.58984375, "learning_rate": 1.1736499206559182e-05, "loss": 0.8339, "step": 59940 }, { "epoch": 0.86, "grad_norm": 0.58984375, "learning_rate": 1.172473348155766e-05, "loss": 0.9504, "step": 59945 }, { "epoch": 0.86, "grad_norm": 0.58203125, "learning_rate": 1.1712973289750351e-05, "loss": 0.8442, "step": 59950 }, { "epoch": 0.86, "grad_norm": 0.63671875, "learning_rate": 1.1701218631874434e-05, "loss": 0.9779, "step": 59955 }, { "epoch": 0.86, "grad_norm": 0.6015625, "learning_rate": 1.1689469508666718e-05, "loss": 1.0213, "step": 59960 }, { "epoch": 0.86, "grad_norm": 0.5234375, "learning_rate": 1.1677725920863625e-05, "loss": 1.0294, "step": 59965 }, { "epoch": 0.86, "grad_norm": 0.55078125, "learning_rate": 1.1665987869201245e-05, "loss": 0.96, "step": 59970 }, { "epoch": 0.86, "grad_norm": 0.57421875, "learning_rate": 1.1654255354415356e-05, "loss": 0.9051, "step": 59975 }, { "epoch": 0.86, "grad_norm": 0.53515625, "learning_rate": 1.1642528377241369e-05, "loss": 0.8809, "step": 59980 }, { "epoch": 0.86, "grad_norm": 0.5390625, "learning_rate": 1.1630806938414318e-05, "loss": 0.9754, "step": 59985 }, { "epoch": 0.86, "grad_norm": 0.515625, "learning_rate": 1.1619091038668928e-05, "loss": 0.8443, "step": 59990 }, { "epoch": 0.86, "grad_norm": 0.5234375, "learning_rate": 1.1607380678739598e-05, "loss": 0.949, "step": 59995 }, { "epoch": 0.86, "grad_norm": 0.5390625, "learning_rate": 1.1595675859360288e-05, "loss": 0.9439, "step": 60000 }, { "epoch": 0.86, "grad_norm": 0.5234375, "learning_rate": 1.158397658126471e-05, "loss": 0.8939, "step": 60005 }, { "epoch": 0.86, "grad_norm": 0.56640625, "learning_rate": 1.1572282845186188e-05, "loss": 0.9047, "step": 60010 }, { "epoch": 0.86, "grad_norm": 0.54296875, "learning_rate": 1.1560594651857692e-05, "loss": 0.9178, "step": 60015 }, { "epoch": 0.86, "grad_norm": 0.546875, "learning_rate": 1.1548912002011825e-05, "loss": 0.7978, "step": 60020 }, { "epoch": 0.86, "grad_norm": 0.50390625, "learning_rate": 1.1537234896380922e-05, "loss": 0.9594, "step": 60025 }, { "epoch": 0.86, "grad_norm": 0.515625, "learning_rate": 1.1525563335696854e-05, "loss": 0.887, "step": 60030 }, { "epoch": 0.86, "grad_norm": 0.5234375, "learning_rate": 1.1513897320691236e-05, "loss": 0.9302, "step": 60035 }, { "epoch": 0.86, "grad_norm": 0.59375, "learning_rate": 1.1502236852095338e-05, "loss": 0.8694, "step": 60040 }, { "epoch": 0.86, "grad_norm": 0.5703125, "learning_rate": 1.149058193064001e-05, "loss": 0.9651, "step": 60045 }, { "epoch": 0.86, "grad_norm": 0.54296875, "learning_rate": 1.1478932557055799e-05, "loss": 1.0108, "step": 60050 }, { "epoch": 0.86, "grad_norm": 0.51953125, "learning_rate": 1.1467288732072944e-05, "loss": 0.9062, "step": 60055 }, { "epoch": 0.86, "grad_norm": 0.5703125, "learning_rate": 1.1455650456421241e-05, "loss": 1.0262, "step": 60060 }, { "epoch": 0.86, "grad_norm": 0.60546875, "learning_rate": 1.1444017730830226e-05, "loss": 1.0358, "step": 60065 }, { "epoch": 0.86, "grad_norm": 0.5546875, "learning_rate": 1.1432390556029015e-05, "loss": 1.0133, "step": 60070 }, { "epoch": 0.86, "grad_norm": 0.5078125, "learning_rate": 1.142076893274645e-05, "loss": 0.8855, "step": 60075 }, { "epoch": 0.86, "grad_norm": 0.59375, "learning_rate": 1.1409152861710959e-05, "loss": 1.0818, "step": 60080 }, { "epoch": 0.86, "grad_norm": 0.6015625, "learning_rate": 1.1397542343650657e-05, "loss": 1.052, "step": 60085 }, { "epoch": 0.86, "grad_norm": 0.609375, "learning_rate": 1.1385937379293343e-05, "loss": 1.0159, "step": 60090 }, { "epoch": 0.86, "grad_norm": 0.50390625, "learning_rate": 1.1374337969366367e-05, "loss": 0.822, "step": 60095 }, { "epoch": 0.86, "grad_norm": 0.498046875, "learning_rate": 1.1362744114596823e-05, "loss": 1.0121, "step": 60100 }, { "epoch": 0.86, "grad_norm": 0.578125, "learning_rate": 1.1351155815711445e-05, "loss": 0.8235, "step": 60105 }, { "epoch": 0.86, "grad_norm": 0.53125, "learning_rate": 1.133957307343657e-05, "loss": 0.9591, "step": 60110 }, { "epoch": 0.86, "grad_norm": 0.609375, "learning_rate": 1.1327995888498233e-05, "loss": 1.0327, "step": 60115 }, { "epoch": 0.86, "grad_norm": 0.578125, "learning_rate": 1.1316424261622116e-05, "loss": 1.0982, "step": 60120 }, { "epoch": 0.86, "grad_norm": 0.55859375, "learning_rate": 1.130485819353353e-05, "loss": 1.0098, "step": 60125 }, { "epoch": 0.86, "grad_norm": 0.5078125, "learning_rate": 1.1293297684957427e-05, "loss": 0.965, "step": 60130 }, { "epoch": 0.86, "grad_norm": 0.51171875, "learning_rate": 1.1281742736618451e-05, "loss": 0.8862, "step": 60135 }, { "epoch": 0.86, "grad_norm": 0.5, "learning_rate": 1.127019334924091e-05, "loss": 0.9905, "step": 60140 }, { "epoch": 0.86, "grad_norm": 0.53515625, "learning_rate": 1.1258649523548681e-05, "loss": 0.919, "step": 60145 }, { "epoch": 0.86, "grad_norm": 0.53515625, "learning_rate": 1.1247111260265386e-05, "loss": 0.93, "step": 60150 }, { "epoch": 0.86, "grad_norm": 0.55859375, "learning_rate": 1.1235578560114212e-05, "loss": 0.87, "step": 60155 }, { "epoch": 0.86, "grad_norm": 0.53125, "learning_rate": 1.1224051423818083e-05, "loss": 0.9597, "step": 60160 }, { "epoch": 0.86, "grad_norm": 0.53515625, "learning_rate": 1.121252985209953e-05, "loss": 0.9734, "step": 60165 }, { "epoch": 0.86, "grad_norm": 0.54296875, "learning_rate": 1.1201013845680719e-05, "loss": 0.8142, "step": 60170 }, { "epoch": 0.86, "grad_norm": 0.5625, "learning_rate": 1.1189503405283486e-05, "loss": 1.0303, "step": 60175 }, { "epoch": 0.86, "grad_norm": 0.6171875, "learning_rate": 1.117799853162932e-05, "loss": 1.0191, "step": 60180 }, { "epoch": 0.86, "grad_norm": 0.52734375, "learning_rate": 1.1166499225439387e-05, "loss": 0.956, "step": 60185 }, { "epoch": 0.86, "grad_norm": 0.61328125, "learning_rate": 1.1155005487434433e-05, "loss": 0.9282, "step": 60190 }, { "epoch": 0.86, "grad_norm": 0.609375, "learning_rate": 1.1143517318334928e-05, "loss": 0.9121, "step": 60195 }, { "epoch": 0.86, "grad_norm": 0.71484375, "learning_rate": 1.1132034718860973e-05, "loss": 1.0497, "step": 60200 }, { "epoch": 0.86, "grad_norm": 0.68359375, "learning_rate": 1.112055768973227e-05, "loss": 0.9809, "step": 60205 }, { "epoch": 0.86, "grad_norm": 0.5546875, "learning_rate": 1.1109086231668243e-05, "loss": 0.934, "step": 60210 }, { "epoch": 0.86, "grad_norm": 0.54296875, "learning_rate": 1.1097620345387938e-05, "loss": 0.88, "step": 60215 }, { "epoch": 0.86, "grad_norm": 0.6015625, "learning_rate": 1.1086160031610038e-05, "loss": 1.0801, "step": 60220 }, { "epoch": 0.86, "grad_norm": 0.65234375, "learning_rate": 1.10747052910529e-05, "loss": 0.9801, "step": 60225 }, { "epoch": 0.86, "grad_norm": 0.625, "learning_rate": 1.1063256124434496e-05, "loss": 0.926, "step": 60230 }, { "epoch": 0.86, "grad_norm": 0.75, "learning_rate": 1.1051812532472505e-05, "loss": 0.9908, "step": 60235 }, { "epoch": 0.86, "grad_norm": 0.58984375, "learning_rate": 1.1040374515884189e-05, "loss": 0.9456, "step": 60240 }, { "epoch": 0.86, "grad_norm": 0.62890625, "learning_rate": 1.1028942075386517e-05, "loss": 1.0099, "step": 60245 }, { "epoch": 0.86, "grad_norm": 0.51953125, "learning_rate": 1.1017515211696095e-05, "loss": 0.9202, "step": 60250 }, { "epoch": 0.86, "grad_norm": 0.5546875, "learning_rate": 1.100609392552916e-05, "loss": 0.9572, "step": 60255 }, { "epoch": 0.86, "grad_norm": 0.58203125, "learning_rate": 1.0994678217601595e-05, "loss": 1.1325, "step": 60260 }, { "epoch": 0.86, "grad_norm": 0.55078125, "learning_rate": 1.0983268088629006e-05, "loss": 0.9196, "step": 60265 }, { "epoch": 0.86, "grad_norm": 0.59375, "learning_rate": 1.0971863539326521e-05, "loss": 1.079, "step": 60270 }, { "epoch": 0.86, "grad_norm": 0.51953125, "learning_rate": 1.0960464570409045e-05, "loss": 0.9669, "step": 60275 }, { "epoch": 0.86, "grad_norm": 0.53125, "learning_rate": 1.0949071182591064e-05, "loss": 1.0241, "step": 60280 }, { "epoch": 0.86, "grad_norm": 0.625, "learning_rate": 1.0937683376586693e-05, "loss": 0.7955, "step": 60285 }, { "epoch": 0.86, "grad_norm": 0.55859375, "learning_rate": 1.0926301153109774e-05, "loss": 0.9317, "step": 60290 }, { "epoch": 0.86, "grad_norm": 0.56640625, "learning_rate": 1.0914924512873759e-05, "loss": 0.9039, "step": 60295 }, { "epoch": 0.86, "grad_norm": 0.55078125, "learning_rate": 1.0903553456591709e-05, "loss": 0.9029, "step": 60300 }, { "epoch": 0.87, "grad_norm": 0.6328125, "learning_rate": 1.089218798497641e-05, "loss": 0.9796, "step": 60305 }, { "epoch": 0.87, "grad_norm": 0.5546875, "learning_rate": 1.088082809874027e-05, "loss": 1.0671, "step": 60310 }, { "epoch": 0.87, "grad_norm": 0.45703125, "learning_rate": 1.0869473798595298e-05, "loss": 0.7776, "step": 60315 }, { "epoch": 0.87, "grad_norm": 0.55859375, "learning_rate": 1.0858125085253223e-05, "loss": 0.9387, "step": 60320 }, { "epoch": 0.87, "grad_norm": 0.60546875, "learning_rate": 1.084678195942541e-05, "loss": 0.9069, "step": 60325 }, { "epoch": 0.87, "grad_norm": 0.5546875, "learning_rate": 1.0835444421822848e-05, "loss": 1.0661, "step": 60330 }, { "epoch": 0.87, "grad_norm": 0.5703125, "learning_rate": 1.0824112473156157e-05, "loss": 0.9517, "step": 60335 }, { "epoch": 0.87, "grad_norm": 0.5234375, "learning_rate": 1.0812786114135653e-05, "loss": 1.0505, "step": 60340 }, { "epoch": 0.87, "grad_norm": 0.59375, "learning_rate": 1.0801465345471318e-05, "loss": 1.0632, "step": 60345 }, { "epoch": 0.87, "grad_norm": 0.515625, "learning_rate": 1.0790150167872703e-05, "loss": 0.9453, "step": 60350 }, { "epoch": 0.87, "grad_norm": 0.52734375, "learning_rate": 1.0778840582049066e-05, "loss": 0.9767, "step": 60355 }, { "epoch": 0.87, "grad_norm": 0.58203125, "learning_rate": 1.0767536588709349e-05, "loss": 0.9211, "step": 60360 }, { "epoch": 0.87, "grad_norm": 0.625, "learning_rate": 1.075623818856204e-05, "loss": 0.9557, "step": 60365 }, { "epoch": 0.87, "grad_norm": 0.63671875, "learning_rate": 1.0744945382315364e-05, "loss": 0.9671, "step": 60370 }, { "epoch": 0.87, "grad_norm": 1.6015625, "learning_rate": 1.0733658170677185e-05, "loss": 1.0677, "step": 60375 }, { "epoch": 0.87, "grad_norm": 0.62890625, "learning_rate": 1.0722376554354973e-05, "loss": 1.0411, "step": 60380 }, { "epoch": 0.87, "grad_norm": 0.59765625, "learning_rate": 1.0711100534055862e-05, "loss": 0.9604, "step": 60385 }, { "epoch": 0.87, "grad_norm": 0.6015625, "learning_rate": 1.0699830110486675e-05, "loss": 1.0111, "step": 60390 }, { "epoch": 0.87, "grad_norm": 0.58203125, "learning_rate": 1.0688565284353825e-05, "loss": 1.0119, "step": 60395 }, { "epoch": 0.87, "grad_norm": 0.56640625, "learning_rate": 1.0677306056363424e-05, "loss": 0.9933, "step": 60400 }, { "epoch": 0.87, "grad_norm": 0.6015625, "learning_rate": 1.066605242722123e-05, "loss": 1.035, "step": 60405 }, { "epoch": 0.87, "grad_norm": 0.498046875, "learning_rate": 1.065480439763258e-05, "loss": 0.7821, "step": 60410 }, { "epoch": 0.87, "grad_norm": 0.484375, "learning_rate": 1.0643561968302552e-05, "loss": 0.9285, "step": 60415 }, { "epoch": 0.87, "grad_norm": 0.5703125, "learning_rate": 1.063232513993585e-05, "loss": 1.0274, "step": 60420 }, { "epoch": 0.87, "grad_norm": 0.70703125, "learning_rate": 1.062109391323678e-05, "loss": 0.9299, "step": 60425 }, { "epoch": 0.87, "grad_norm": 0.546875, "learning_rate": 1.0609868288909319e-05, "loss": 0.9836, "step": 60430 }, { "epoch": 0.87, "grad_norm": 0.546875, "learning_rate": 1.0598648267657151e-05, "loss": 0.9447, "step": 60435 }, { "epoch": 0.87, "grad_norm": 0.55078125, "learning_rate": 1.0587433850183525e-05, "loss": 0.8362, "step": 60440 }, { "epoch": 0.87, "grad_norm": 0.56640625, "learning_rate": 1.0576225037191367e-05, "loss": 0.9271, "step": 60445 }, { "epoch": 0.87, "grad_norm": 0.53125, "learning_rate": 1.056502182938326e-05, "loss": 0.976, "step": 60450 }, { "epoch": 0.87, "grad_norm": 0.5390625, "learning_rate": 1.0553824227461461e-05, "loss": 0.9894, "step": 60455 }, { "epoch": 0.87, "grad_norm": 0.6875, "learning_rate": 1.0542632232127814e-05, "loss": 1.0547, "step": 60460 }, { "epoch": 0.87, "grad_norm": 0.51953125, "learning_rate": 1.0531445844083865e-05, "loss": 0.9005, "step": 60465 }, { "epoch": 0.87, "grad_norm": 0.45703125, "learning_rate": 1.05202650640308e-05, "loss": 0.9292, "step": 60470 }, { "epoch": 0.87, "grad_norm": 0.6796875, "learning_rate": 1.0509089892669411e-05, "loss": 0.8289, "step": 60475 }, { "epoch": 0.87, "grad_norm": 0.5, "learning_rate": 1.0497920330700195e-05, "loss": 0.9028, "step": 60480 }, { "epoch": 0.87, "grad_norm": 0.5546875, "learning_rate": 1.0486756378823304e-05, "loss": 0.8278, "step": 60485 }, { "epoch": 0.87, "grad_norm": 0.546875, "learning_rate": 1.0475598037738433e-05, "loss": 0.9213, "step": 60490 }, { "epoch": 0.87, "grad_norm": 0.5390625, "learning_rate": 1.0464445308145022e-05, "loss": 0.9051, "step": 60495 }, { "epoch": 0.87, "grad_norm": 0.55078125, "learning_rate": 1.0453298190742189e-05, "loss": 1.0026, "step": 60500 }, { "epoch": 0.87, "grad_norm": 0.53515625, "learning_rate": 1.0442156686228576e-05, "loss": 0.9675, "step": 60505 }, { "epoch": 0.87, "grad_norm": 0.6015625, "learning_rate": 1.0431020795302592e-05, "loss": 1.1873, "step": 60510 }, { "epoch": 0.87, "grad_norm": 0.58984375, "learning_rate": 1.0419890518662256e-05, "loss": 0.9899, "step": 60515 }, { "epoch": 0.87, "grad_norm": 0.6015625, "learning_rate": 1.0408765857005177e-05, "loss": 1.0228, "step": 60520 }, { "epoch": 0.87, "grad_norm": 0.61328125, "learning_rate": 1.0397646811028695e-05, "loss": 1.025, "step": 60525 }, { "epoch": 0.87, "grad_norm": 0.5703125, "learning_rate": 1.0386533381429775e-05, "loss": 0.8756, "step": 60530 }, { "epoch": 0.87, "grad_norm": 0.5703125, "learning_rate": 1.0375425568904984e-05, "loss": 1.1357, "step": 60535 }, { "epoch": 0.87, "grad_norm": 0.5703125, "learning_rate": 1.0364323374150619e-05, "loss": 0.9211, "step": 60540 }, { "epoch": 0.87, "grad_norm": 0.51171875, "learning_rate": 1.0353226797862526e-05, "loss": 0.9579, "step": 60545 }, { "epoch": 0.87, "grad_norm": 0.46875, "learning_rate": 1.03421358407363e-05, "loss": 0.9193, "step": 60550 }, { "epoch": 0.87, "grad_norm": 0.59375, "learning_rate": 1.0331050503467088e-05, "loss": 1.0952, "step": 60555 }, { "epoch": 0.87, "grad_norm": 0.5234375, "learning_rate": 1.0319970786749755e-05, "loss": 1.0066, "step": 60560 }, { "epoch": 0.87, "grad_norm": 0.5546875, "learning_rate": 1.0308896691278812e-05, "loss": 0.8826, "step": 60565 }, { "epoch": 0.87, "grad_norm": 0.5703125, "learning_rate": 1.029782821774835e-05, "loss": 0.9592, "step": 60570 }, { "epoch": 0.87, "grad_norm": 0.5546875, "learning_rate": 1.028676536685218e-05, "loss": 0.8765, "step": 60575 }, { "epoch": 0.87, "grad_norm": 0.53125, "learning_rate": 1.0275708139283746e-05, "loss": 0.8267, "step": 60580 }, { "epoch": 0.87, "grad_norm": 0.53125, "learning_rate": 1.0264656535736106e-05, "loss": 0.9003, "step": 60585 }, { "epoch": 0.87, "grad_norm": 0.51953125, "learning_rate": 1.0253610556902005e-05, "loss": 0.9266, "step": 60590 }, { "epoch": 0.87, "grad_norm": 0.51171875, "learning_rate": 1.0242570203473811e-05, "loss": 0.9141, "step": 60595 }, { "epoch": 0.87, "grad_norm": 0.6484375, "learning_rate": 1.0231535476143516e-05, "loss": 0.9906, "step": 60600 }, { "epoch": 0.87, "grad_norm": 0.6015625, "learning_rate": 1.0220506375602823e-05, "loss": 0.9491, "step": 60605 }, { "epoch": 0.87, "grad_norm": 0.55078125, "learning_rate": 1.0209482902543055e-05, "loss": 0.8141, "step": 60610 }, { "epoch": 0.87, "grad_norm": 0.515625, "learning_rate": 1.0198465057655137e-05, "loss": 0.9482, "step": 60615 }, { "epoch": 0.87, "grad_norm": 0.65625, "learning_rate": 1.0187452841629718e-05, "loss": 0.9945, "step": 60620 }, { "epoch": 0.87, "grad_norm": 0.609375, "learning_rate": 1.0176446255157058e-05, "loss": 0.8699, "step": 60625 }, { "epoch": 0.87, "grad_norm": 0.486328125, "learning_rate": 1.0165445298927023e-05, "loss": 0.8981, "step": 60630 }, { "epoch": 0.87, "grad_norm": 0.60546875, "learning_rate": 1.0154449973629188e-05, "loss": 1.1487, "step": 60635 }, { "epoch": 0.87, "grad_norm": 0.4609375, "learning_rate": 1.0143460279952777e-05, "loss": 0.8834, "step": 60640 }, { "epoch": 0.87, "grad_norm": 0.546875, "learning_rate": 1.0132476218586618e-05, "loss": 1.0051, "step": 60645 }, { "epoch": 0.87, "grad_norm": 0.62109375, "learning_rate": 1.012149779021917e-05, "loss": 0.8582, "step": 60650 }, { "epoch": 0.87, "grad_norm": 0.5390625, "learning_rate": 1.0110524995538617e-05, "loss": 1.0623, "step": 60655 }, { "epoch": 0.87, "grad_norm": 0.515625, "learning_rate": 1.0099557835232743e-05, "loss": 1.0097, "step": 60660 }, { "epoch": 0.87, "grad_norm": 0.5703125, "learning_rate": 1.0088596309988952e-05, "loss": 1.0686, "step": 60665 }, { "epoch": 0.87, "grad_norm": 0.609375, "learning_rate": 1.0077640420494349e-05, "loss": 1.0395, "step": 60670 }, { "epoch": 0.87, "grad_norm": 0.53125, "learning_rate": 1.0066690167435677e-05, "loss": 0.8752, "step": 60675 }, { "epoch": 0.87, "grad_norm": 0.5546875, "learning_rate": 1.0055745551499263e-05, "loss": 0.8251, "step": 60680 }, { "epoch": 0.87, "grad_norm": 0.48046875, "learning_rate": 1.0044806573371168e-05, "loss": 0.7511, "step": 60685 }, { "epoch": 0.87, "grad_norm": 0.53515625, "learning_rate": 1.0033873233737056e-05, "loss": 1.025, "step": 60690 }, { "epoch": 0.87, "grad_norm": 0.55859375, "learning_rate": 1.0022945533282235e-05, "loss": 0.9772, "step": 60695 }, { "epoch": 0.87, "grad_norm": 0.49609375, "learning_rate": 1.0012023472691655e-05, "loss": 0.993, "step": 60700 }, { "epoch": 0.87, "grad_norm": 0.5546875, "learning_rate": 1.0001107052649939e-05, "loss": 0.8906, "step": 60705 }, { "epoch": 0.87, "grad_norm": 0.63671875, "learning_rate": 9.990196273841312e-06, "loss": 0.9415, "step": 60710 }, { "epoch": 0.87, "grad_norm": 0.515625, "learning_rate": 9.979291136949708e-06, "loss": 1.1337, "step": 60715 }, { "epoch": 0.87, "grad_norm": 0.5546875, "learning_rate": 9.96839164265867e-06, "loss": 0.8363, "step": 60720 }, { "epoch": 0.87, "grad_norm": 0.5390625, "learning_rate": 9.95749779165136e-06, "loss": 0.9499, "step": 60725 }, { "epoch": 0.87, "grad_norm": 0.50390625, "learning_rate": 9.946609584610644e-06, "loss": 0.8801, "step": 60730 }, { "epoch": 0.87, "grad_norm": 0.52734375, "learning_rate": 9.935727022219022e-06, "loss": 0.8072, "step": 60735 }, { "epoch": 0.87, "grad_norm": 0.5625, "learning_rate": 9.924850105158578e-06, "loss": 0.8885, "step": 60740 }, { "epoch": 0.87, "grad_norm": 0.609375, "learning_rate": 9.913978834111126e-06, "loss": 0.9325, "step": 60745 }, { "epoch": 0.87, "grad_norm": 0.51171875, "learning_rate": 9.903113209758096e-06, "loss": 0.981, "step": 60750 }, { "epoch": 0.87, "grad_norm": 0.54296875, "learning_rate": 9.892253232780536e-06, "loss": 0.8377, "step": 60755 }, { "epoch": 0.87, "grad_norm": 0.6015625, "learning_rate": 9.881398903859152e-06, "loss": 0.9308, "step": 60760 }, { "epoch": 0.87, "grad_norm": 0.62109375, "learning_rate": 9.870550223674323e-06, "loss": 1.1031, "step": 60765 }, { "epoch": 0.87, "grad_norm": 0.59375, "learning_rate": 9.859707192906076e-06, "loss": 0.7768, "step": 60770 }, { "epoch": 0.87, "grad_norm": 0.625, "learning_rate": 9.848869812234018e-06, "loss": 1.0453, "step": 60775 }, { "epoch": 0.87, "grad_norm": 0.578125, "learning_rate": 9.838038082337474e-06, "loss": 0.9651, "step": 60780 }, { "epoch": 0.87, "grad_norm": 0.609375, "learning_rate": 9.827212003895414e-06, "loss": 1.0032, "step": 60785 }, { "epoch": 0.87, "grad_norm": 0.46875, "learning_rate": 9.816391577586381e-06, "loss": 1.1093, "step": 60790 }, { "epoch": 0.87, "grad_norm": 0.53515625, "learning_rate": 9.805576804088633e-06, "loss": 0.8107, "step": 60795 }, { "epoch": 0.87, "grad_norm": 0.5546875, "learning_rate": 9.794767684080076e-06, "loss": 1.0138, "step": 60800 }, { "epoch": 0.87, "grad_norm": 0.546875, "learning_rate": 9.783964218238206e-06, "loss": 0.9424, "step": 60805 }, { "epoch": 0.87, "grad_norm": 0.74609375, "learning_rate": 9.773166407240197e-06, "loss": 0.9967, "step": 60810 }, { "epoch": 0.87, "grad_norm": 0.546875, "learning_rate": 9.762374251762873e-06, "loss": 0.8841, "step": 60815 }, { "epoch": 0.87, "grad_norm": 0.5390625, "learning_rate": 9.751587752482728e-06, "loss": 1.014, "step": 60820 }, { "epoch": 0.87, "grad_norm": 0.57421875, "learning_rate": 9.740806910075829e-06, "loss": 1.0144, "step": 60825 }, { "epoch": 0.87, "grad_norm": 0.51171875, "learning_rate": 9.73003172521796e-06, "loss": 0.9608, "step": 60830 }, { "epoch": 0.87, "grad_norm": 0.51171875, "learning_rate": 9.7192621985845e-06, "loss": 0.9867, "step": 60835 }, { "epoch": 0.87, "grad_norm": 0.5625, "learning_rate": 9.708498330850502e-06, "loss": 0.9342, "step": 60840 }, { "epoch": 0.87, "grad_norm": 0.4296875, "learning_rate": 9.697740122690679e-06, "loss": 0.9266, "step": 60845 }, { "epoch": 0.87, "grad_norm": 0.6640625, "learning_rate": 9.686987574779338e-06, "loss": 1.1289, "step": 60850 }, { "epoch": 0.87, "grad_norm": 0.51953125, "learning_rate": 9.676240687790484e-06, "loss": 0.8821, "step": 60855 }, { "epoch": 0.87, "grad_norm": 0.5703125, "learning_rate": 9.66549946239772e-06, "loss": 0.9076, "step": 60860 }, { "epoch": 0.87, "grad_norm": 0.6484375, "learning_rate": 9.654763899274355e-06, "loss": 0.9685, "step": 60865 }, { "epoch": 0.87, "grad_norm": 0.55078125, "learning_rate": 9.64403399909326e-06, "loss": 0.9358, "step": 60870 }, { "epoch": 0.87, "grad_norm": 0.625, "learning_rate": 9.63330976252702e-06, "loss": 1.0837, "step": 60875 }, { "epoch": 0.87, "grad_norm": 0.51171875, "learning_rate": 9.622591190247864e-06, "loss": 0.8836, "step": 60880 }, { "epoch": 0.87, "grad_norm": 0.66796875, "learning_rate": 9.6118782829276e-06, "loss": 1.0149, "step": 60885 }, { "epoch": 0.87, "grad_norm": 0.48046875, "learning_rate": 9.601171041237756e-06, "loss": 0.9759, "step": 60890 }, { "epoch": 0.87, "grad_norm": 0.63671875, "learning_rate": 9.590469465849484e-06, "loss": 0.9763, "step": 60895 }, { "epoch": 0.87, "grad_norm": 0.5703125, "learning_rate": 9.579773557433535e-06, "loss": 0.9364, "step": 60900 }, { "epoch": 0.87, "grad_norm": 0.5, "learning_rate": 9.569083316660377e-06, "loss": 0.8355, "step": 60905 }, { "epoch": 0.87, "grad_norm": 0.66796875, "learning_rate": 9.558398744200048e-06, "loss": 0.9732, "step": 60910 }, { "epoch": 0.87, "grad_norm": 0.6015625, "learning_rate": 9.547719840722301e-06, "loss": 0.9637, "step": 60915 }, { "epoch": 0.87, "grad_norm": 0.6171875, "learning_rate": 9.537046606896482e-06, "loss": 1.1266, "step": 60920 }, { "epoch": 0.87, "grad_norm": 0.498046875, "learning_rate": 9.52637904339162e-06, "loss": 0.8926, "step": 60925 }, { "epoch": 0.87, "grad_norm": 0.58984375, "learning_rate": 9.515717150876369e-06, "loss": 0.8966, "step": 60930 }, { "epoch": 0.87, "grad_norm": 0.482421875, "learning_rate": 9.505060930018995e-06, "loss": 0.9604, "step": 60935 }, { "epoch": 0.87, "grad_norm": 0.55078125, "learning_rate": 9.494410381487474e-06, "loss": 0.8663, "step": 60940 }, { "epoch": 0.87, "grad_norm": 0.59375, "learning_rate": 9.483765505949405e-06, "loss": 0.9535, "step": 60945 }, { "epoch": 0.87, "grad_norm": 0.5703125, "learning_rate": 9.473126304071989e-06, "loss": 0.9091, "step": 60950 }, { "epoch": 0.87, "grad_norm": 0.53125, "learning_rate": 9.462492776522114e-06, "loss": 0.9805, "step": 60955 }, { "epoch": 0.87, "grad_norm": 0.58203125, "learning_rate": 9.451864923966336e-06, "loss": 0.9339, "step": 60960 }, { "epoch": 0.87, "grad_norm": 0.58984375, "learning_rate": 9.441242747070766e-06, "loss": 0.9978, "step": 60965 }, { "epoch": 0.87, "grad_norm": 0.53125, "learning_rate": 9.430626246501228e-06, "loss": 0.9269, "step": 60970 }, { "epoch": 0.87, "grad_norm": 0.859375, "learning_rate": 9.420015422923212e-06, "loss": 1.0158, "step": 60975 }, { "epoch": 0.87, "grad_norm": 0.6875, "learning_rate": 9.409410277001762e-06, "loss": 0.9552, "step": 60980 }, { "epoch": 0.87, "grad_norm": 0.578125, "learning_rate": 9.39881080940166e-06, "loss": 1.0178, "step": 60985 }, { "epoch": 0.87, "grad_norm": 0.625, "learning_rate": 9.388217020787304e-06, "loss": 0.9533, "step": 60990 }, { "epoch": 0.87, "grad_norm": 0.4921875, "learning_rate": 9.377628911822678e-06, "loss": 0.9394, "step": 60995 }, { "epoch": 0.88, "grad_norm": 0.56640625, "learning_rate": 9.367046483171482e-06, "loss": 0.9528, "step": 61000 }, { "epoch": 0.88, "grad_norm": 0.55859375, "learning_rate": 9.356469735497053e-06, "loss": 1.0007, "step": 61005 }, { "epoch": 0.88, "grad_norm": 0.54296875, "learning_rate": 9.345898669462338e-06, "loss": 0.9128, "step": 61010 }, { "epoch": 0.88, "grad_norm": 0.55859375, "learning_rate": 9.33533328572992e-06, "loss": 0.997, "step": 61015 }, { "epoch": 0.88, "grad_norm": 0.546875, "learning_rate": 9.324773584962077e-06, "loss": 0.9481, "step": 61020 }, { "epoch": 0.88, "grad_norm": 0.5390625, "learning_rate": 9.314219567820703e-06, "loss": 0.899, "step": 61025 }, { "epoch": 0.88, "grad_norm": 0.6328125, "learning_rate": 9.303671234967315e-06, "loss": 1.0267, "step": 61030 }, { "epoch": 0.88, "grad_norm": 0.58984375, "learning_rate": 9.293128587063115e-06, "loss": 0.9971, "step": 61035 }, { "epoch": 0.88, "grad_norm": 0.58203125, "learning_rate": 9.282591624768932e-06, "loss": 0.9922, "step": 61040 }, { "epoch": 0.88, "grad_norm": 0.58203125, "learning_rate": 9.272060348745216e-06, "loss": 1.1262, "step": 61045 }, { "epoch": 0.88, "grad_norm": 0.515625, "learning_rate": 9.26153475965208e-06, "loss": 0.9235, "step": 61050 }, { "epoch": 0.88, "grad_norm": 0.5546875, "learning_rate": 9.251014858149298e-06, "loss": 0.8597, "step": 61055 }, { "epoch": 0.88, "grad_norm": 0.53515625, "learning_rate": 9.240500644896255e-06, "loss": 1.07, "step": 61060 }, { "epoch": 0.88, "grad_norm": 0.55859375, "learning_rate": 9.22999212055201e-06, "loss": 0.9589, "step": 61065 }, { "epoch": 0.88, "grad_norm": 0.625, "learning_rate": 9.21948928577524e-06, "loss": 0.9358, "step": 61070 }, { "epoch": 0.88, "grad_norm": 0.56640625, "learning_rate": 9.208992141224249e-06, "loss": 0.9216, "step": 61075 }, { "epoch": 0.88, "grad_norm": 0.53125, "learning_rate": 9.198500687557031e-06, "loss": 0.9737, "step": 61080 }, { "epoch": 0.88, "grad_norm": 0.59765625, "learning_rate": 9.188014925431233e-06, "loss": 1.0058, "step": 61085 }, { "epoch": 0.88, "grad_norm": 0.54296875, "learning_rate": 9.177534855504055e-06, "loss": 0.9112, "step": 61090 }, { "epoch": 0.88, "grad_norm": 0.546875, "learning_rate": 9.167060478432443e-06, "loss": 0.9753, "step": 61095 }, { "epoch": 0.88, "grad_norm": 0.5703125, "learning_rate": 9.156591794872938e-06, "loss": 1.0607, "step": 61100 }, { "epoch": 0.88, "grad_norm": 0.5234375, "learning_rate": 9.146128805481712e-06, "loss": 1.0579, "step": 61105 }, { "epoch": 0.88, "grad_norm": 0.49609375, "learning_rate": 9.1356715109146e-06, "loss": 0.8809, "step": 61110 }, { "epoch": 0.88, "grad_norm": 0.5234375, "learning_rate": 9.125219911827099e-06, "loss": 0.9079, "step": 61115 }, { "epoch": 0.88, "grad_norm": 0.60546875, "learning_rate": 9.114774008874317e-06, "loss": 1.0232, "step": 61120 }, { "epoch": 0.88, "grad_norm": 0.55859375, "learning_rate": 9.104333802710997e-06, "loss": 0.8465, "step": 61125 }, { "epoch": 0.88, "grad_norm": 0.58203125, "learning_rate": 9.093899293991548e-06, "loss": 0.9612, "step": 61130 }, { "epoch": 0.88, "grad_norm": 0.54296875, "learning_rate": 9.083470483370037e-06, "loss": 1.0004, "step": 61135 }, { "epoch": 0.88, "grad_norm": 0.65234375, "learning_rate": 9.073047371500133e-06, "loss": 1.0981, "step": 61140 }, { "epoch": 0.88, "grad_norm": 0.578125, "learning_rate": 9.062629959035173e-06, "loss": 0.9233, "step": 61145 }, { "epoch": 0.88, "grad_norm": 0.5703125, "learning_rate": 9.052218246628163e-06, "loss": 0.9419, "step": 61150 }, { "epoch": 0.88, "grad_norm": 0.5, "learning_rate": 9.041812234931667e-06, "loss": 0.8708, "step": 61155 }, { "epoch": 0.88, "grad_norm": 0.5234375, "learning_rate": 9.031411924597977e-06, "loss": 0.9768, "step": 61160 }, { "epoch": 0.88, "grad_norm": 0.58203125, "learning_rate": 9.021017316279001e-06, "loss": 1.0744, "step": 61165 }, { "epoch": 0.88, "grad_norm": 0.60546875, "learning_rate": 9.010628410626287e-06, "loss": 0.9773, "step": 61170 }, { "epoch": 0.88, "grad_norm": 0.54296875, "learning_rate": 9.00024520829098e-06, "loss": 0.9747, "step": 61175 }, { "epoch": 0.88, "grad_norm": 0.625, "learning_rate": 8.989867709923983e-06, "loss": 0.9236, "step": 61180 }, { "epoch": 0.88, "grad_norm": 0.58984375, "learning_rate": 8.979495916175695e-06, "loss": 0.9793, "step": 61185 }, { "epoch": 0.88, "grad_norm": 0.53515625, "learning_rate": 8.969129827696265e-06, "loss": 0.9123, "step": 61190 }, { "epoch": 0.88, "grad_norm": 0.546875, "learning_rate": 8.958769445135483e-06, "loss": 0.888, "step": 61195 }, { "epoch": 0.88, "grad_norm": 0.53125, "learning_rate": 8.948414769142698e-06, "loss": 0.9846, "step": 61200 }, { "epoch": 0.88, "grad_norm": 0.62109375, "learning_rate": 8.938065800366967e-06, "loss": 0.9033, "step": 61205 }, { "epoch": 0.88, "grad_norm": 0.5390625, "learning_rate": 8.927722539457006e-06, "loss": 0.9003, "step": 61210 }, { "epoch": 0.88, "grad_norm": 0.58203125, "learning_rate": 8.917384987061105e-06, "loss": 0.94, "step": 61215 }, { "epoch": 0.88, "grad_norm": 0.5625, "learning_rate": 8.90705314382726e-06, "loss": 1.0094, "step": 61220 }, { "epoch": 0.88, "grad_norm": 0.59375, "learning_rate": 8.89672701040305e-06, "loss": 1.0216, "step": 61225 }, { "epoch": 0.88, "grad_norm": 0.7109375, "learning_rate": 8.88640658743577e-06, "loss": 1.0245, "step": 61230 }, { "epoch": 0.88, "grad_norm": 0.5, "learning_rate": 8.876091875572279e-06, "loss": 0.9166, "step": 61235 }, { "epoch": 0.88, "grad_norm": 0.70703125, "learning_rate": 8.865782875459127e-06, "loss": 1.1021, "step": 61240 }, { "epoch": 0.88, "grad_norm": 0.51171875, "learning_rate": 8.855479587742521e-06, "loss": 0.9594, "step": 61245 }, { "epoch": 0.88, "grad_norm": 0.53125, "learning_rate": 8.845182013068231e-06, "loss": 0.8905, "step": 61250 }, { "epoch": 0.88, "grad_norm": 0.5546875, "learning_rate": 8.834890152081754e-06, "loss": 0.9806, "step": 61255 }, { "epoch": 0.88, "grad_norm": 0.5234375, "learning_rate": 8.824604005428217e-06, "loss": 0.9909, "step": 61260 }, { "epoch": 0.88, "grad_norm": 0.5390625, "learning_rate": 8.814323573752315e-06, "loss": 0.8444, "step": 61265 }, { "epoch": 0.88, "grad_norm": 0.62109375, "learning_rate": 8.804048857698466e-06, "loss": 0.9958, "step": 61270 }, { "epoch": 0.88, "grad_norm": 0.5078125, "learning_rate": 8.793779857910734e-06, "loss": 0.8591, "step": 61275 }, { "epoch": 0.88, "grad_norm": 0.53125, "learning_rate": 8.783516575032713e-06, "loss": 0.9333, "step": 61280 }, { "epoch": 0.88, "grad_norm": 0.48046875, "learning_rate": 8.773259009707769e-06, "loss": 0.9208, "step": 61285 }, { "epoch": 0.88, "grad_norm": 0.62890625, "learning_rate": 8.763007162578862e-06, "loss": 1.2006, "step": 61290 }, { "epoch": 0.88, "grad_norm": 0.67578125, "learning_rate": 8.75276103428856e-06, "loss": 1.1762, "step": 61295 }, { "epoch": 0.88, "grad_norm": 0.5390625, "learning_rate": 8.742520625479111e-06, "loss": 0.9718, "step": 61300 }, { "epoch": 0.88, "grad_norm": 0.6328125, "learning_rate": 8.732285936792427e-06, "loss": 1.0278, "step": 61305 }, { "epoch": 0.88, "grad_norm": 0.6015625, "learning_rate": 8.722056968869985e-06, "loss": 0.908, "step": 61310 }, { "epoch": 0.88, "grad_norm": 0.51171875, "learning_rate": 8.71183372235298e-06, "loss": 0.8661, "step": 61315 }, { "epoch": 0.88, "grad_norm": 0.5078125, "learning_rate": 8.701616197882211e-06, "loss": 1.0178, "step": 61320 }, { "epoch": 0.88, "grad_norm": 0.5859375, "learning_rate": 8.691404396098124e-06, "loss": 0.9051, "step": 61325 }, { "epoch": 0.88, "grad_norm": 0.5625, "learning_rate": 8.681198317640783e-06, "loss": 0.887, "step": 61330 }, { "epoch": 0.88, "grad_norm": 0.609375, "learning_rate": 8.67099796314993e-06, "loss": 0.9121, "step": 61335 }, { "epoch": 0.88, "grad_norm": 0.58203125, "learning_rate": 8.660803333264966e-06, "loss": 0.9048, "step": 61340 }, { "epoch": 0.88, "grad_norm": 0.59765625, "learning_rate": 8.650614428624848e-06, "loss": 0.9388, "step": 61345 }, { "epoch": 0.88, "grad_norm": 0.515625, "learning_rate": 8.640431249868264e-06, "loss": 0.9448, "step": 61350 }, { "epoch": 0.88, "grad_norm": 0.5703125, "learning_rate": 8.630253797633514e-06, "loss": 0.8843, "step": 61355 }, { "epoch": 0.88, "grad_norm": 0.546875, "learning_rate": 8.620082072558499e-06, "loss": 0.9721, "step": 61360 }, { "epoch": 0.88, "grad_norm": 0.53125, "learning_rate": 8.609916075280821e-06, "loss": 0.8592, "step": 61365 }, { "epoch": 0.88, "grad_norm": 0.53515625, "learning_rate": 8.599755806437693e-06, "loss": 0.8063, "step": 61370 }, { "epoch": 0.88, "grad_norm": 0.51953125, "learning_rate": 8.58960126666596e-06, "loss": 0.9778, "step": 61375 }, { "epoch": 0.88, "grad_norm": 0.4921875, "learning_rate": 8.579452456602133e-06, "loss": 0.9136, "step": 61380 }, { "epoch": 0.88, "grad_norm": 0.53125, "learning_rate": 8.569309376882362e-06, "loss": 0.9862, "step": 61385 }, { "epoch": 0.88, "grad_norm": 0.53125, "learning_rate": 8.559172028142381e-06, "loss": 0.9479, "step": 61390 }, { "epoch": 0.88, "grad_norm": 0.6015625, "learning_rate": 8.549040411017639e-06, "loss": 1.0091, "step": 61395 }, { "epoch": 0.88, "grad_norm": 0.6015625, "learning_rate": 8.538914526143216e-06, "loss": 0.9978, "step": 61400 }, { "epoch": 0.88, "grad_norm": 0.50390625, "learning_rate": 8.528794374153781e-06, "loss": 0.7958, "step": 61405 }, { "epoch": 0.88, "grad_norm": 0.640625, "learning_rate": 8.518679955683694e-06, "loss": 1.1249, "step": 61410 }, { "epoch": 0.88, "grad_norm": 0.55078125, "learning_rate": 8.50857127136695e-06, "loss": 0.9851, "step": 61415 }, { "epoch": 0.88, "grad_norm": 0.51171875, "learning_rate": 8.498468321837139e-06, "loss": 1.1021, "step": 61420 }, { "epoch": 0.88, "grad_norm": 0.56640625, "learning_rate": 8.488371107727545e-06, "loss": 0.9302, "step": 61425 }, { "epoch": 0.88, "grad_norm": 0.5625, "learning_rate": 8.478279629671094e-06, "loss": 0.9321, "step": 61430 }, { "epoch": 0.88, "grad_norm": 0.62890625, "learning_rate": 8.468193888300313e-06, "loss": 0.9139, "step": 61435 }, { "epoch": 0.88, "grad_norm": 0.5625, "learning_rate": 8.458113884247365e-06, "loss": 0.9128, "step": 61440 }, { "epoch": 0.88, "grad_norm": 0.546875, "learning_rate": 8.448039618144089e-06, "loss": 0.8569, "step": 61445 }, { "epoch": 0.88, "grad_norm": 0.53515625, "learning_rate": 8.437971090621988e-06, "loss": 1.0033, "step": 61450 }, { "epoch": 0.88, "grad_norm": 0.58984375, "learning_rate": 8.427908302312115e-06, "loss": 0.9779, "step": 61455 }, { "epoch": 0.88, "grad_norm": 0.69921875, "learning_rate": 8.417851253845244e-06, "loss": 0.7853, "step": 61460 }, { "epoch": 0.88, "grad_norm": 0.62890625, "learning_rate": 8.40779994585178e-06, "loss": 1.0948, "step": 61465 }, { "epoch": 0.88, "grad_norm": 0.5703125, "learning_rate": 8.397754378961709e-06, "loss": 1.0884, "step": 61470 }, { "epoch": 0.88, "grad_norm": 0.59375, "learning_rate": 8.387714553804716e-06, "loss": 1.2371, "step": 61475 }, { "epoch": 0.88, "grad_norm": 0.5234375, "learning_rate": 8.377680471010129e-06, "loss": 0.9435, "step": 61480 }, { "epoch": 0.88, "grad_norm": 0.57421875, "learning_rate": 8.367652131206882e-06, "loss": 1.043, "step": 61485 }, { "epoch": 0.88, "grad_norm": 0.578125, "learning_rate": 8.357629535023547e-06, "loss": 0.8901, "step": 61490 }, { "epoch": 0.88, "grad_norm": 0.59375, "learning_rate": 8.34761268308838e-06, "loss": 0.9678, "step": 61495 }, { "epoch": 0.88, "grad_norm": 0.55859375, "learning_rate": 8.337601576029219e-06, "loss": 0.8858, "step": 61500 }, { "epoch": 0.88, "grad_norm": 0.56640625, "learning_rate": 8.327596214473576e-06, "loss": 1.0307, "step": 61505 }, { "epoch": 0.88, "grad_norm": 0.490234375, "learning_rate": 8.317596599048638e-06, "loss": 0.94, "step": 61510 }, { "epoch": 0.88, "grad_norm": 0.5703125, "learning_rate": 8.307602730381137e-06, "loss": 1.0737, "step": 61515 }, { "epoch": 0.88, "grad_norm": 0.55078125, "learning_rate": 8.297614609097526e-06, "loss": 0.9876, "step": 61520 }, { "epoch": 0.88, "grad_norm": 0.578125, "learning_rate": 8.287632235823884e-06, "loss": 0.946, "step": 61525 }, { "epoch": 0.88, "grad_norm": 0.5859375, "learning_rate": 8.277655611185897e-06, "loss": 0.9989, "step": 61530 }, { "epoch": 0.88, "grad_norm": 0.59375, "learning_rate": 8.267684735808934e-06, "loss": 0.8744, "step": 61535 }, { "epoch": 0.88, "grad_norm": 0.55859375, "learning_rate": 8.25771961031795e-06, "loss": 0.8663, "step": 61540 }, { "epoch": 0.88, "grad_norm": 0.546875, "learning_rate": 8.247760235337598e-06, "loss": 0.9953, "step": 61545 }, { "epoch": 0.88, "grad_norm": 0.52734375, "learning_rate": 8.237806611492127e-06, "loss": 0.8985, "step": 61550 }, { "epoch": 0.88, "grad_norm": 0.56640625, "learning_rate": 8.227858739405436e-06, "loss": 1.0169, "step": 61555 }, { "epoch": 0.88, "grad_norm": 0.61328125, "learning_rate": 8.217916619701093e-06, "loss": 0.8976, "step": 61560 }, { "epoch": 0.88, "grad_norm": 0.5859375, "learning_rate": 8.207980253002257e-06, "loss": 0.9396, "step": 61565 }, { "epoch": 0.88, "grad_norm": 0.58984375, "learning_rate": 8.19804963993176e-06, "loss": 0.8884, "step": 61570 }, { "epoch": 0.88, "grad_norm": 0.52734375, "learning_rate": 8.188124781112082e-06, "loss": 0.8163, "step": 61575 }, { "epoch": 0.88, "grad_norm": 0.5078125, "learning_rate": 8.178205677165285e-06, "loss": 0.8764, "step": 61580 }, { "epoch": 0.88, "grad_norm": 0.62890625, "learning_rate": 8.168292328713145e-06, "loss": 1.0038, "step": 61585 }, { "epoch": 0.88, "grad_norm": 0.79296875, "learning_rate": 8.158384736377033e-06, "loss": 0.9548, "step": 61590 }, { "epoch": 0.88, "grad_norm": 0.5859375, "learning_rate": 8.148482900777976e-06, "loss": 1.0273, "step": 61595 }, { "epoch": 0.88, "grad_norm": 0.55078125, "learning_rate": 8.138586822536597e-06, "loss": 0.9237, "step": 61600 }, { "epoch": 0.88, "grad_norm": 0.51953125, "learning_rate": 8.128696502273214e-06, "loss": 0.9631, "step": 61605 }, { "epoch": 0.88, "grad_norm": 0.51171875, "learning_rate": 8.118811940607795e-06, "loss": 0.9891, "step": 61610 }, { "epoch": 0.88, "grad_norm": 0.52734375, "learning_rate": 8.108933138159857e-06, "loss": 0.9581, "step": 61615 }, { "epoch": 0.88, "grad_norm": 0.54296875, "learning_rate": 8.099060095548661e-06, "loss": 0.9396, "step": 61620 }, { "epoch": 0.88, "grad_norm": 0.60546875, "learning_rate": 8.089192813393042e-06, "loss": 1.2638, "step": 61625 }, { "epoch": 0.88, "grad_norm": 0.52734375, "learning_rate": 8.079331292311498e-06, "loss": 0.9143, "step": 61630 }, { "epoch": 0.88, "grad_norm": 0.5234375, "learning_rate": 8.069475532922166e-06, "loss": 1.0492, "step": 61635 }, { "epoch": 0.88, "grad_norm": 0.58203125, "learning_rate": 8.059625535842807e-06, "loss": 0.8953, "step": 61640 }, { "epoch": 0.88, "grad_norm": 0.515625, "learning_rate": 8.049781301690806e-06, "loss": 1.0398, "step": 61645 }, { "epoch": 0.88, "grad_norm": 0.5859375, "learning_rate": 8.039942831083246e-06, "loss": 0.9835, "step": 61650 }, { "epoch": 0.88, "grad_norm": 0.5546875, "learning_rate": 8.030110124636814e-06, "loss": 0.8837, "step": 61655 }, { "epoch": 0.88, "grad_norm": 0.53125, "learning_rate": 8.020283182967814e-06, "loss": 0.9021, "step": 61660 }, { "epoch": 0.88, "grad_norm": 0.5625, "learning_rate": 8.010462006692221e-06, "loss": 0.966, "step": 61665 }, { "epoch": 0.88, "grad_norm": 0.57421875, "learning_rate": 8.000646596425643e-06, "loss": 0.9157, "step": 61670 }, { "epoch": 0.88, "grad_norm": 0.58984375, "learning_rate": 7.99083695278331e-06, "loss": 0.9347, "step": 61675 }, { "epoch": 0.88, "grad_norm": 0.56640625, "learning_rate": 7.981033076380095e-06, "loss": 0.9805, "step": 61680 }, { "epoch": 0.88, "grad_norm": 0.54296875, "learning_rate": 7.971234967830554e-06, "loss": 0.8568, "step": 61685 }, { "epoch": 0.88, "grad_norm": 0.52734375, "learning_rate": 7.961442627748793e-06, "loss": 0.903, "step": 61690 }, { "epoch": 0.88, "grad_norm": 0.55078125, "learning_rate": 7.951656056748658e-06, "loss": 0.9831, "step": 61695 }, { "epoch": 0.89, "grad_norm": 0.56640625, "learning_rate": 7.941875255443532e-06, "loss": 0.9424, "step": 61700 }, { "epoch": 0.89, "grad_norm": 0.62890625, "learning_rate": 7.932100224446526e-06, "loss": 0.9975, "step": 61705 }, { "epoch": 0.89, "grad_norm": 0.56640625, "learning_rate": 7.922330964370316e-06, "loss": 1.0486, "step": 61710 }, { "epoch": 0.89, "grad_norm": 0.69140625, "learning_rate": 7.912567475827271e-06, "loss": 1.1237, "step": 61715 }, { "epoch": 0.89, "grad_norm": 0.53125, "learning_rate": 7.902809759429398e-06, "loss": 0.9946, "step": 61720 }, { "epoch": 0.89, "grad_norm": 0.58984375, "learning_rate": 7.893057815788273e-06, "loss": 0.9619, "step": 61725 }, { "epoch": 0.89, "grad_norm": 0.5390625, "learning_rate": 7.88331164551519e-06, "loss": 0.8598, "step": 61730 }, { "epoch": 0.89, "grad_norm": 0.5390625, "learning_rate": 7.873571249221057e-06, "loss": 0.924, "step": 61735 }, { "epoch": 0.89, "grad_norm": 0.51953125, "learning_rate": 7.863836627516396e-06, "loss": 0.974, "step": 61740 }, { "epoch": 0.89, "grad_norm": 0.51171875, "learning_rate": 7.854107781011399e-06, "loss": 1.0555, "step": 61745 }, { "epoch": 0.89, "grad_norm": 0.58984375, "learning_rate": 7.844384710315867e-06, "loss": 1.0496, "step": 61750 }, { "epoch": 0.89, "grad_norm": 0.53125, "learning_rate": 7.834667416039254e-06, "loss": 0.9431, "step": 61755 }, { "epoch": 0.89, "grad_norm": 0.609375, "learning_rate": 7.824955898790642e-06, "loss": 0.9346, "step": 61760 }, { "epoch": 0.89, "grad_norm": 0.5546875, "learning_rate": 7.8152501591788e-06, "loss": 1.0252, "step": 61765 }, { "epoch": 0.89, "grad_norm": 0.5546875, "learning_rate": 7.80555019781204e-06, "loss": 0.9873, "step": 61770 }, { "epoch": 0.89, "grad_norm": 0.66796875, "learning_rate": 7.795856015298397e-06, "loss": 0.813, "step": 61775 }, { "epoch": 0.89, "grad_norm": 0.51953125, "learning_rate": 7.786167612245531e-06, "loss": 0.8898, "step": 61780 }, { "epoch": 0.89, "grad_norm": 0.625, "learning_rate": 7.776484989260691e-06, "loss": 1.0275, "step": 61785 }, { "epoch": 0.89, "grad_norm": 0.53125, "learning_rate": 7.7668081469508e-06, "loss": 0.8083, "step": 61790 }, { "epoch": 0.89, "grad_norm": 0.498046875, "learning_rate": 7.757137085922428e-06, "loss": 0.9573, "step": 61795 }, { "epoch": 0.89, "grad_norm": 0.55859375, "learning_rate": 7.74747180678177e-06, "loss": 0.9706, "step": 61800 }, { "epoch": 0.89, "grad_norm": 0.609375, "learning_rate": 7.737812310134617e-06, "loss": 1.0688, "step": 61805 }, { "epoch": 0.89, "grad_norm": 0.59375, "learning_rate": 7.728158596586477e-06, "loss": 1.0002, "step": 61810 }, { "epoch": 0.89, "grad_norm": 0.5703125, "learning_rate": 7.718510666742462e-06, "loss": 0.977, "step": 61815 }, { "epoch": 0.89, "grad_norm": 0.5, "learning_rate": 7.70886852120728e-06, "loss": 0.9562, "step": 61820 }, { "epoch": 0.89, "grad_norm": 0.54296875, "learning_rate": 7.699232160585324e-06, "loss": 1.0195, "step": 61825 }, { "epoch": 0.89, "grad_norm": 0.48046875, "learning_rate": 7.689601585480643e-06, "loss": 1.0175, "step": 61830 }, { "epoch": 0.89, "grad_norm": 0.53515625, "learning_rate": 7.679976796496846e-06, "loss": 0.8564, "step": 61835 }, { "epoch": 0.89, "grad_norm": 0.58203125, "learning_rate": 7.670357794237249e-06, "loss": 0.9566, "step": 61840 }, { "epoch": 0.89, "grad_norm": 0.498046875, "learning_rate": 7.6607445793048e-06, "loss": 0.9369, "step": 61845 }, { "epoch": 0.89, "grad_norm": 0.57421875, "learning_rate": 7.651137152302035e-06, "loss": 1.0571, "step": 61850 }, { "epoch": 0.89, "grad_norm": 0.59375, "learning_rate": 7.641535513831165e-06, "loss": 0.995, "step": 61855 }, { "epoch": 0.89, "grad_norm": 0.68359375, "learning_rate": 7.631939664494048e-06, "loss": 0.9018, "step": 61860 }, { "epoch": 0.89, "grad_norm": 0.53125, "learning_rate": 7.622349604892131e-06, "loss": 0.9492, "step": 61865 }, { "epoch": 0.89, "grad_norm": 0.53125, "learning_rate": 7.612765335626559e-06, "loss": 1.0655, "step": 61870 }, { "epoch": 0.89, "grad_norm": 0.474609375, "learning_rate": 7.603186857298083e-06, "loss": 0.9456, "step": 61875 }, { "epoch": 0.89, "grad_norm": 0.53515625, "learning_rate": 7.59361417050708e-06, "loss": 0.72, "step": 61880 }, { "epoch": 0.89, "grad_norm": 0.482421875, "learning_rate": 7.584047275853578e-06, "loss": 1.0067, "step": 61885 }, { "epoch": 0.89, "grad_norm": 0.5390625, "learning_rate": 7.574486173937256e-06, "loss": 1.0156, "step": 61890 }, { "epoch": 0.89, "grad_norm": 0.6484375, "learning_rate": 7.564930865357389e-06, "loss": 0.9955, "step": 61895 }, { "epoch": 0.89, "grad_norm": 0.484375, "learning_rate": 7.5553813507129426e-06, "loss": 0.8775, "step": 61900 }, { "epoch": 0.89, "grad_norm": 0.50390625, "learning_rate": 7.545837630602481e-06, "loss": 0.8675, "step": 61905 }, { "epoch": 0.89, "grad_norm": 0.578125, "learning_rate": 7.536299705624217e-06, "loss": 0.9599, "step": 61910 }, { "epoch": 0.89, "grad_norm": 0.53515625, "learning_rate": 7.526767576375982e-06, "loss": 1.0218, "step": 61915 }, { "epoch": 0.89, "grad_norm": 0.578125, "learning_rate": 7.5172412434552756e-06, "loss": 0.8365, "step": 61920 }, { "epoch": 0.89, "grad_norm": 0.54296875, "learning_rate": 7.507720707459232e-06, "loss": 1.0037, "step": 61925 }, { "epoch": 0.89, "grad_norm": 0.51171875, "learning_rate": 7.4982059689845726e-06, "loss": 1.0069, "step": 61930 }, { "epoch": 0.89, "grad_norm": 0.6171875, "learning_rate": 7.488697028627711e-06, "loss": 0.9896, "step": 61935 }, { "epoch": 0.89, "grad_norm": 0.5859375, "learning_rate": 7.479193886984703e-06, "loss": 0.9226, "step": 61940 }, { "epoch": 0.89, "grad_norm": 0.50390625, "learning_rate": 7.469696544651184e-06, "loss": 0.944, "step": 61945 }, { "epoch": 0.89, "grad_norm": 0.515625, "learning_rate": 7.460205002222464e-06, "loss": 0.9059, "step": 61950 }, { "epoch": 0.89, "grad_norm": 0.5625, "learning_rate": 7.450719260293515e-06, "loss": 0.9737, "step": 61955 }, { "epoch": 0.89, "grad_norm": 0.5625, "learning_rate": 7.44123931945887e-06, "loss": 0.8882, "step": 61960 }, { "epoch": 0.89, "grad_norm": 0.53125, "learning_rate": 7.431765180312744e-06, "loss": 0.797, "step": 61965 }, { "epoch": 0.89, "grad_norm": 0.65625, "learning_rate": 7.422296843449028e-06, "loss": 0.9947, "step": 61970 }, { "epoch": 0.89, "grad_norm": 0.57421875, "learning_rate": 7.412834309461159e-06, "loss": 0.9082, "step": 61975 }, { "epoch": 0.89, "grad_norm": 0.58203125, "learning_rate": 7.403377578942294e-06, "loss": 0.9802, "step": 61980 }, { "epoch": 0.89, "grad_norm": 0.5859375, "learning_rate": 7.393926652485195e-06, "loss": 1.0125, "step": 61985 }, { "epoch": 0.89, "grad_norm": 0.609375, "learning_rate": 7.384481530682219e-06, "loss": 0.9947, "step": 61990 }, { "epoch": 0.89, "grad_norm": 0.578125, "learning_rate": 7.3750422141254275e-06, "loss": 1.0376, "step": 61995 }, { "epoch": 0.89, "grad_norm": 0.5625, "learning_rate": 7.3656087034064904e-06, "loss": 1.016, "step": 62000 }, { "epoch": 0.89, "grad_norm": 0.55859375, "learning_rate": 7.356180999116691e-06, "loss": 0.9243, "step": 62005 }, { "epoch": 0.89, "grad_norm": 0.62109375, "learning_rate": 7.346759101847e-06, "loss": 1.0494, "step": 62010 }, { "epoch": 0.89, "grad_norm": 0.51953125, "learning_rate": 7.337343012187947e-06, "loss": 0.8579, "step": 62015 }, { "epoch": 0.89, "grad_norm": 0.5703125, "learning_rate": 7.32793273072978e-06, "loss": 0.8811, "step": 62020 }, { "epoch": 0.89, "grad_norm": 0.53515625, "learning_rate": 7.318528258062329e-06, "loss": 0.8636, "step": 62025 }, { "epoch": 0.89, "grad_norm": 1.5390625, "learning_rate": 7.309129594775077e-06, "loss": 1.0092, "step": 62030 }, { "epoch": 0.89, "grad_norm": 0.5390625, "learning_rate": 7.299736741457164e-06, "loss": 1.0579, "step": 62035 }, { "epoch": 0.89, "grad_norm": 0.58984375, "learning_rate": 7.290349698697318e-06, "loss": 0.9998, "step": 62040 }, { "epoch": 0.89, "grad_norm": 0.546875, "learning_rate": 7.280968467083937e-06, "loss": 0.7762, "step": 62045 }, { "epoch": 0.89, "grad_norm": 0.6171875, "learning_rate": 7.271593047205061e-06, "loss": 0.9415, "step": 62050 }, { "epoch": 0.89, "grad_norm": 0.55859375, "learning_rate": 7.2622234396483306e-06, "loss": 0.8974, "step": 62055 }, { "epoch": 0.89, "grad_norm": 0.462890625, "learning_rate": 7.252859645001075e-06, "loss": 1.0368, "step": 62060 }, { "epoch": 0.89, "grad_norm": 0.52734375, "learning_rate": 7.243501663850205e-06, "loss": 0.9015, "step": 62065 }, { "epoch": 0.89, "grad_norm": 0.59765625, "learning_rate": 7.234149496782272e-06, "loss": 1.1338, "step": 62070 }, { "epoch": 0.89, "grad_norm": 0.56640625, "learning_rate": 7.224803144383496e-06, "loss": 0.8891, "step": 62075 }, { "epoch": 0.89, "grad_norm": 0.578125, "learning_rate": 7.215462607239743e-06, "loss": 1.0407, "step": 62080 }, { "epoch": 0.89, "grad_norm": 0.5390625, "learning_rate": 7.206127885936453e-06, "loss": 0.8616, "step": 62085 }, { "epoch": 0.89, "grad_norm": 0.5703125, "learning_rate": 7.196798981058739e-06, "loss": 1.0154, "step": 62090 }, { "epoch": 0.89, "grad_norm": 0.55859375, "learning_rate": 7.187475893191387e-06, "loss": 0.9882, "step": 62095 }, { "epoch": 0.89, "grad_norm": 0.578125, "learning_rate": 7.178158622918729e-06, "loss": 0.8684, "step": 62100 }, { "epoch": 0.89, "grad_norm": 0.5546875, "learning_rate": 7.168847170824811e-06, "loss": 0.9177, "step": 62105 }, { "epoch": 0.89, "grad_norm": 0.49609375, "learning_rate": 7.159541537493286e-06, "loss": 0.9148, "step": 62110 }, { "epoch": 0.89, "grad_norm": 0.56640625, "learning_rate": 7.150241723507433e-06, "loss": 1.0282, "step": 62115 }, { "epoch": 0.89, "grad_norm": 0.62890625, "learning_rate": 7.1409477294501645e-06, "loss": 1.0454, "step": 62120 }, { "epoch": 0.89, "grad_norm": 0.5078125, "learning_rate": 7.1316595559040465e-06, "loss": 0.9332, "step": 62125 }, { "epoch": 0.89, "grad_norm": 0.51171875, "learning_rate": 7.122377203451292e-06, "loss": 1.0585, "step": 62130 }, { "epoch": 0.89, "grad_norm": 0.59765625, "learning_rate": 7.113100672673701e-06, "loss": 0.9731, "step": 62135 }, { "epoch": 0.89, "grad_norm": 0.5703125, "learning_rate": 7.1038299641527416e-06, "loss": 0.869, "step": 62140 }, { "epoch": 0.89, "grad_norm": 0.53125, "learning_rate": 7.0945650784695396e-06, "loss": 1.0144, "step": 62145 }, { "epoch": 0.89, "grad_norm": 0.515625, "learning_rate": 7.085306016204796e-06, "loss": 0.9836, "step": 62150 }, { "epoch": 0.89, "grad_norm": 0.53125, "learning_rate": 7.076052777938891e-06, "loss": 0.9506, "step": 62155 }, { "epoch": 0.89, "grad_norm": 0.51953125, "learning_rate": 7.066805364251849e-06, "loss": 0.9612, "step": 62160 }, { "epoch": 0.89, "grad_norm": 0.52734375, "learning_rate": 7.057563775723286e-06, "loss": 0.8578, "step": 62165 }, { "epoch": 0.89, "grad_norm": 0.478515625, "learning_rate": 7.048328012932459e-06, "loss": 0.9426, "step": 62170 }, { "epoch": 0.89, "grad_norm": 0.546875, "learning_rate": 7.039098076458306e-06, "loss": 0.8757, "step": 62175 }, { "epoch": 0.89, "grad_norm": 0.47265625, "learning_rate": 7.029873966879352e-06, "loss": 0.9648, "step": 62180 }, { "epoch": 0.89, "grad_norm": 0.60546875, "learning_rate": 7.02065568477378e-06, "loss": 0.8497, "step": 62185 }, { "epoch": 0.89, "grad_norm": 0.58984375, "learning_rate": 7.011443230719428e-06, "loss": 0.8719, "step": 62190 }, { "epoch": 0.89, "grad_norm": 0.5546875, "learning_rate": 7.0022366052936885e-06, "loss": 1.085, "step": 62195 }, { "epoch": 0.89, "grad_norm": 0.58984375, "learning_rate": 6.993035809073678e-06, "loss": 1.1285, "step": 62200 }, { "epoch": 0.89, "grad_norm": 0.62890625, "learning_rate": 6.983840842636136e-06, "loss": 1.1114, "step": 62205 }, { "epoch": 0.89, "grad_norm": 0.61328125, "learning_rate": 6.9746517065573556e-06, "loss": 0.979, "step": 62210 }, { "epoch": 0.89, "grad_norm": 0.58984375, "learning_rate": 6.965468401413366e-06, "loss": 0.8401, "step": 62215 }, { "epoch": 0.89, "grad_norm": 0.53125, "learning_rate": 6.956290927779785e-06, "loss": 1.0541, "step": 62220 }, { "epoch": 0.89, "grad_norm": 0.6015625, "learning_rate": 6.947119286231851e-06, "loss": 1.0197, "step": 62225 }, { "epoch": 0.89, "grad_norm": 0.55078125, "learning_rate": 6.93795347734445e-06, "loss": 1.0151, "step": 62230 }, { "epoch": 0.89, "grad_norm": 0.58203125, "learning_rate": 6.92879350169211e-06, "loss": 1.0242, "step": 62235 }, { "epoch": 0.89, "grad_norm": 0.55859375, "learning_rate": 6.9196393598490175e-06, "loss": 0.9719, "step": 62240 }, { "epoch": 0.89, "grad_norm": 0.5703125, "learning_rate": 6.910491052388912e-06, "loss": 0.9262, "step": 62245 }, { "epoch": 0.89, "grad_norm": 0.62109375, "learning_rate": 6.901348579885258e-06, "loss": 1.1226, "step": 62250 }, { "epoch": 0.89, "grad_norm": 0.5859375, "learning_rate": 6.892211942911109e-06, "loss": 1.05, "step": 62255 }, { "epoch": 0.89, "grad_norm": 0.625, "learning_rate": 6.88308114203915e-06, "loss": 0.8664, "step": 62260 }, { "epoch": 0.89, "grad_norm": 0.609375, "learning_rate": 6.873956177841711e-06, "loss": 1.0119, "step": 62265 }, { "epoch": 0.89, "grad_norm": 0.546875, "learning_rate": 6.8648370508907825e-06, "loss": 1.0299, "step": 62270 }, { "epoch": 0.89, "grad_norm": 0.54296875, "learning_rate": 6.855723761757926e-06, "loss": 0.8667, "step": 62275 }, { "epoch": 0.89, "grad_norm": 0.58203125, "learning_rate": 6.846616311014386e-06, "loss": 1.0801, "step": 62280 }, { "epoch": 0.89, "grad_norm": 0.5703125, "learning_rate": 6.837514699231018e-06, "loss": 1.0818, "step": 62285 }, { "epoch": 0.89, "grad_norm": 0.51171875, "learning_rate": 6.828418926978353e-06, "loss": 0.9123, "step": 62290 }, { "epoch": 0.89, "grad_norm": 0.5859375, "learning_rate": 6.819328994826491e-06, "loss": 0.9992, "step": 62295 }, { "epoch": 0.89, "grad_norm": 0.5078125, "learning_rate": 6.810244903345209e-06, "loss": 1.0025, "step": 62300 }, { "epoch": 0.89, "grad_norm": 0.56640625, "learning_rate": 6.8011666531039185e-06, "loss": 1.0085, "step": 62305 }, { "epoch": 0.89, "grad_norm": 0.546875, "learning_rate": 6.7920942446716425e-06, "loss": 0.9108, "step": 62310 }, { "epoch": 0.89, "grad_norm": 0.490234375, "learning_rate": 6.78302767861706e-06, "loss": 0.8411, "step": 62315 }, { "epoch": 0.89, "grad_norm": 0.59375, "learning_rate": 6.77396695550846e-06, "loss": 0.8664, "step": 62320 }, { "epoch": 0.89, "grad_norm": 0.609375, "learning_rate": 6.764912075913799e-06, "loss": 0.8948, "step": 62325 }, { "epoch": 0.89, "grad_norm": 0.48828125, "learning_rate": 6.755863040400612e-06, "loss": 1.0311, "step": 62330 }, { "epoch": 0.89, "grad_norm": 0.58984375, "learning_rate": 6.7468198495361564e-06, "loss": 0.9616, "step": 62335 }, { "epoch": 0.89, "grad_norm": 0.5078125, "learning_rate": 6.7377825038872135e-06, "loss": 0.9264, "step": 62340 }, { "epoch": 0.89, "grad_norm": 0.55078125, "learning_rate": 6.728751004020284e-06, "loss": 0.8163, "step": 62345 }, { "epoch": 0.89, "grad_norm": 0.5859375, "learning_rate": 6.7197253505014825e-06, "loss": 0.8381, "step": 62350 }, { "epoch": 0.89, "grad_norm": 0.5625, "learning_rate": 6.710705543896512e-06, "loss": 1.0656, "step": 62355 }, { "epoch": 0.89, "grad_norm": 0.5078125, "learning_rate": 6.701691584770775e-06, "loss": 0.8805, "step": 62360 }, { "epoch": 0.89, "grad_norm": 0.6640625, "learning_rate": 6.692683473689276e-06, "loss": 1.0133, "step": 62365 }, { "epoch": 0.89, "grad_norm": 0.5390625, "learning_rate": 6.68368121121663e-06, "loss": 0.8248, "step": 62370 }, { "epoch": 0.89, "grad_norm": 0.6328125, "learning_rate": 6.6746847979171525e-06, "loss": 0.8632, "step": 62375 }, { "epoch": 0.89, "grad_norm": 0.51171875, "learning_rate": 6.665694234354691e-06, "loss": 0.8974, "step": 62380 }, { "epoch": 0.89, "grad_norm": 0.451171875, "learning_rate": 6.65670952109283e-06, "loss": 0.8646, "step": 62385 }, { "epoch": 0.89, "grad_norm": 0.55078125, "learning_rate": 6.647730658694718e-06, "loss": 0.8876, "step": 62390 }, { "epoch": 0.9, "grad_norm": 0.61328125, "learning_rate": 6.6387576477231595e-06, "loss": 1.0241, "step": 62395 }, { "epoch": 0.9, "grad_norm": 0.6015625, "learning_rate": 6.629790488740617e-06, "loss": 0.9139, "step": 62400 }, { "epoch": 0.9, "grad_norm": 0.55078125, "learning_rate": 6.620829182309129e-06, "loss": 1.0862, "step": 62405 }, { "epoch": 0.9, "grad_norm": 0.4921875, "learning_rate": 6.611873728990425e-06, "loss": 1.0256, "step": 62410 }, { "epoch": 0.9, "grad_norm": 0.5234375, "learning_rate": 6.602924129345855e-06, "loss": 1.1855, "step": 62415 }, { "epoch": 0.9, "grad_norm": 0.58203125, "learning_rate": 6.593980383936349e-06, "loss": 0.8099, "step": 62420 }, { "epoch": 0.9, "grad_norm": 0.5390625, "learning_rate": 6.585042493322535e-06, "loss": 1.1324, "step": 62425 }, { "epoch": 0.9, "grad_norm": 0.4765625, "learning_rate": 6.576110458064677e-06, "loss": 0.9345, "step": 62430 }, { "epoch": 0.9, "grad_norm": 0.58203125, "learning_rate": 6.567184278722582e-06, "loss": 1.0149, "step": 62435 }, { "epoch": 0.9, "grad_norm": 0.59375, "learning_rate": 6.558263955855792e-06, "loss": 1.0576, "step": 62440 }, { "epoch": 0.9, "grad_norm": 0.71875, "learning_rate": 6.549349490023448e-06, "loss": 1.0319, "step": 62445 }, { "epoch": 0.9, "grad_norm": 0.640625, "learning_rate": 6.540440881784305e-06, "loss": 0.9467, "step": 62450 }, { "epoch": 0.9, "grad_norm": 0.484375, "learning_rate": 6.531538131696757e-06, "loss": 0.8775, "step": 62455 }, { "epoch": 0.9, "grad_norm": 0.546875, "learning_rate": 6.522641240318872e-06, "loss": 0.9048, "step": 62460 }, { "epoch": 0.9, "grad_norm": 0.5703125, "learning_rate": 6.51375020820828e-06, "loss": 0.8971, "step": 62465 }, { "epoch": 0.9, "grad_norm": 0.5703125, "learning_rate": 6.5048650359223025e-06, "loss": 0.8988, "step": 62470 }, { "epoch": 0.9, "grad_norm": 0.5859375, "learning_rate": 6.495985724017872e-06, "loss": 0.9041, "step": 62475 }, { "epoch": 0.9, "grad_norm": 0.58203125, "learning_rate": 6.487112273051555e-06, "loss": 0.8345, "step": 62480 }, { "epoch": 0.9, "grad_norm": 0.62109375, "learning_rate": 6.478244683579526e-06, "loss": 1.0131, "step": 62485 }, { "epoch": 0.9, "grad_norm": 0.54296875, "learning_rate": 6.469382956157633e-06, "loss": 0.9545, "step": 62490 }, { "epoch": 0.9, "grad_norm": 0.5078125, "learning_rate": 6.46052709134135e-06, "loss": 0.8159, "step": 62495 }, { "epoch": 0.9, "grad_norm": 0.63671875, "learning_rate": 6.451677089685759e-06, "loss": 0.872, "step": 62500 }, { "epoch": 0.9, "grad_norm": 0.6484375, "learning_rate": 6.442832951745581e-06, "loss": 0.9304, "step": 62505 }, { "epoch": 0.9, "grad_norm": 0.5390625, "learning_rate": 6.433994678075195e-06, "loss": 0.9571, "step": 62510 }, { "epoch": 0.9, "grad_norm": 0.58984375, "learning_rate": 6.42516226922858e-06, "loss": 0.9523, "step": 62515 }, { "epoch": 0.9, "grad_norm": 0.5234375, "learning_rate": 6.416335725759359e-06, "loss": 0.9097, "step": 62520 }, { "epoch": 0.9, "grad_norm": 0.59375, "learning_rate": 6.4075150482208245e-06, "loss": 1.0431, "step": 62525 }, { "epoch": 0.9, "grad_norm": 0.5546875, "learning_rate": 6.398700237165811e-06, "loss": 0.9239, "step": 62530 }, { "epoch": 0.9, "grad_norm": 0.54296875, "learning_rate": 6.389891293146899e-06, "loss": 0.942, "step": 62535 }, { "epoch": 0.9, "grad_norm": 0.5390625, "learning_rate": 6.381088216716202e-06, "loss": 0.9218, "step": 62540 }, { "epoch": 0.9, "grad_norm": 0.51953125, "learning_rate": 6.3722910084255014e-06, "loss": 0.8891, "step": 62545 }, { "epoch": 0.9, "grad_norm": 0.6015625, "learning_rate": 6.363499668826245e-06, "loss": 0.9076, "step": 62550 }, { "epoch": 0.9, "grad_norm": 0.6015625, "learning_rate": 6.35471419846948e-06, "loss": 1.1142, "step": 62555 }, { "epoch": 0.9, "grad_norm": 0.56640625, "learning_rate": 6.345934597905867e-06, "loss": 0.9014, "step": 62560 }, { "epoch": 0.9, "grad_norm": 0.546875, "learning_rate": 6.337160867685743e-06, "loss": 0.9365, "step": 62565 }, { "epoch": 0.9, "grad_norm": 0.5859375, "learning_rate": 6.328393008359057e-06, "loss": 0.9801, "step": 62570 }, { "epoch": 0.9, "grad_norm": 0.52734375, "learning_rate": 6.319631020475369e-06, "loss": 0.9142, "step": 62575 }, { "epoch": 0.9, "grad_norm": 0.55859375, "learning_rate": 6.310874904583897e-06, "loss": 0.9259, "step": 62580 }, { "epoch": 0.9, "grad_norm": 0.65234375, "learning_rate": 6.302124661233511e-06, "loss": 1.1145, "step": 62585 }, { "epoch": 0.9, "grad_norm": 0.55859375, "learning_rate": 6.2933802909726615e-06, "loss": 0.938, "step": 62590 }, { "epoch": 0.9, "grad_norm": 0.609375, "learning_rate": 6.284641794349433e-06, "loss": 0.8037, "step": 62595 }, { "epoch": 0.9, "grad_norm": 0.65625, "learning_rate": 6.275909171911609e-06, "loss": 0.9177, "step": 62600 }, { "epoch": 0.9, "grad_norm": 0.58984375, "learning_rate": 6.267182424206541e-06, "loss": 0.9361, "step": 62605 }, { "epoch": 0.9, "grad_norm": 0.57421875, "learning_rate": 6.258461551781225e-06, "loss": 0.9151, "step": 62610 }, { "epoch": 0.9, "grad_norm": 0.58984375, "learning_rate": 6.24974655518229e-06, "loss": 1.0927, "step": 62615 }, { "epoch": 0.9, "grad_norm": 0.6171875, "learning_rate": 6.241037434956043e-06, "loss": 1.11, "step": 62620 }, { "epoch": 0.9, "grad_norm": 0.546875, "learning_rate": 6.2323341916483254e-06, "loss": 0.8704, "step": 62625 }, { "epoch": 0.9, "grad_norm": 0.55859375, "learning_rate": 6.223636825804702e-06, "loss": 0.7773, "step": 62630 }, { "epoch": 0.9, "grad_norm": 0.55859375, "learning_rate": 6.214945337970335e-06, "loss": 0.929, "step": 62635 }, { "epoch": 0.9, "grad_norm": 0.4921875, "learning_rate": 6.2062597286900005e-06, "loss": 0.8479, "step": 62640 }, { "epoch": 0.9, "grad_norm": 0.48828125, "learning_rate": 6.197579998508118e-06, "loss": 0.9114, "step": 62645 }, { "epoch": 0.9, "grad_norm": 0.56640625, "learning_rate": 6.188906147968776e-06, "loss": 0.8494, "step": 62650 }, { "epoch": 0.9, "grad_norm": 0.54296875, "learning_rate": 6.180238177615616e-06, "loss": 0.8771, "step": 62655 }, { "epoch": 0.9, "grad_norm": 0.56640625, "learning_rate": 6.171576087991981e-06, "loss": 0.9365, "step": 62660 }, { "epoch": 0.9, "grad_norm": 0.55078125, "learning_rate": 6.1629198796408276e-06, "loss": 1.0453, "step": 62665 }, { "epoch": 0.9, "grad_norm": 0.5390625, "learning_rate": 6.154269553104719e-06, "loss": 0.9323, "step": 62670 }, { "epoch": 0.9, "grad_norm": 0.60546875, "learning_rate": 6.145625108925879e-06, "loss": 0.8692, "step": 62675 }, { "epoch": 0.9, "grad_norm": 0.609375, "learning_rate": 6.136986547646151e-06, "loss": 1.0395, "step": 62680 }, { "epoch": 0.9, "grad_norm": 0.58203125, "learning_rate": 6.128353869807002e-06, "loss": 0.8201, "step": 62685 }, { "epoch": 0.9, "grad_norm": 0.5390625, "learning_rate": 6.119727075949555e-06, "loss": 0.9716, "step": 62690 }, { "epoch": 0.9, "grad_norm": 0.6015625, "learning_rate": 6.111106166614522e-06, "loss": 0.8377, "step": 62695 }, { "epoch": 0.9, "grad_norm": 0.54296875, "learning_rate": 6.102491142342304e-06, "loss": 0.9958, "step": 62700 }, { "epoch": 0.9, "grad_norm": 0.57421875, "learning_rate": 6.093882003672868e-06, "loss": 1.0683, "step": 62705 }, { "epoch": 0.9, "grad_norm": 0.51171875, "learning_rate": 6.085278751145851e-06, "loss": 1.0908, "step": 62710 }, { "epoch": 0.9, "grad_norm": 0.55078125, "learning_rate": 6.076681385300531e-06, "loss": 0.9115, "step": 62715 }, { "epoch": 0.9, "grad_norm": 0.515625, "learning_rate": 6.068089906675789e-06, "loss": 1.0243, "step": 62720 }, { "epoch": 0.9, "grad_norm": 0.52734375, "learning_rate": 6.05950431581015e-06, "loss": 0.9099, "step": 62725 }, { "epoch": 0.9, "grad_norm": 0.609375, "learning_rate": 6.0509246132417705e-06, "loss": 1.0154, "step": 62730 }, { "epoch": 0.9, "grad_norm": 0.52734375, "learning_rate": 6.042350799508434e-06, "loss": 0.9946, "step": 62735 }, { "epoch": 0.9, "grad_norm": 0.55078125, "learning_rate": 6.0337828751475535e-06, "loss": 0.9308, "step": 62740 }, { "epoch": 0.9, "grad_norm": 0.5234375, "learning_rate": 6.025220840696211e-06, "loss": 0.9511, "step": 62745 }, { "epoch": 0.9, "grad_norm": 0.48828125, "learning_rate": 6.0166646966910325e-06, "loss": 0.8393, "step": 62750 }, { "epoch": 0.9, "grad_norm": 0.57421875, "learning_rate": 6.008114443668334e-06, "loss": 0.873, "step": 62755 }, { "epoch": 0.9, "grad_norm": 0.5546875, "learning_rate": 5.999570082164096e-06, "loss": 0.9249, "step": 62760 }, { "epoch": 0.9, "grad_norm": 0.55078125, "learning_rate": 5.991031612713849e-06, "loss": 1.0535, "step": 62765 }, { "epoch": 0.9, "grad_norm": 0.47265625, "learning_rate": 5.982499035852795e-06, "loss": 1.0427, "step": 62770 }, { "epoch": 0.9, "grad_norm": 0.466796875, "learning_rate": 5.9739723521158084e-06, "loss": 0.9109, "step": 62775 }, { "epoch": 0.9, "grad_norm": 0.703125, "learning_rate": 5.965451562037294e-06, "loss": 0.916, "step": 62780 }, { "epoch": 0.9, "grad_norm": 0.56640625, "learning_rate": 5.95693666615138e-06, "loss": 0.944, "step": 62785 }, { "epoch": 0.9, "grad_norm": 0.55859375, "learning_rate": 5.948427664991796e-06, "loss": 0.9449, "step": 62790 }, { "epoch": 0.9, "grad_norm": 0.58203125, "learning_rate": 5.9399245590918805e-06, "loss": 0.8104, "step": 62795 }, { "epoch": 0.9, "grad_norm": 0.54296875, "learning_rate": 5.931427348984608e-06, "loss": 1.0679, "step": 62800 }, { "epoch": 0.9, "grad_norm": 0.55078125, "learning_rate": 5.922936035202598e-06, "loss": 0.8095, "step": 62805 }, { "epoch": 0.9, "grad_norm": 0.5703125, "learning_rate": 5.9144506182781225e-06, "loss": 0.953, "step": 62810 }, { "epoch": 0.9, "grad_norm": 0.59375, "learning_rate": 5.905971098743013e-06, "loss": 0.9414, "step": 62815 }, { "epoch": 0.9, "grad_norm": 0.53125, "learning_rate": 5.897497477128811e-06, "loss": 0.8741, "step": 62820 }, { "epoch": 0.9, "grad_norm": 0.53125, "learning_rate": 5.889029753966646e-06, "loss": 0.9239, "step": 62825 }, { "epoch": 0.9, "grad_norm": 0.54296875, "learning_rate": 5.880567929787273e-06, "loss": 0.8671, "step": 62830 }, { "epoch": 0.9, "grad_norm": 0.52734375, "learning_rate": 5.872112005121089e-06, "loss": 0.9372, "step": 62835 }, { "epoch": 0.9, "grad_norm": 0.55859375, "learning_rate": 5.863661980498137e-06, "loss": 0.887, "step": 62840 }, { "epoch": 0.9, "grad_norm": 0.55078125, "learning_rate": 5.855217856448058e-06, "loss": 1.017, "step": 62845 }, { "epoch": 0.9, "grad_norm": 0.546875, "learning_rate": 5.846779633500155e-06, "loss": 0.8264, "step": 62850 }, { "epoch": 0.9, "grad_norm": 0.51171875, "learning_rate": 5.8383473121833455e-06, "loss": 0.9504, "step": 62855 }, { "epoch": 0.9, "grad_norm": 0.53515625, "learning_rate": 5.829920893026142e-06, "loss": 1.0121, "step": 62860 }, { "epoch": 0.9, "grad_norm": 0.625, "learning_rate": 5.8215003765567545e-06, "loss": 0.9497, "step": 62865 }, { "epoch": 0.9, "grad_norm": 0.50390625, "learning_rate": 5.813085763302994e-06, "loss": 0.8942, "step": 62870 }, { "epoch": 0.9, "grad_norm": 0.45703125, "learning_rate": 5.804677053792284e-06, "loss": 0.9689, "step": 62875 }, { "epoch": 0.9, "grad_norm": 0.56640625, "learning_rate": 5.796274248551681e-06, "loss": 0.9884, "step": 62880 }, { "epoch": 0.9, "grad_norm": 0.59375, "learning_rate": 5.787877348107918e-06, "loss": 0.9302, "step": 62885 }, { "epoch": 0.9, "grad_norm": 0.56640625, "learning_rate": 5.779486352987285e-06, "loss": 1.1086, "step": 62890 }, { "epoch": 0.9, "grad_norm": 0.5703125, "learning_rate": 5.771101263715761e-06, "loss": 0.9932, "step": 62895 }, { "epoch": 0.9, "grad_norm": 0.62890625, "learning_rate": 5.762722080818939e-06, "loss": 1.0333, "step": 62900 }, { "epoch": 0.9, "grad_norm": 0.625, "learning_rate": 5.754348804822018e-06, "loss": 0.9, "step": 62905 }, { "epoch": 0.9, "grad_norm": 0.546875, "learning_rate": 5.745981436249847e-06, "loss": 0.8931, "step": 62910 }, { "epoch": 0.9, "grad_norm": 0.51953125, "learning_rate": 5.737619975626907e-06, "loss": 0.8853, "step": 62915 }, { "epoch": 0.9, "grad_norm": 0.76953125, "learning_rate": 5.7292644234773096e-06, "loss": 0.9668, "step": 62920 }, { "epoch": 0.9, "grad_norm": 0.5546875, "learning_rate": 5.720914780324771e-06, "loss": 0.9179, "step": 62925 }, { "epoch": 0.9, "grad_norm": 0.5390625, "learning_rate": 5.712571046692661e-06, "loss": 0.9579, "step": 62930 }, { "epoch": 0.9, "grad_norm": 0.671875, "learning_rate": 5.704233223104005e-06, "loss": 1.0306, "step": 62935 }, { "epoch": 0.9, "grad_norm": 0.53125, "learning_rate": 5.695901310081386e-06, "loss": 0.9032, "step": 62940 }, { "epoch": 0.9, "grad_norm": 0.55078125, "learning_rate": 5.687575308147086e-06, "loss": 0.9209, "step": 62945 }, { "epoch": 0.9, "grad_norm": 0.55859375, "learning_rate": 5.679255217822987e-06, "loss": 0.9953, "step": 62950 }, { "epoch": 0.9, "grad_norm": 0.5234375, "learning_rate": 5.670941039630595e-06, "loss": 0.9111, "step": 62955 }, { "epoch": 0.9, "grad_norm": 0.609375, "learning_rate": 5.662632774091026e-06, "loss": 0.9192, "step": 62960 }, { "epoch": 0.9, "grad_norm": 0.59375, "learning_rate": 5.654330421725085e-06, "loss": 0.8808, "step": 62965 }, { "epoch": 0.9, "grad_norm": 0.59375, "learning_rate": 5.646033983053178e-06, "loss": 1.0373, "step": 62970 }, { "epoch": 0.9, "grad_norm": 0.515625, "learning_rate": 5.637743458595302e-06, "loss": 0.9083, "step": 62975 }, { "epoch": 0.9, "grad_norm": 0.6015625, "learning_rate": 5.6294588488711385e-06, "loss": 0.9685, "step": 62980 }, { "epoch": 0.9, "grad_norm": 0.5390625, "learning_rate": 5.621180154399996e-06, "loss": 0.8314, "step": 62985 }, { "epoch": 0.9, "grad_norm": 0.58984375, "learning_rate": 5.612907375700749e-06, "loss": 0.9279, "step": 62990 }, { "epoch": 0.9, "grad_norm": 0.54296875, "learning_rate": 5.60464051329197e-06, "loss": 0.7895, "step": 62995 }, { "epoch": 0.9, "grad_norm": 0.5234375, "learning_rate": 5.596379567691834e-06, "loss": 1.0582, "step": 63000 }, { "epoch": 0.9, "grad_norm": 0.6015625, "learning_rate": 5.588124539418127e-06, "loss": 1.0279, "step": 63005 }, { "epoch": 0.9, "grad_norm": 0.4921875, "learning_rate": 5.579875428988324e-06, "loss": 0.9399, "step": 63010 }, { "epoch": 0.9, "grad_norm": 0.6796875, "learning_rate": 5.571632236919466e-06, "loss": 1.0019, "step": 63015 }, { "epoch": 0.9, "grad_norm": 0.625, "learning_rate": 5.563394963728219e-06, "loss": 1.0214, "step": 63020 }, { "epoch": 0.9, "grad_norm": 0.60546875, "learning_rate": 5.555163609930947e-06, "loss": 0.9731, "step": 63025 }, { "epoch": 0.9, "grad_norm": 0.5703125, "learning_rate": 5.546938176043581e-06, "loss": 0.9859, "step": 63030 }, { "epoch": 0.9, "grad_norm": 0.5234375, "learning_rate": 5.538718662581699e-06, "loss": 0.9335, "step": 63035 }, { "epoch": 0.9, "grad_norm": 0.546875, "learning_rate": 5.53050507006051e-06, "loss": 0.9076, "step": 63040 }, { "epoch": 0.9, "grad_norm": 0.57421875, "learning_rate": 5.522297398994869e-06, "loss": 0.9375, "step": 63045 }, { "epoch": 0.9, "grad_norm": 0.5703125, "learning_rate": 5.51409564989922e-06, "loss": 0.9551, "step": 63050 }, { "epoch": 0.9, "grad_norm": 0.68359375, "learning_rate": 5.505899823287663e-06, "loss": 1.0059, "step": 63055 }, { "epoch": 0.9, "grad_norm": 0.703125, "learning_rate": 5.4977099196739324e-06, "loss": 1.0367, "step": 63060 }, { "epoch": 0.9, "grad_norm": 0.515625, "learning_rate": 5.489525939571383e-06, "loss": 0.9067, "step": 63065 }, { "epoch": 0.9, "grad_norm": 0.546875, "learning_rate": 5.48134788349296e-06, "loss": 0.9347, "step": 63070 }, { "epoch": 0.9, "grad_norm": 0.546875, "learning_rate": 5.47317575195131e-06, "loss": 0.866, "step": 63075 }, { "epoch": 0.9, "grad_norm": 0.498046875, "learning_rate": 5.465009545458666e-06, "loss": 0.8887, "step": 63080 }, { "epoch": 0.9, "grad_norm": 0.5546875, "learning_rate": 5.456849264526887e-06, "loss": 0.9003, "step": 63085 }, { "epoch": 0.9, "grad_norm": 0.54296875, "learning_rate": 5.448694909667462e-06, "loss": 1.0918, "step": 63090 }, { "epoch": 0.91, "grad_norm": 0.66796875, "learning_rate": 5.4405464813915395e-06, "loss": 0.9515, "step": 63095 }, { "epoch": 0.91, "grad_norm": 0.478515625, "learning_rate": 5.4324039802098544e-06, "loss": 1.002, "step": 63100 }, { "epoch": 0.91, "grad_norm": 0.52734375, "learning_rate": 5.424267406632777e-06, "loss": 0.9115, "step": 63105 }, { "epoch": 0.91, "grad_norm": 0.5234375, "learning_rate": 5.416136761170354e-06, "loss": 0.9534, "step": 63110 }, { "epoch": 0.91, "grad_norm": 0.56640625, "learning_rate": 5.4080120443322e-06, "loss": 0.7717, "step": 63115 }, { "epoch": 0.91, "grad_norm": 0.55859375, "learning_rate": 5.399893256627564e-06, "loss": 1.0774, "step": 63120 }, { "epoch": 0.91, "grad_norm": 0.48828125, "learning_rate": 5.391780398565383e-06, "loss": 0.8528, "step": 63125 }, { "epoch": 0.91, "grad_norm": 0.51953125, "learning_rate": 5.3836734706541385e-06, "loss": 1.0441, "step": 63130 }, { "epoch": 0.91, "grad_norm": 0.6171875, "learning_rate": 5.375572473401991e-06, "loss": 1.0177, "step": 63135 }, { "epoch": 0.91, "grad_norm": 0.52734375, "learning_rate": 5.367477407316745e-06, "loss": 0.927, "step": 63140 }, { "epoch": 0.91, "grad_norm": 0.53125, "learning_rate": 5.359388272905785e-06, "loss": 0.9415, "step": 63145 }, { "epoch": 0.91, "grad_norm": 0.53515625, "learning_rate": 5.35130507067616e-06, "loss": 1.005, "step": 63150 }, { "epoch": 0.91, "grad_norm": 0.65625, "learning_rate": 5.343227801134532e-06, "loss": 0.8438, "step": 63155 }, { "epoch": 0.91, "grad_norm": 0.55078125, "learning_rate": 5.335156464787183e-06, "loss": 0.8493, "step": 63160 }, { "epoch": 0.91, "grad_norm": 0.51171875, "learning_rate": 5.3270910621400435e-06, "loss": 1.1147, "step": 63165 }, { "epoch": 0.91, "grad_norm": 0.6171875, "learning_rate": 5.319031593698653e-06, "loss": 0.9504, "step": 63170 }, { "epoch": 0.91, "grad_norm": 0.5546875, "learning_rate": 5.310978059968219e-06, "loss": 0.9684, "step": 63175 }, { "epoch": 0.91, "grad_norm": 0.55859375, "learning_rate": 5.302930461453492e-06, "loss": 0.8936, "step": 63180 }, { "epoch": 0.91, "grad_norm": 0.52734375, "learning_rate": 5.294888798658948e-06, "loss": 0.9478, "step": 63185 }, { "epoch": 0.91, "grad_norm": 0.5234375, "learning_rate": 5.286853072088638e-06, "loss": 1.0088, "step": 63190 }, { "epoch": 0.91, "grad_norm": 0.5078125, "learning_rate": 5.278823282246237e-06, "loss": 0.9158, "step": 63195 }, { "epoch": 0.91, "grad_norm": 0.55859375, "learning_rate": 5.270799429635065e-06, "loss": 0.904, "step": 63200 }, { "epoch": 0.91, "grad_norm": 0.56640625, "learning_rate": 5.262781514758097e-06, "loss": 1.006, "step": 63205 }, { "epoch": 0.91, "grad_norm": 0.61328125, "learning_rate": 5.254769538117854e-06, "loss": 0.9891, "step": 63210 }, { "epoch": 0.91, "grad_norm": 0.5703125, "learning_rate": 5.246763500216578e-06, "loss": 0.9617, "step": 63215 }, { "epoch": 0.91, "grad_norm": 0.5859375, "learning_rate": 5.23876340155609e-06, "loss": 0.9842, "step": 63220 }, { "epoch": 0.91, "grad_norm": 0.515625, "learning_rate": 5.2307692426378226e-06, "loss": 0.948, "step": 63225 }, { "epoch": 0.91, "grad_norm": 0.5625, "learning_rate": 5.2227810239628635e-06, "loss": 0.9327, "step": 63230 }, { "epoch": 0.91, "grad_norm": 0.55859375, "learning_rate": 5.214798746031957e-06, "loss": 0.9766, "step": 63235 }, { "epoch": 0.91, "grad_norm": 0.56640625, "learning_rate": 5.206822409345391e-06, "loss": 0.9396, "step": 63240 }, { "epoch": 0.91, "grad_norm": 0.5546875, "learning_rate": 5.198852014403166e-06, "loss": 1.098, "step": 63245 }, { "epoch": 0.91, "grad_norm": 0.65234375, "learning_rate": 5.190887561704871e-06, "loss": 0.8943, "step": 63250 }, { "epoch": 0.91, "grad_norm": 0.59765625, "learning_rate": 5.182929051749708e-06, "loss": 0.9164, "step": 63255 }, { "epoch": 0.91, "grad_norm": 0.546875, "learning_rate": 5.174976485036542e-06, "loss": 0.9736, "step": 63260 }, { "epoch": 0.91, "grad_norm": 0.6015625, "learning_rate": 5.167029862063865e-06, "loss": 1.0046, "step": 63265 }, { "epoch": 0.91, "grad_norm": 0.6015625, "learning_rate": 5.159089183329757e-06, "loss": 0.8295, "step": 63270 }, { "epoch": 0.91, "grad_norm": 0.59765625, "learning_rate": 5.1511544493319515e-06, "loss": 0.925, "step": 63275 }, { "epoch": 0.91, "grad_norm": 0.58984375, "learning_rate": 5.143225660567796e-06, "loss": 1.0085, "step": 63280 }, { "epoch": 0.91, "grad_norm": 0.59765625, "learning_rate": 5.135302817534304e-06, "loss": 1.0287, "step": 63285 }, { "epoch": 0.91, "grad_norm": 0.5625, "learning_rate": 5.127385920728067e-06, "loss": 0.9187, "step": 63290 }, { "epoch": 0.91, "grad_norm": 0.53125, "learning_rate": 5.119474970645322e-06, "loss": 0.8212, "step": 63295 }, { "epoch": 0.91, "grad_norm": 0.55859375, "learning_rate": 5.111569967781959e-06, "loss": 1.0078, "step": 63300 }, { "epoch": 0.91, "grad_norm": 0.51953125, "learning_rate": 5.103670912633451e-06, "loss": 0.9496, "step": 63305 }, { "epoch": 0.91, "grad_norm": 0.482421875, "learning_rate": 5.095777805694935e-06, "loss": 0.9652, "step": 63310 }, { "epoch": 0.91, "grad_norm": 0.5, "learning_rate": 5.0878906474611574e-06, "loss": 0.922, "step": 63315 }, { "epoch": 0.91, "grad_norm": 0.671875, "learning_rate": 5.0800094384264694e-06, "loss": 1.0818, "step": 63320 }, { "epoch": 0.91, "grad_norm": 0.5625, "learning_rate": 5.07213417908492e-06, "loss": 0.8417, "step": 63325 }, { "epoch": 0.91, "grad_norm": 0.625, "learning_rate": 5.064264869930113e-06, "loss": 0.9057, "step": 63330 }, { "epoch": 0.91, "grad_norm": 0.671875, "learning_rate": 5.056401511455288e-06, "loss": 0.9798, "step": 63335 }, { "epoch": 0.91, "grad_norm": 0.59375, "learning_rate": 5.048544104153352e-06, "loss": 0.982, "step": 63340 }, { "epoch": 0.91, "grad_norm": 0.55859375, "learning_rate": 5.040692648516821e-06, "loss": 0.927, "step": 63345 }, { "epoch": 0.91, "grad_norm": 0.5546875, "learning_rate": 5.032847145037811e-06, "loss": 1.0344, "step": 63350 }, { "epoch": 0.91, "grad_norm": 0.5234375, "learning_rate": 5.025007594208109e-06, "loss": 0.8876, "step": 63355 }, { "epoch": 0.91, "grad_norm": 0.609375, "learning_rate": 5.0171739965191085e-06, "loss": 0.9278, "step": 63360 }, { "epoch": 0.91, "grad_norm": 0.59765625, "learning_rate": 5.0093463524617965e-06, "loss": 0.8402, "step": 63365 }, { "epoch": 0.91, "grad_norm": 0.546875, "learning_rate": 5.001524662526846e-06, "loss": 0.9543, "step": 63370 }, { "epoch": 0.91, "grad_norm": 0.58984375, "learning_rate": 4.993708927204543e-06, "loss": 1.1113, "step": 63375 }, { "epoch": 0.91, "grad_norm": 0.51171875, "learning_rate": 4.985899146984762e-06, "loss": 0.9639, "step": 63380 }, { "epoch": 0.91, "grad_norm": 0.61328125, "learning_rate": 4.978095322357024e-06, "loss": 1.0694, "step": 63385 }, { "epoch": 0.91, "grad_norm": 0.50390625, "learning_rate": 4.97029745381048e-06, "loss": 0.732, "step": 63390 }, { "epoch": 0.91, "grad_norm": 0.59375, "learning_rate": 4.9625055418339505e-06, "loss": 1.0459, "step": 63395 }, { "epoch": 0.91, "grad_norm": 0.57421875, "learning_rate": 4.954719586915791e-06, "loss": 0.9727, "step": 63400 }, { "epoch": 0.91, "grad_norm": 0.447265625, "learning_rate": 4.946939589544053e-06, "loss": 0.9419, "step": 63405 }, { "epoch": 0.91, "grad_norm": 0.6171875, "learning_rate": 4.939165550206415e-06, "loss": 0.7805, "step": 63410 }, { "epoch": 0.91, "grad_norm": 0.59765625, "learning_rate": 4.931397469390131e-06, "loss": 0.9549, "step": 63415 }, { "epoch": 0.91, "grad_norm": 0.5546875, "learning_rate": 4.923635347582134e-06, "loss": 1.0289, "step": 63420 }, { "epoch": 0.91, "grad_norm": 0.5546875, "learning_rate": 4.915879185268968e-06, "loss": 0.8387, "step": 63425 }, { "epoch": 0.91, "grad_norm": 0.53515625, "learning_rate": 4.908128982936777e-06, "loss": 0.9095, "step": 63430 }, { "epoch": 0.91, "grad_norm": 0.5703125, "learning_rate": 4.900384741071362e-06, "loss": 0.988, "step": 63435 }, { "epoch": 0.91, "grad_norm": 0.5390625, "learning_rate": 4.892646460158146e-06, "loss": 0.9183, "step": 63440 }, { "epoch": 0.91, "grad_norm": 0.6015625, "learning_rate": 4.884914140682151e-06, "loss": 0.9776, "step": 63445 }, { "epoch": 0.91, "grad_norm": 0.68359375, "learning_rate": 4.877187783128068e-06, "loss": 0.9776, "step": 63450 }, { "epoch": 0.91, "grad_norm": 0.63671875, "learning_rate": 4.869467387980209e-06, "loss": 1.0015, "step": 63455 }, { "epoch": 0.91, "grad_norm": 0.4765625, "learning_rate": 4.861752955722454e-06, "loss": 0.7547, "step": 63460 }, { "epoch": 0.91, "grad_norm": 0.51171875, "learning_rate": 4.8540444868383935e-06, "loss": 1.2453, "step": 63465 }, { "epoch": 0.91, "grad_norm": 0.54296875, "learning_rate": 4.846341981811187e-06, "loss": 0.8749, "step": 63470 }, { "epoch": 0.91, "grad_norm": 0.67578125, "learning_rate": 4.838645441123623e-06, "loss": 1.0333, "step": 63475 }, { "epoch": 0.91, "grad_norm": 0.75390625, "learning_rate": 4.830954865258164e-06, "loss": 0.931, "step": 63480 }, { "epoch": 0.91, "grad_norm": 0.515625, "learning_rate": 4.823270254696821e-06, "loss": 0.9601, "step": 63485 }, { "epoch": 0.91, "grad_norm": 0.494140625, "learning_rate": 4.815591609921322e-06, "loss": 0.8998, "step": 63490 }, { "epoch": 0.91, "grad_norm": 0.69921875, "learning_rate": 4.807918931412914e-06, "loss": 1.0241, "step": 63495 }, { "epoch": 0.91, "grad_norm": 0.640625, "learning_rate": 4.800252219652579e-06, "loss": 0.9631, "step": 63500 }, { "epoch": 0.91, "grad_norm": 0.59765625, "learning_rate": 4.792591475120867e-06, "loss": 1.09, "step": 63505 }, { "epoch": 0.91, "grad_norm": 0.55078125, "learning_rate": 4.784936698297937e-06, "loss": 0.9044, "step": 63510 }, { "epoch": 0.91, "grad_norm": 0.51171875, "learning_rate": 4.777287889663618e-06, "loss": 0.9348, "step": 63515 }, { "epoch": 0.91, "grad_norm": 0.53125, "learning_rate": 4.7696450496973464e-06, "loss": 0.9964, "step": 63520 }, { "epoch": 0.91, "grad_norm": 0.5859375, "learning_rate": 4.762008178878185e-06, "loss": 0.8946, "step": 63525 }, { "epoch": 0.91, "grad_norm": 0.61328125, "learning_rate": 4.754377277684807e-06, "loss": 1.025, "step": 63530 }, { "epoch": 0.91, "grad_norm": 0.51953125, "learning_rate": 4.746752346595562e-06, "loss": 0.9254, "step": 63535 }, { "epoch": 0.91, "grad_norm": 0.54296875, "learning_rate": 4.739133386088345e-06, "loss": 1.1481, "step": 63540 }, { "epoch": 0.91, "grad_norm": 0.609375, "learning_rate": 4.7315203966407425e-06, "loss": 1.0514, "step": 63545 }, { "epoch": 0.91, "grad_norm": 0.61328125, "learning_rate": 4.723913378729949e-06, "loss": 1.0534, "step": 63550 }, { "epoch": 0.91, "grad_norm": 0.5234375, "learning_rate": 4.716312332832762e-06, "loss": 0.9579, "step": 63555 }, { "epoch": 0.91, "grad_norm": 0.578125, "learning_rate": 4.708717259425644e-06, "loss": 0.9386, "step": 63560 }, { "epoch": 0.91, "grad_norm": 0.470703125, "learning_rate": 4.701128158984658e-06, "loss": 0.8514, "step": 63565 }, { "epoch": 0.91, "grad_norm": 0.490234375, "learning_rate": 4.693545031985491e-06, "loss": 0.9786, "step": 63570 }, { "epoch": 0.91, "grad_norm": 0.5234375, "learning_rate": 4.685967878903463e-06, "loss": 0.8708, "step": 63575 }, { "epoch": 0.91, "grad_norm": 0.447265625, "learning_rate": 4.678396700213539e-06, "loss": 0.9327, "step": 63580 }, { "epoch": 0.91, "grad_norm": 0.56640625, "learning_rate": 4.670831496390271e-06, "loss": 0.9573, "step": 63585 }, { "epoch": 0.91, "grad_norm": 0.546875, "learning_rate": 4.6632722679078366e-06, "loss": 0.9043, "step": 63590 }, { "epoch": 0.91, "grad_norm": 0.55859375, "learning_rate": 4.65571901524009e-06, "loss": 0.9097, "step": 63595 }, { "epoch": 0.91, "grad_norm": 0.57421875, "learning_rate": 4.648171738860463e-06, "loss": 1.0028, "step": 63600 }, { "epoch": 0.91, "grad_norm": 0.54296875, "learning_rate": 4.640630439242022e-06, "loss": 0.8418, "step": 63605 }, { "epoch": 0.91, "grad_norm": 0.57421875, "learning_rate": 4.633095116857467e-06, "loss": 0.9803, "step": 63610 }, { "epoch": 0.91, "grad_norm": 0.515625, "learning_rate": 4.62556577217913e-06, "loss": 0.859, "step": 63615 }, { "epoch": 0.91, "grad_norm": 0.482421875, "learning_rate": 4.618042405678946e-06, "loss": 0.9136, "step": 63620 }, { "epoch": 0.91, "grad_norm": 0.640625, "learning_rate": 4.610525017828504e-06, "loss": 0.9965, "step": 63625 }, { "epoch": 0.91, "grad_norm": 0.5390625, "learning_rate": 4.603013609098994e-06, "loss": 0.8447, "step": 63630 }, { "epoch": 0.91, "grad_norm": 0.62890625, "learning_rate": 4.595508179961228e-06, "loss": 1.0054, "step": 63635 }, { "epoch": 0.91, "grad_norm": 0.59375, "learning_rate": 4.588008730885685e-06, "loss": 0.9298, "step": 63640 }, { "epoch": 0.91, "grad_norm": 0.6640625, "learning_rate": 4.5805152623424e-06, "loss": 0.9792, "step": 63645 }, { "epoch": 0.91, "grad_norm": 0.578125, "learning_rate": 4.573027774801109e-06, "loss": 0.8879, "step": 63650 }, { "epoch": 0.91, "grad_norm": 0.55078125, "learning_rate": 4.565546268731102e-06, "loss": 0.9782, "step": 63655 }, { "epoch": 0.91, "grad_norm": 0.69921875, "learning_rate": 4.55807074460135e-06, "loss": 0.885, "step": 63660 }, { "epoch": 0.91, "grad_norm": 0.54296875, "learning_rate": 4.550601202880433e-06, "loss": 0.867, "step": 63665 }, { "epoch": 0.91, "grad_norm": 0.59375, "learning_rate": 4.543137644036533e-06, "loss": 1.1561, "step": 63670 }, { "epoch": 0.91, "grad_norm": 0.546875, "learning_rate": 4.5356800685374955e-06, "loss": 0.9632, "step": 63675 }, { "epoch": 0.91, "grad_norm": 0.58203125, "learning_rate": 4.5282284768507375e-06, "loss": 0.9658, "step": 63680 }, { "epoch": 0.91, "grad_norm": 0.57421875, "learning_rate": 4.520782869443352e-06, "loss": 0.9817, "step": 63685 }, { "epoch": 0.91, "grad_norm": 0.5625, "learning_rate": 4.513343246782043e-06, "loss": 0.8341, "step": 63690 }, { "epoch": 0.91, "grad_norm": 0.5234375, "learning_rate": 4.505909609333147e-06, "loss": 0.8791, "step": 63695 }, { "epoch": 0.91, "grad_norm": 0.56640625, "learning_rate": 4.498481957562573e-06, "loss": 0.9347, "step": 63700 }, { "epoch": 0.91, "grad_norm": 0.55078125, "learning_rate": 4.491060291935911e-06, "loss": 0.8894, "step": 63705 }, { "epoch": 0.91, "grad_norm": 0.52734375, "learning_rate": 4.4836446129183914e-06, "loss": 0.8209, "step": 63710 }, { "epoch": 0.91, "grad_norm": 0.45703125, "learning_rate": 4.476234920974787e-06, "loss": 0.9218, "step": 63715 }, { "epoch": 0.91, "grad_norm": 0.5390625, "learning_rate": 4.46883121656958e-06, "loss": 0.8734, "step": 63720 }, { "epoch": 0.91, "grad_norm": 0.6953125, "learning_rate": 4.461433500166834e-06, "loss": 0.9371, "step": 63725 }, { "epoch": 0.91, "grad_norm": 0.55859375, "learning_rate": 4.454041772230244e-06, "loss": 1.0467, "step": 63730 }, { "epoch": 0.91, "grad_norm": 0.56640625, "learning_rate": 4.446656033223129e-06, "loss": 0.9309, "step": 63735 }, { "epoch": 0.91, "grad_norm": 0.53125, "learning_rate": 4.439276283608451e-06, "loss": 0.9906, "step": 63740 }, { "epoch": 0.91, "grad_norm": 0.5859375, "learning_rate": 4.431902523848774e-06, "loss": 0.8913, "step": 63745 }, { "epoch": 0.91, "grad_norm": 0.5625, "learning_rate": 4.424534754406273e-06, "loss": 0.8362, "step": 63750 }, { "epoch": 0.91, "grad_norm": 0.69140625, "learning_rate": 4.417172975742789e-06, "loss": 0.8761, "step": 63755 }, { "epoch": 0.91, "grad_norm": 0.58984375, "learning_rate": 4.409817188319776e-06, "loss": 0.9338, "step": 63760 }, { "epoch": 0.91, "grad_norm": 0.52734375, "learning_rate": 4.4024673925982755e-06, "loss": 0.9317, "step": 63765 }, { "epoch": 0.91, "grad_norm": 0.640625, "learning_rate": 4.395123589038996e-06, "loss": 1.0069, "step": 63770 }, { "epoch": 0.91, "grad_norm": 0.57421875, "learning_rate": 4.38778577810226e-06, "loss": 1.0037, "step": 63775 }, { "epoch": 0.91, "grad_norm": 0.62109375, "learning_rate": 4.380453960247999e-06, "loss": 0.8736, "step": 63780 }, { "epoch": 0.91, "grad_norm": 0.56640625, "learning_rate": 4.373128135935789e-06, "loss": 1.0341, "step": 63785 }, { "epoch": 0.92, "grad_norm": 0.578125, "learning_rate": 4.365808305624819e-06, "loss": 1.0015, "step": 63790 }, { "epoch": 0.92, "grad_norm": 0.5625, "learning_rate": 4.35849446977391e-06, "loss": 0.9519, "step": 63795 }, { "epoch": 0.92, "grad_norm": 0.4921875, "learning_rate": 4.351186628841486e-06, "loss": 0.9249, "step": 63800 }, { "epoch": 0.92, "grad_norm": 0.52734375, "learning_rate": 4.343884783285623e-06, "loss": 0.9506, "step": 63805 }, { "epoch": 0.92, "grad_norm": 0.5625, "learning_rate": 4.33658893356399e-06, "loss": 0.9668, "step": 63810 }, { "epoch": 0.92, "grad_norm": 0.546875, "learning_rate": 4.3292990801339196e-06, "loss": 0.8758, "step": 63815 }, { "epoch": 0.92, "grad_norm": 0.5703125, "learning_rate": 4.322015223452358e-06, "loss": 1.2518, "step": 63820 }, { "epoch": 0.92, "grad_norm": 0.5546875, "learning_rate": 4.314737363975829e-06, "loss": 0.8206, "step": 63825 }, { "epoch": 0.92, "grad_norm": 0.6484375, "learning_rate": 4.307465502160546e-06, "loss": 0.9852, "step": 63830 }, { "epoch": 0.92, "grad_norm": 0.55859375, "learning_rate": 4.300199638462321e-06, "loss": 0.9431, "step": 63835 }, { "epoch": 0.92, "grad_norm": 0.478515625, "learning_rate": 4.292939773336569e-06, "loss": 0.9671, "step": 63840 }, { "epoch": 0.92, "grad_norm": 0.5078125, "learning_rate": 4.285685907238346e-06, "loss": 1.0574, "step": 63845 }, { "epoch": 0.92, "grad_norm": 0.609375, "learning_rate": 4.278438040622346e-06, "loss": 0.9514, "step": 63850 }, { "epoch": 0.92, "grad_norm": 0.51171875, "learning_rate": 4.271196173942882e-06, "loss": 0.8474, "step": 63855 }, { "epoch": 0.92, "grad_norm": 0.578125, "learning_rate": 4.263960307653847e-06, "loss": 0.9748, "step": 63860 }, { "epoch": 0.92, "grad_norm": 0.55859375, "learning_rate": 4.256730442208812e-06, "loss": 1.079, "step": 63865 }, { "epoch": 0.92, "grad_norm": 0.51953125, "learning_rate": 4.249506578060969e-06, "loss": 0.8845, "step": 63870 }, { "epoch": 0.92, "grad_norm": 0.625, "learning_rate": 4.2422887156630894e-06, "loss": 1.0111, "step": 63875 }, { "epoch": 0.92, "grad_norm": 0.5, "learning_rate": 4.235076855467623e-06, "loss": 1.1052, "step": 63880 }, { "epoch": 0.92, "grad_norm": 0.5390625, "learning_rate": 4.227870997926609e-06, "loss": 0.8858, "step": 63885 }, { "epoch": 0.92, "grad_norm": 0.546875, "learning_rate": 4.220671143491705e-06, "loss": 0.944, "step": 63890 }, { "epoch": 0.92, "grad_norm": 0.6484375, "learning_rate": 4.213477292614221e-06, "loss": 1.1542, "step": 63895 }, { "epoch": 0.92, "grad_norm": 0.44140625, "learning_rate": 4.206289445745093e-06, "loss": 0.8764, "step": 63900 }, { "epoch": 0.92, "grad_norm": 0.51953125, "learning_rate": 4.199107603334818e-06, "loss": 0.9639, "step": 63905 }, { "epoch": 0.92, "grad_norm": 0.6015625, "learning_rate": 4.19193176583359e-06, "loss": 0.9392, "step": 63910 }, { "epoch": 0.92, "grad_norm": 0.60546875, "learning_rate": 4.184761933691206e-06, "loss": 1.0341, "step": 63915 }, { "epoch": 0.92, "grad_norm": 0.5078125, "learning_rate": 4.177598107357061e-06, "loss": 1.0983, "step": 63920 }, { "epoch": 0.92, "grad_norm": 0.66796875, "learning_rate": 4.170440287280186e-06, "loss": 0.9701, "step": 63925 }, { "epoch": 0.92, "grad_norm": 0.5859375, "learning_rate": 4.163288473909277e-06, "loss": 0.9325, "step": 63930 }, { "epoch": 0.92, "grad_norm": 0.69921875, "learning_rate": 4.1561426676925864e-06, "loss": 0.9986, "step": 63935 }, { "epoch": 0.92, "grad_norm": 0.578125, "learning_rate": 4.1490028690780225e-06, "loss": 0.9749, "step": 63940 }, { "epoch": 0.92, "grad_norm": 0.546875, "learning_rate": 4.1418690785131384e-06, "loss": 0.9181, "step": 63945 }, { "epoch": 0.92, "grad_norm": 0.50390625, "learning_rate": 4.134741296445055e-06, "loss": 1.0318, "step": 63950 }, { "epoch": 0.92, "grad_norm": 0.63671875, "learning_rate": 4.127619523320592e-06, "loss": 0.9893, "step": 63955 }, { "epoch": 0.92, "grad_norm": 0.515625, "learning_rate": 4.120503759586103e-06, "loss": 1.0783, "step": 63960 }, { "epoch": 0.92, "grad_norm": 0.58203125, "learning_rate": 4.113394005687654e-06, "loss": 0.9217, "step": 63965 }, { "epoch": 0.92, "grad_norm": 0.58984375, "learning_rate": 4.106290262070867e-06, "loss": 1.029, "step": 63970 }, { "epoch": 0.92, "grad_norm": 0.5234375, "learning_rate": 4.099192529181018e-06, "loss": 1.0799, "step": 63975 }, { "epoch": 0.92, "grad_norm": 0.5859375, "learning_rate": 4.092100807463017e-06, "loss": 0.9321, "step": 63980 }, { "epoch": 0.92, "grad_norm": 0.59375, "learning_rate": 4.085015097361344e-06, "loss": 0.9397, "step": 63985 }, { "epoch": 0.92, "grad_norm": 0.60546875, "learning_rate": 4.0779353993201765e-06, "loss": 0.9828, "step": 63990 }, { "epoch": 0.92, "grad_norm": 0.5703125, "learning_rate": 4.07086171378327e-06, "loss": 0.8303, "step": 63995 }, { "epoch": 0.92, "grad_norm": 0.5390625, "learning_rate": 4.0637940411939916e-06, "loss": 0.9768, "step": 64000 }, { "epoch": 0.92, "grad_norm": 0.5234375, "learning_rate": 4.0567323819953765e-06, "loss": 1.1028, "step": 64005 }, { "epoch": 0.92, "grad_norm": 0.5234375, "learning_rate": 4.049676736630048e-06, "loss": 0.8675, "step": 64010 }, { "epoch": 0.92, "grad_norm": 0.48046875, "learning_rate": 4.042627105540253e-06, "loss": 1.008, "step": 64015 }, { "epoch": 0.92, "grad_norm": 0.5625, "learning_rate": 4.0355834891678714e-06, "loss": 0.84, "step": 64020 }, { "epoch": 0.92, "grad_norm": 0.5703125, "learning_rate": 4.028545887954416e-06, "loss": 0.9896, "step": 64025 }, { "epoch": 0.92, "grad_norm": 0.5703125, "learning_rate": 4.021514302341012e-06, "loss": 1.0693, "step": 64030 }, { "epoch": 0.92, "grad_norm": 0.6328125, "learning_rate": 4.014488732768385e-06, "loss": 1.0449, "step": 64035 }, { "epoch": 0.92, "grad_norm": 0.5703125, "learning_rate": 4.007469179676948e-06, "loss": 0.9926, "step": 64040 }, { "epoch": 0.92, "grad_norm": 0.474609375, "learning_rate": 4.000455643506651e-06, "loss": 0.8867, "step": 64045 }, { "epoch": 0.92, "grad_norm": 0.50390625, "learning_rate": 3.99344812469713e-06, "loss": 0.8337, "step": 64050 }, { "epoch": 0.92, "grad_norm": 0.58203125, "learning_rate": 3.986446623687623e-06, "loss": 0.9334, "step": 64055 }, { "epoch": 0.92, "grad_norm": 0.51171875, "learning_rate": 3.979451140917012e-06, "loss": 1.0358, "step": 64060 }, { "epoch": 0.92, "grad_norm": 0.58203125, "learning_rate": 3.972461676823735e-06, "loss": 0.996, "step": 64065 }, { "epoch": 0.92, "grad_norm": 0.56640625, "learning_rate": 3.965478231845932e-06, "loss": 0.9938, "step": 64070 }, { "epoch": 0.92, "grad_norm": 0.60546875, "learning_rate": 3.958500806421339e-06, "loss": 0.9653, "step": 64075 }, { "epoch": 0.92, "grad_norm": 0.69140625, "learning_rate": 3.9515294009872865e-06, "loss": 0.9159, "step": 64080 }, { "epoch": 0.92, "grad_norm": 0.578125, "learning_rate": 3.9445640159807565e-06, "loss": 0.8874, "step": 64085 }, { "epoch": 0.92, "grad_norm": 0.52734375, "learning_rate": 3.937604651838367e-06, "loss": 1.082, "step": 64090 }, { "epoch": 0.92, "grad_norm": 0.5703125, "learning_rate": 3.9306513089963135e-06, "loss": 0.9247, "step": 64095 }, { "epoch": 0.92, "grad_norm": 0.62109375, "learning_rate": 3.923703987890448e-06, "loss": 0.9998, "step": 64100 }, { "epoch": 0.92, "grad_norm": 0.5234375, "learning_rate": 3.916762688956255e-06, "loss": 1.0188, "step": 64105 }, { "epoch": 0.92, "grad_norm": 0.58203125, "learning_rate": 3.909827412628797e-06, "loss": 0.8621, "step": 64110 }, { "epoch": 0.92, "grad_norm": 0.640625, "learning_rate": 3.9028981593427935e-06, "loss": 0.9959, "step": 64115 }, { "epoch": 0.92, "grad_norm": 0.55078125, "learning_rate": 3.895974929532587e-06, "loss": 0.9092, "step": 64120 }, { "epoch": 0.92, "grad_norm": 0.6875, "learning_rate": 3.889057723632106e-06, "loss": 0.9389, "step": 64125 }, { "epoch": 0.92, "grad_norm": 0.5, "learning_rate": 3.88214654207496e-06, "loss": 0.8261, "step": 64130 }, { "epoch": 0.92, "grad_norm": 0.5390625, "learning_rate": 3.875241385294337e-06, "loss": 1.281, "step": 64135 }, { "epoch": 0.92, "grad_norm": 0.5234375, "learning_rate": 3.868342253723056e-06, "loss": 0.9396, "step": 64140 }, { "epoch": 0.92, "grad_norm": 0.58984375, "learning_rate": 3.861449147793561e-06, "loss": 0.9935, "step": 64145 }, { "epoch": 0.92, "grad_norm": 0.578125, "learning_rate": 3.85456206793795e-06, "loss": 0.8586, "step": 64150 }, { "epoch": 0.92, "grad_norm": 0.63671875, "learning_rate": 3.8476810145878675e-06, "loss": 0.957, "step": 64155 }, { "epoch": 0.92, "grad_norm": 0.58984375, "learning_rate": 3.840805988174656e-06, "loss": 0.8097, "step": 64160 }, { "epoch": 0.92, "grad_norm": 0.52734375, "learning_rate": 3.833936989129239e-06, "loss": 0.9352, "step": 64165 }, { "epoch": 0.92, "grad_norm": 0.5859375, "learning_rate": 3.827074017882193e-06, "loss": 0.8878, "step": 64170 }, { "epoch": 0.92, "grad_norm": 0.53125, "learning_rate": 3.820217074863652e-06, "loss": 1.034, "step": 64175 }, { "epoch": 0.92, "grad_norm": 0.625, "learning_rate": 3.813366160503451e-06, "loss": 0.9547, "step": 64180 }, { "epoch": 0.92, "grad_norm": 0.6875, "learning_rate": 3.8065212752310232e-06, "loss": 0.8868, "step": 64185 }, { "epoch": 0.92, "grad_norm": 0.609375, "learning_rate": 3.79968241947537e-06, "loss": 0.9713, "step": 64190 }, { "epoch": 0.92, "grad_norm": 0.57421875, "learning_rate": 3.7928495936652043e-06, "loss": 1.0498, "step": 64195 }, { "epoch": 0.92, "grad_norm": 0.52734375, "learning_rate": 3.7860227982287943e-06, "loss": 1.0994, "step": 64200 }, { "epoch": 0.92, "grad_norm": 0.55859375, "learning_rate": 3.7792020335940537e-06, "loss": 1.0165, "step": 64205 }, { "epoch": 0.92, "grad_norm": 0.6640625, "learning_rate": 3.772387300188507e-06, "loss": 1.0315, "step": 64210 }, { "epoch": 0.92, "grad_norm": 0.6015625, "learning_rate": 3.7655785984393455e-06, "loss": 0.9926, "step": 64215 }, { "epoch": 0.92, "grad_norm": 0.57421875, "learning_rate": 3.7587759287732948e-06, "loss": 1.0681, "step": 64220 }, { "epoch": 0.92, "grad_norm": 0.47265625, "learning_rate": 3.7519792916167808e-06, "loss": 0.9187, "step": 64225 }, { "epoch": 0.92, "grad_norm": 0.478515625, "learning_rate": 3.7451886873958285e-06, "loss": 0.9417, "step": 64230 }, { "epoch": 0.92, "grad_norm": 0.53515625, "learning_rate": 3.738404116536065e-06, "loss": 0.92, "step": 64235 }, { "epoch": 0.92, "grad_norm": 0.83203125, "learning_rate": 3.731625579462761e-06, "loss": 1.0117, "step": 64240 }, { "epoch": 0.92, "grad_norm": 0.578125, "learning_rate": 3.7248530766008315e-06, "loss": 1.0202, "step": 64245 }, { "epoch": 0.92, "grad_norm": 0.66796875, "learning_rate": 3.7180866083747377e-06, "loss": 0.9853, "step": 64250 }, { "epoch": 0.92, "grad_norm": 0.5, "learning_rate": 3.7113261752086294e-06, "loss": 0.934, "step": 64255 }, { "epoch": 0.92, "grad_norm": 0.55859375, "learning_rate": 3.7045717775262777e-06, "loss": 0.9742, "step": 64260 }, { "epoch": 0.92, "grad_norm": 0.58984375, "learning_rate": 3.697823415751023e-06, "loss": 0.9932, "step": 64265 }, { "epoch": 0.92, "grad_norm": 0.5078125, "learning_rate": 3.6910810903058813e-06, "loss": 0.9429, "step": 64270 }, { "epoch": 0.92, "grad_norm": 0.53125, "learning_rate": 3.6843448016134596e-06, "loss": 0.9631, "step": 64275 }, { "epoch": 0.92, "grad_norm": 0.640625, "learning_rate": 3.6776145500960093e-06, "loss": 0.9709, "step": 64280 }, { "epoch": 0.92, "grad_norm": 0.50390625, "learning_rate": 3.67089033617537e-06, "loss": 0.8357, "step": 64285 }, { "epoch": 0.92, "grad_norm": 0.57421875, "learning_rate": 3.6641721602730273e-06, "loss": 1.0281, "step": 64290 }, { "epoch": 0.92, "grad_norm": 0.5703125, "learning_rate": 3.657460022810111e-06, "loss": 0.8819, "step": 64295 }, { "epoch": 0.92, "grad_norm": 0.51171875, "learning_rate": 3.6507539242073172e-06, "loss": 0.9193, "step": 64300 }, { "epoch": 0.92, "grad_norm": 0.55859375, "learning_rate": 3.644053864884989e-06, "loss": 0.9544, "step": 64305 }, { "epoch": 0.92, "grad_norm": 0.5625, "learning_rate": 3.637359845263122e-06, "loss": 0.9234, "step": 64310 }, { "epoch": 0.92, "grad_norm": 0.48828125, "learning_rate": 3.6306718657612816e-06, "loss": 0.9635, "step": 64315 }, { "epoch": 0.92, "grad_norm": 0.55859375, "learning_rate": 3.6239899267986877e-06, "loss": 1.0724, "step": 64320 }, { "epoch": 0.92, "grad_norm": 0.52734375, "learning_rate": 3.617314028794161e-06, "loss": 0.8347, "step": 64325 }, { "epoch": 0.92, "grad_norm": 0.54296875, "learning_rate": 3.6106441721661666e-06, "loss": 0.904, "step": 64330 }, { "epoch": 0.92, "grad_norm": 0.55859375, "learning_rate": 3.6039803573327704e-06, "loss": 0.9772, "step": 64335 }, { "epoch": 0.92, "grad_norm": 0.6171875, "learning_rate": 3.597322584711682e-06, "loss": 1.1015, "step": 64340 }, { "epoch": 0.92, "grad_norm": 0.578125, "learning_rate": 3.5906708547201907e-06, "loss": 0.8824, "step": 64345 }, { "epoch": 0.92, "grad_norm": 0.58984375, "learning_rate": 3.5840251677752514e-06, "loss": 0.9461, "step": 64350 }, { "epoch": 0.92, "grad_norm": 0.60546875, "learning_rate": 3.5773855242934417e-06, "loss": 0.9427, "step": 64355 }, { "epoch": 0.92, "grad_norm": 0.546875, "learning_rate": 3.5707519246909073e-06, "loss": 0.8533, "step": 64360 }, { "epoch": 0.92, "grad_norm": 0.66796875, "learning_rate": 3.56412436938347e-06, "loss": 1.1611, "step": 64365 }, { "epoch": 0.92, "grad_norm": 0.55078125, "learning_rate": 3.5575028587865546e-06, "loss": 1.0923, "step": 64370 }, { "epoch": 0.92, "grad_norm": 0.55078125, "learning_rate": 3.550887393315205e-06, "loss": 1.052, "step": 64375 }, { "epoch": 0.92, "grad_norm": 0.52734375, "learning_rate": 3.544277973384058e-06, "loss": 0.8655, "step": 64380 }, { "epoch": 0.92, "grad_norm": 0.52734375, "learning_rate": 3.5376745994074257e-06, "loss": 0.8928, "step": 64385 }, { "epoch": 0.92, "grad_norm": 0.55859375, "learning_rate": 3.5310772717992215e-06, "loss": 0.907, "step": 64390 }, { "epoch": 0.92, "grad_norm": 0.56640625, "learning_rate": 3.524485990972959e-06, "loss": 0.957, "step": 64395 }, { "epoch": 0.92, "grad_norm": 0.62109375, "learning_rate": 3.5179007573417853e-06, "loss": 1.0026, "step": 64400 }, { "epoch": 0.92, "grad_norm": 0.61328125, "learning_rate": 3.5113215713184934e-06, "loss": 1.0308, "step": 64405 }, { "epoch": 0.92, "grad_norm": 0.6328125, "learning_rate": 3.5047484333154303e-06, "loss": 1.0068, "step": 64410 }, { "epoch": 0.92, "grad_norm": 0.59375, "learning_rate": 3.498181343744644e-06, "loss": 0.8248, "step": 64415 }, { "epoch": 0.92, "grad_norm": 0.515625, "learning_rate": 3.491620303017773e-06, "loss": 0.9557, "step": 64420 }, { "epoch": 0.92, "grad_norm": 0.54296875, "learning_rate": 3.485065311546054e-06, "loss": 1.0306, "step": 64425 }, { "epoch": 0.92, "grad_norm": 0.515625, "learning_rate": 3.4785163697403477e-06, "loss": 0.9902, "step": 64430 }, { "epoch": 0.92, "grad_norm": 0.5234375, "learning_rate": 3.471973478011159e-06, "loss": 1.0089, "step": 64435 }, { "epoch": 0.92, "grad_norm": 0.55078125, "learning_rate": 3.4654366367686263e-06, "loss": 0.9222, "step": 64440 }, { "epoch": 0.92, "grad_norm": 0.5625, "learning_rate": 3.4589058464224554e-06, "loss": 0.8768, "step": 64445 }, { "epoch": 0.92, "grad_norm": 0.515625, "learning_rate": 3.452381107382019e-06, "loss": 0.9449, "step": 64450 }, { "epoch": 0.92, "grad_norm": 0.52734375, "learning_rate": 3.4458624200563116e-06, "loss": 0.9348, "step": 64455 }, { "epoch": 0.92, "grad_norm": 0.59765625, "learning_rate": 3.4393497848538956e-06, "loss": 0.8981, "step": 64460 }, { "epoch": 0.92, "grad_norm": 0.48046875, "learning_rate": 3.4328432021830004e-06, "loss": 0.8987, "step": 64465 }, { "epoch": 0.92, "grad_norm": 0.53515625, "learning_rate": 3.426342672451499e-06, "loss": 1.2103, "step": 64470 }, { "epoch": 0.92, "grad_norm": 0.5546875, "learning_rate": 3.4198481960668103e-06, "loss": 0.9515, "step": 64475 }, { "epoch": 0.92, "grad_norm": 0.6484375, "learning_rate": 3.4133597734360423e-06, "loss": 1.0422, "step": 64480 }, { "epoch": 0.93, "grad_norm": 0.546875, "learning_rate": 3.4068774049658913e-06, "loss": 0.9403, "step": 64485 }, { "epoch": 0.93, "grad_norm": 0.51171875, "learning_rate": 3.400401091062655e-06, "loss": 0.9389, "step": 64490 }, { "epoch": 0.93, "grad_norm": 0.5390625, "learning_rate": 3.393930832132297e-06, "loss": 0.9773, "step": 64495 }, { "epoch": 0.93, "grad_norm": 0.54296875, "learning_rate": 3.3874666285803936e-06, "loss": 0.9369, "step": 64500 }, { "epoch": 0.93, "grad_norm": 0.54296875, "learning_rate": 3.381008480812109e-06, "loss": 0.8346, "step": 64505 }, { "epoch": 0.93, "grad_norm": 0.5625, "learning_rate": 3.374556389232253e-06, "loss": 0.8604, "step": 64510 }, { "epoch": 0.93, "grad_norm": 0.51171875, "learning_rate": 3.368110354245246e-06, "loss": 0.9006, "step": 64515 }, { "epoch": 0.93, "grad_norm": 0.76171875, "learning_rate": 3.3616703762551437e-06, "loss": 1.0335, "step": 64520 }, { "epoch": 0.93, "grad_norm": 0.60546875, "learning_rate": 3.355236455665589e-06, "loss": 0.9049, "step": 64525 }, { "epoch": 0.93, "grad_norm": 0.5546875, "learning_rate": 3.348808592879904e-06, "loss": 0.9769, "step": 64530 }, { "epoch": 0.93, "grad_norm": 0.6171875, "learning_rate": 3.3423867883009664e-06, "loss": 0.979, "step": 64535 }, { "epoch": 0.93, "grad_norm": 0.466796875, "learning_rate": 3.3359710423312984e-06, "loss": 0.9246, "step": 64540 }, { "epoch": 0.93, "grad_norm": 0.546875, "learning_rate": 3.3295613553730566e-06, "loss": 1.0752, "step": 64545 }, { "epoch": 0.93, "grad_norm": 0.6328125, "learning_rate": 3.323157727828019e-06, "loss": 1.1276, "step": 64550 }, { "epoch": 0.93, "grad_norm": 0.66015625, "learning_rate": 3.316760160097554e-06, "loss": 1.0467, "step": 64555 }, { "epoch": 0.93, "grad_norm": 0.54296875, "learning_rate": 3.310368652582674e-06, "loss": 0.9048, "step": 64560 }, { "epoch": 0.93, "grad_norm": 0.5625, "learning_rate": 3.303983205684014e-06, "loss": 0.8651, "step": 64565 }, { "epoch": 0.93, "grad_norm": 0.54296875, "learning_rate": 3.297603819801809e-06, "loss": 0.9898, "step": 64570 }, { "epoch": 0.93, "grad_norm": 0.5078125, "learning_rate": 3.2912304953359173e-06, "loss": 0.8427, "step": 64575 }, { "epoch": 0.93, "grad_norm": 0.578125, "learning_rate": 3.2848632326858643e-06, "loss": 1.0477, "step": 64580 }, { "epoch": 0.93, "grad_norm": 0.578125, "learning_rate": 3.27850203225073e-06, "loss": 1.0262, "step": 64585 }, { "epoch": 0.93, "grad_norm": 0.5234375, "learning_rate": 3.27214689442924e-06, "loss": 0.9684, "step": 64590 }, { "epoch": 0.93, "grad_norm": 0.59375, "learning_rate": 3.2657978196197426e-06, "loss": 1.0751, "step": 64595 }, { "epoch": 0.93, "grad_norm": 0.56640625, "learning_rate": 3.259454808220208e-06, "loss": 1.0147, "step": 64600 }, { "epoch": 0.93, "grad_norm": 0.5390625, "learning_rate": 3.2531178606282297e-06, "loss": 1.0055, "step": 64605 }, { "epoch": 0.93, "grad_norm": 0.53515625, "learning_rate": 3.246786977241012e-06, "loss": 1.0449, "step": 64610 }, { "epoch": 0.93, "grad_norm": 0.58984375, "learning_rate": 3.2404621584553707e-06, "loss": 1.0597, "step": 64615 }, { "epoch": 0.93, "grad_norm": 0.6015625, "learning_rate": 3.2341434046677554e-06, "loss": 0.9698, "step": 64620 }, { "epoch": 0.93, "grad_norm": 0.55859375, "learning_rate": 3.2278307162742607e-06, "loss": 1.0122, "step": 64625 }, { "epoch": 0.93, "grad_norm": 0.5234375, "learning_rate": 3.221524093670536e-06, "loss": 0.989, "step": 64630 }, { "epoch": 0.93, "grad_norm": 0.5390625, "learning_rate": 3.21522353725191e-06, "loss": 0.9688, "step": 64635 }, { "epoch": 0.93, "grad_norm": 0.52734375, "learning_rate": 3.2089290474132893e-06, "loss": 0.8625, "step": 64640 }, { "epoch": 0.93, "grad_norm": 0.458984375, "learning_rate": 3.2026406245492467e-06, "loss": 0.8252, "step": 64645 }, { "epoch": 0.93, "grad_norm": 0.5234375, "learning_rate": 3.1963582690539117e-06, "loss": 0.9317, "step": 64650 }, { "epoch": 0.93, "grad_norm": 0.49609375, "learning_rate": 3.1900819813211025e-06, "loss": 0.9196, "step": 64655 }, { "epoch": 0.93, "grad_norm": 0.609375, "learning_rate": 3.1838117617442266e-06, "loss": 0.9512, "step": 64660 }, { "epoch": 0.93, "grad_norm": 0.5859375, "learning_rate": 3.1775476107162695e-06, "loss": 0.9112, "step": 64665 }, { "epoch": 0.93, "grad_norm": 0.57421875, "learning_rate": 3.171289528629906e-06, "loss": 0.9008, "step": 64670 }, { "epoch": 0.93, "grad_norm": 0.5234375, "learning_rate": 3.1650375158774004e-06, "loss": 0.9157, "step": 64675 }, { "epoch": 0.93, "grad_norm": 0.5078125, "learning_rate": 3.158791572850628e-06, "loss": 0.8422, "step": 64680 }, { "epoch": 0.93, "grad_norm": 0.5859375, "learning_rate": 3.1525516999410866e-06, "loss": 0.9628, "step": 64685 }, { "epoch": 0.93, "grad_norm": 0.55859375, "learning_rate": 3.14631789753993e-06, "loss": 1.0034, "step": 64690 }, { "epoch": 0.93, "grad_norm": 0.52734375, "learning_rate": 3.1400901660378458e-06, "loss": 1.0796, "step": 64695 }, { "epoch": 0.93, "grad_norm": 0.5859375, "learning_rate": 3.1338685058252216e-06, "loss": 0.8844, "step": 64700 }, { "epoch": 0.93, "grad_norm": 0.62890625, "learning_rate": 3.1276529172920675e-06, "loss": 0.9305, "step": 64705 }, { "epoch": 0.93, "grad_norm": 0.57421875, "learning_rate": 3.121443400827928e-06, "loss": 1.0306, "step": 64710 }, { "epoch": 0.93, "grad_norm": 0.59765625, "learning_rate": 3.115239956822058e-06, "loss": 0.8335, "step": 64715 }, { "epoch": 0.93, "grad_norm": 0.5390625, "learning_rate": 3.109042585663313e-06, "loss": 0.9649, "step": 64720 }, { "epoch": 0.93, "grad_norm": 0.60546875, "learning_rate": 3.102851287740105e-06, "loss": 0.9282, "step": 64725 }, { "epoch": 0.93, "grad_norm": 0.439453125, "learning_rate": 3.0966660634405344e-06, "loss": 0.681, "step": 64730 }, { "epoch": 0.93, "grad_norm": 0.60546875, "learning_rate": 3.0904869131523128e-06, "loss": 0.8902, "step": 64735 }, { "epoch": 0.93, "grad_norm": 0.54296875, "learning_rate": 3.0843138372627424e-06, "loss": 1.1055, "step": 64740 }, { "epoch": 0.93, "grad_norm": 0.50390625, "learning_rate": 3.078146836158746e-06, "loss": 0.9402, "step": 64745 }, { "epoch": 0.93, "grad_norm": 0.5546875, "learning_rate": 3.0719859102268933e-06, "loss": 0.8482, "step": 64750 }, { "epoch": 0.93, "grad_norm": 0.515625, "learning_rate": 3.0658310598533633e-06, "loss": 1.1807, "step": 64755 }, { "epoch": 0.93, "grad_norm": 0.52734375, "learning_rate": 3.059682285423926e-06, "loss": 0.9468, "step": 64760 }, { "epoch": 0.93, "grad_norm": 0.515625, "learning_rate": 3.053539587324017e-06, "loss": 0.9515, "step": 64765 }, { "epoch": 0.93, "grad_norm": 0.62890625, "learning_rate": 3.047402965938673e-06, "loss": 0.9918, "step": 64770 }, { "epoch": 0.93, "grad_norm": 0.58203125, "learning_rate": 3.041272421652508e-06, "loss": 1.0043, "step": 64775 }, { "epoch": 0.93, "grad_norm": 0.5390625, "learning_rate": 3.035147954849826e-06, "loss": 0.98, "step": 64780 }, { "epoch": 0.93, "grad_norm": 0.59375, "learning_rate": 3.0290295659145094e-06, "loss": 1.0729, "step": 64785 }, { "epoch": 0.93, "grad_norm": 0.58984375, "learning_rate": 3.02291725523004e-06, "loss": 0.9641, "step": 64790 }, { "epoch": 0.93, "grad_norm": 0.546875, "learning_rate": 3.0168110231795887e-06, "loss": 1.0132, "step": 64795 }, { "epoch": 0.93, "grad_norm": 0.6171875, "learning_rate": 3.010710870145872e-06, "loss": 0.9996, "step": 64800 }, { "epoch": 0.93, "grad_norm": 0.67578125, "learning_rate": 3.0046167965112504e-06, "loss": 1.2168, "step": 64805 }, { "epoch": 0.93, "grad_norm": 0.5, "learning_rate": 2.9985288026577184e-06, "loss": 0.8434, "step": 64810 }, { "epoch": 0.93, "grad_norm": 0.53125, "learning_rate": 2.9924468889668824e-06, "loss": 0.8563, "step": 64815 }, { "epoch": 0.93, "grad_norm": 0.52734375, "learning_rate": 2.9863710558199474e-06, "loss": 1.0859, "step": 64820 }, { "epoch": 0.93, "grad_norm": 0.58203125, "learning_rate": 2.980301303597777e-06, "loss": 1.0357, "step": 64825 }, { "epoch": 0.93, "grad_norm": 0.62890625, "learning_rate": 2.9742376326808095e-06, "loss": 0.8976, "step": 64830 }, { "epoch": 0.93, "grad_norm": 0.52734375, "learning_rate": 2.9681800434491315e-06, "loss": 1.0682, "step": 64835 }, { "epoch": 0.93, "grad_norm": 0.51953125, "learning_rate": 2.9621285362824268e-06, "loss": 0.8746, "step": 64840 }, { "epoch": 0.93, "grad_norm": 0.58203125, "learning_rate": 2.9560831115600486e-06, "loss": 0.9363, "step": 64845 }, { "epoch": 0.93, "grad_norm": 0.65625, "learning_rate": 2.9500437696608928e-06, "loss": 0.9036, "step": 64850 }, { "epoch": 0.93, "grad_norm": 0.61328125, "learning_rate": 2.944010510963513e-06, "loss": 0.9912, "step": 64855 }, { "epoch": 0.93, "grad_norm": 0.5, "learning_rate": 2.937983335846095e-06, "loss": 0.797, "step": 64860 }, { "epoch": 0.93, "grad_norm": 0.6171875, "learning_rate": 2.931962244686437e-06, "loss": 1.0168, "step": 64865 }, { "epoch": 0.93, "grad_norm": 0.60546875, "learning_rate": 2.925947237861926e-06, "loss": 1.0282, "step": 64870 }, { "epoch": 0.93, "grad_norm": 0.62109375, "learning_rate": 2.919938315749604e-06, "loss": 0.974, "step": 64875 }, { "epoch": 0.93, "grad_norm": 0.578125, "learning_rate": 2.9139354787261265e-06, "loss": 1.0569, "step": 64880 }, { "epoch": 0.93, "grad_norm": 0.625, "learning_rate": 2.907938727167725e-06, "loss": 1.0904, "step": 64885 }, { "epoch": 0.93, "grad_norm": 0.48828125, "learning_rate": 2.901948061450299e-06, "loss": 1.0113, "step": 64890 }, { "epoch": 0.93, "grad_norm": 0.52734375, "learning_rate": 2.895963481949371e-06, "loss": 0.9074, "step": 64895 }, { "epoch": 0.93, "grad_norm": 0.546875, "learning_rate": 2.889984989040051e-06, "loss": 0.9813, "step": 64900 }, { "epoch": 0.93, "grad_norm": 0.55859375, "learning_rate": 2.884012583097051e-06, "loss": 0.8772, "step": 64905 }, { "epoch": 0.93, "grad_norm": 0.5859375, "learning_rate": 2.8780462644947603e-06, "loss": 0.923, "step": 64910 }, { "epoch": 0.93, "grad_norm": 0.58203125, "learning_rate": 2.8720860336071355e-06, "loss": 1.0726, "step": 64915 }, { "epoch": 0.93, "grad_norm": 0.64453125, "learning_rate": 2.866131890807777e-06, "loss": 0.9847, "step": 64920 }, { "epoch": 0.93, "grad_norm": 0.88671875, "learning_rate": 2.8601838364699097e-06, "loss": 0.9785, "step": 64925 }, { "epoch": 0.93, "grad_norm": 0.5703125, "learning_rate": 2.8542418709663343e-06, "loss": 0.9507, "step": 64930 }, { "epoch": 0.93, "grad_norm": 0.73828125, "learning_rate": 2.84830599466952e-06, "loss": 0.8207, "step": 64935 }, { "epoch": 0.93, "grad_norm": 0.5703125, "learning_rate": 2.842376207951547e-06, "loss": 0.9632, "step": 64940 }, { "epoch": 0.93, "grad_norm": 0.5078125, "learning_rate": 2.8364525111840735e-06, "loss": 1.0299, "step": 64945 }, { "epoch": 0.93, "grad_norm": 0.640625, "learning_rate": 2.8305349047384355e-06, "loss": 0.9159, "step": 64950 }, { "epoch": 0.93, "grad_norm": 0.52734375, "learning_rate": 2.8246233889855145e-06, "loss": 0.9085, "step": 64955 }, { "epoch": 0.93, "grad_norm": 0.55859375, "learning_rate": 2.8187179642958916e-06, "loss": 0.9428, "step": 64960 }, { "epoch": 0.93, "grad_norm": 0.59375, "learning_rate": 2.812818631039682e-06, "loss": 0.9958, "step": 64965 }, { "epoch": 0.93, "grad_norm": 0.5, "learning_rate": 2.806925389586701e-06, "loss": 0.8781, "step": 64970 }, { "epoch": 0.93, "grad_norm": 0.59765625, "learning_rate": 2.801038240306342e-06, "loss": 0.8886, "step": 64975 }, { "epoch": 0.93, "grad_norm": 0.5078125, "learning_rate": 2.795157183567587e-06, "loss": 0.9409, "step": 64980 }, { "epoch": 0.93, "grad_norm": 0.625, "learning_rate": 2.7892822197390978e-06, "loss": 1.0129, "step": 64985 }, { "epoch": 0.93, "grad_norm": 0.55078125, "learning_rate": 2.7834133491891123e-06, "loss": 0.8587, "step": 64990 }, { "epoch": 0.93, "grad_norm": 0.53515625, "learning_rate": 2.777550572285492e-06, "loss": 0.9335, "step": 64995 }, { "epoch": 0.93, "grad_norm": 0.6171875, "learning_rate": 2.7716938893957323e-06, "loss": 1.0092, "step": 65000 }, { "epoch": 0.93, "grad_norm": 0.59375, "learning_rate": 2.765843300886939e-06, "loss": 0.9026, "step": 65005 }, { "epoch": 0.93, "grad_norm": 0.609375, "learning_rate": 2.75999880712583e-06, "loss": 0.9924, "step": 65010 }, { "epoch": 0.93, "grad_norm": 0.52734375, "learning_rate": 2.7541604084787343e-06, "loss": 0.8775, "step": 65015 }, { "epoch": 0.93, "grad_norm": 0.609375, "learning_rate": 2.748328105311626e-06, "loss": 0.7592, "step": 65020 }, { "epoch": 0.93, "grad_norm": 0.6796875, "learning_rate": 2.7425018979900574e-06, "loss": 1.1202, "step": 65025 }, { "epoch": 0.93, "grad_norm": 0.65234375, "learning_rate": 2.736681786879247e-06, "loss": 1.1027, "step": 65030 }, { "epoch": 0.93, "grad_norm": 0.6328125, "learning_rate": 2.7308677723440033e-06, "loss": 1.0419, "step": 65035 }, { "epoch": 0.93, "grad_norm": 0.61328125, "learning_rate": 2.725059854748735e-06, "loss": 1.0767, "step": 65040 }, { "epoch": 0.93, "grad_norm": 0.578125, "learning_rate": 2.7192580344574948e-06, "loss": 0.9814, "step": 65045 }, { "epoch": 0.93, "grad_norm": 0.61328125, "learning_rate": 2.7134623118339807e-06, "loss": 1.1068, "step": 65050 }, { "epoch": 0.93, "grad_norm": 0.53515625, "learning_rate": 2.7076726872414358e-06, "loss": 0.924, "step": 65055 }, { "epoch": 0.93, "grad_norm": 0.5390625, "learning_rate": 2.70188916104277e-06, "loss": 0.9105, "step": 65060 }, { "epoch": 0.93, "grad_norm": 0.59375, "learning_rate": 2.6961117336005036e-06, "loss": 0.968, "step": 65065 }, { "epoch": 0.93, "grad_norm": 0.6171875, "learning_rate": 2.690340405276781e-06, "loss": 0.9132, "step": 65070 }, { "epoch": 0.93, "grad_norm": 0.53515625, "learning_rate": 2.6845751764333347e-06, "loss": 0.9445, "step": 65075 }, { "epoch": 0.93, "grad_norm": 0.494140625, "learning_rate": 2.678816047431554e-06, "loss": 0.9016, "step": 65080 }, { "epoch": 0.93, "grad_norm": 0.6015625, "learning_rate": 2.6730630186324378e-06, "loss": 1.0218, "step": 65085 }, { "epoch": 0.93, "grad_norm": 0.51171875, "learning_rate": 2.6673160903965656e-06, "loss": 0.9595, "step": 65090 }, { "epoch": 0.93, "grad_norm": 0.578125, "learning_rate": 2.6615752630841707e-06, "loss": 1.0249, "step": 65095 }, { "epoch": 0.93, "grad_norm": 0.486328125, "learning_rate": 2.6558405370551097e-06, "loss": 0.867, "step": 65100 }, { "epoch": 0.93, "grad_norm": 0.5703125, "learning_rate": 2.650111912668818e-06, "loss": 1.1201, "step": 65105 }, { "epoch": 0.93, "grad_norm": 0.6328125, "learning_rate": 2.6443893902843853e-06, "loss": 0.765, "step": 65110 }, { "epoch": 0.93, "grad_norm": 0.56640625, "learning_rate": 2.6386729702605027e-06, "loss": 0.8427, "step": 65115 }, { "epoch": 0.93, "grad_norm": 0.5859375, "learning_rate": 2.632962652955495e-06, "loss": 0.9295, "step": 65120 }, { "epoch": 0.93, "grad_norm": 0.51953125, "learning_rate": 2.6272584387272757e-06, "loss": 0.8598, "step": 65125 }, { "epoch": 0.93, "grad_norm": 0.57421875, "learning_rate": 2.621560327933381e-06, "loss": 0.9254, "step": 65130 }, { "epoch": 0.93, "grad_norm": 0.640625, "learning_rate": 2.6158683209310142e-06, "loss": 0.9867, "step": 65135 }, { "epoch": 0.93, "grad_norm": 0.77734375, "learning_rate": 2.610182418076912e-06, "loss": 1.1187, "step": 65140 }, { "epoch": 0.93, "grad_norm": 0.5859375, "learning_rate": 2.6045026197275e-06, "loss": 0.8453, "step": 65145 }, { "epoch": 0.93, "grad_norm": 0.625, "learning_rate": 2.5988289262387944e-06, "loss": 0.8491, "step": 65150 }, { "epoch": 0.93, "grad_norm": 0.6640625, "learning_rate": 2.59316133796641e-06, "loss": 1.0447, "step": 65155 }, { "epoch": 0.93, "grad_norm": 0.56640625, "learning_rate": 2.5874998552656293e-06, "loss": 0.9154, "step": 65160 }, { "epoch": 0.93, "grad_norm": 0.61328125, "learning_rate": 2.581844478491291e-06, "loss": 1.0599, "step": 65165 }, { "epoch": 0.93, "grad_norm": 0.5859375, "learning_rate": 2.576195207997889e-06, "loss": 1.068, "step": 65170 }, { "epoch": 0.93, "grad_norm": 0.578125, "learning_rate": 2.5705520441395183e-06, "loss": 0.9493, "step": 65175 }, { "epoch": 0.93, "grad_norm": 0.494140625, "learning_rate": 2.5649149872699287e-06, "loss": 1.04, "step": 65180 }, { "epoch": 0.94, "grad_norm": 0.78515625, "learning_rate": 2.5592840377424153e-06, "loss": 1.0012, "step": 65185 }, { "epoch": 0.94, "grad_norm": 0.64453125, "learning_rate": 2.5536591959099634e-06, "loss": 0.9686, "step": 65190 }, { "epoch": 0.94, "grad_norm": 0.5703125, "learning_rate": 2.5480404621251453e-06, "loss": 0.8808, "step": 65195 }, { "epoch": 0.94, "grad_norm": 0.55078125, "learning_rate": 2.5424278367401244e-06, "loss": 0.8381, "step": 65200 }, { "epoch": 0.94, "grad_norm": 0.546875, "learning_rate": 2.5368213201067192e-06, "loss": 0.9821, "step": 65205 }, { "epoch": 0.94, "grad_norm": 0.5546875, "learning_rate": 2.531220912576371e-06, "loss": 0.9479, "step": 65210 }, { "epoch": 0.94, "grad_norm": 0.515625, "learning_rate": 2.5256266145000986e-06, "loss": 0.9484, "step": 65215 }, { "epoch": 0.94, "grad_norm": 0.578125, "learning_rate": 2.5200384262285658e-06, "loss": 0.9905, "step": 65220 }, { "epoch": 0.94, "grad_norm": 0.55078125, "learning_rate": 2.5144563481120263e-06, "loss": 0.9472, "step": 65225 }, { "epoch": 0.94, "grad_norm": 0.50390625, "learning_rate": 2.508880380500411e-06, "loss": 1.0241, "step": 65230 }, { "epoch": 0.94, "grad_norm": 0.66015625, "learning_rate": 2.5033105237431964e-06, "loss": 0.9847, "step": 65235 }, { "epoch": 0.94, "grad_norm": 0.63671875, "learning_rate": 2.4977467781895138e-06, "loss": 1.1087, "step": 65240 }, { "epoch": 0.94, "grad_norm": 0.59375, "learning_rate": 2.4921891441881286e-06, "loss": 0.9616, "step": 65245 }, { "epoch": 0.94, "grad_norm": 0.6796875, "learning_rate": 2.486637622087362e-06, "loss": 1.0369, "step": 65250 }, { "epoch": 0.94, "grad_norm": 0.5390625, "learning_rate": 2.4810922122352032e-06, "loss": 0.9011, "step": 65255 }, { "epoch": 0.94, "grad_norm": 0.50390625, "learning_rate": 2.475552914979273e-06, "loss": 1.0724, "step": 65260 }, { "epoch": 0.94, "grad_norm": 0.51953125, "learning_rate": 2.4700197306667393e-06, "loss": 1.0771, "step": 65265 }, { "epoch": 0.94, "grad_norm": 0.59765625, "learning_rate": 2.4644926596444464e-06, "loss": 0.9856, "step": 65270 }, { "epoch": 0.94, "grad_norm": 0.625, "learning_rate": 2.45897170225885e-06, "loss": 1.0245, "step": 65275 }, { "epoch": 0.94, "grad_norm": 0.578125, "learning_rate": 2.4534568588559847e-06, "loss": 0.9473, "step": 65280 }, { "epoch": 0.94, "grad_norm": 0.55859375, "learning_rate": 2.447948129781541e-06, "loss": 1.0451, "step": 65285 }, { "epoch": 0.94, "grad_norm": 0.5, "learning_rate": 2.4424455153808202e-06, "loss": 0.8536, "step": 65290 }, { "epoch": 0.94, "grad_norm": 0.56640625, "learning_rate": 2.4369490159987018e-06, "loss": 0.8815, "step": 65295 }, { "epoch": 0.94, "grad_norm": 0.63671875, "learning_rate": 2.431458631979744e-06, "loss": 1.0601, "step": 65300 }, { "epoch": 0.94, "grad_norm": 0.53125, "learning_rate": 2.425974363668093e-06, "loss": 0.8539, "step": 65305 }, { "epoch": 0.94, "grad_norm": 0.546875, "learning_rate": 2.420496211407486e-06, "loss": 0.9943, "step": 65310 }, { "epoch": 0.94, "grad_norm": 0.51953125, "learning_rate": 2.4150241755413026e-06, "loss": 0.9937, "step": 65315 }, { "epoch": 0.94, "grad_norm": 0.53125, "learning_rate": 2.4095582564125584e-06, "loss": 0.9439, "step": 65320 }, { "epoch": 0.94, "grad_norm": 0.5078125, "learning_rate": 2.4040984543638567e-06, "loss": 0.8602, "step": 65325 }, { "epoch": 0.94, "grad_norm": 0.56640625, "learning_rate": 2.3986447697374013e-06, "loss": 0.8506, "step": 65330 }, { "epoch": 0.94, "grad_norm": 0.52734375, "learning_rate": 2.3931972028750414e-06, "loss": 0.8138, "step": 65335 }, { "epoch": 0.94, "grad_norm": 0.51171875, "learning_rate": 2.3877557541182704e-06, "loss": 0.8287, "step": 65340 }, { "epoch": 0.94, "grad_norm": 0.58203125, "learning_rate": 2.3823204238081153e-06, "loss": 0.9755, "step": 65345 }, { "epoch": 0.94, "grad_norm": 0.53125, "learning_rate": 2.376891212285304e-06, "loss": 0.9008, "step": 65350 }, { "epoch": 0.94, "grad_norm": 0.5625, "learning_rate": 2.371468119890141e-06, "loss": 1.1037, "step": 65355 }, { "epoch": 0.94, "grad_norm": 0.5546875, "learning_rate": 2.3660511469625336e-06, "loss": 0.9623, "step": 65360 }, { "epoch": 0.94, "grad_norm": 0.55078125, "learning_rate": 2.3606402938420425e-06, "loss": 0.8876, "step": 65365 }, { "epoch": 0.94, "grad_norm": 0.60546875, "learning_rate": 2.35523556086783e-06, "loss": 1.1445, "step": 65370 }, { "epoch": 0.94, "grad_norm": 0.4921875, "learning_rate": 2.3498369483786476e-06, "loss": 0.9246, "step": 65375 }, { "epoch": 0.94, "grad_norm": 0.6484375, "learning_rate": 2.3444444567128907e-06, "loss": 1.0409, "step": 65380 }, { "epoch": 0.94, "grad_norm": 0.5234375, "learning_rate": 2.339058086208601e-06, "loss": 1.1015, "step": 65385 }, { "epoch": 0.94, "grad_norm": 0.52734375, "learning_rate": 2.333677837203352e-06, "loss": 0.8732, "step": 65390 }, { "epoch": 0.94, "grad_norm": 0.61328125, "learning_rate": 2.3283037100344187e-06, "loss": 0.7876, "step": 65395 }, { "epoch": 0.94, "grad_norm": 0.53125, "learning_rate": 2.3229357050386536e-06, "loss": 0.8576, "step": 65400 }, { "epoch": 0.94, "grad_norm": 0.57421875, "learning_rate": 2.31757382255251e-06, "loss": 0.9901, "step": 65405 }, { "epoch": 0.94, "grad_norm": 0.51953125, "learning_rate": 2.312218062912097e-06, "loss": 0.8474, "step": 65410 }, { "epoch": 0.94, "grad_norm": 0.6171875, "learning_rate": 2.3068684264531236e-06, "loss": 0.8593, "step": 65415 }, { "epoch": 0.94, "grad_norm": 0.51171875, "learning_rate": 2.3015249135108883e-06, "loss": 0.9145, "step": 65420 }, { "epoch": 0.94, "grad_norm": 0.58984375, "learning_rate": 2.296187524420346e-06, "loss": 1.052, "step": 65425 }, { "epoch": 0.94, "grad_norm": 0.59375, "learning_rate": 2.290856259516039e-06, "loss": 0.948, "step": 65430 }, { "epoch": 0.94, "grad_norm": 0.625, "learning_rate": 2.285531119132145e-06, "loss": 1.0625, "step": 65435 }, { "epoch": 0.94, "grad_norm": 0.51953125, "learning_rate": 2.2802121036024528e-06, "loss": 0.9666, "step": 65440 }, { "epoch": 0.94, "grad_norm": 0.6328125, "learning_rate": 2.274899213260351e-06, "loss": 1.0447, "step": 65445 }, { "epoch": 0.94, "grad_norm": 0.54296875, "learning_rate": 2.269592448438873e-06, "loss": 1.0886, "step": 65450 }, { "epoch": 0.94, "grad_norm": 0.51953125, "learning_rate": 2.2642918094706423e-06, "loss": 0.8001, "step": 65455 }, { "epoch": 0.94, "grad_norm": 0.53125, "learning_rate": 2.2589972966879037e-06, "loss": 0.9675, "step": 65460 }, { "epoch": 0.94, "grad_norm": 0.5078125, "learning_rate": 2.253708910422547e-06, "loss": 0.8762, "step": 65465 }, { "epoch": 0.94, "grad_norm": 0.5703125, "learning_rate": 2.2484266510060416e-06, "loss": 0.9948, "step": 65470 }, { "epoch": 0.94, "grad_norm": 0.5390625, "learning_rate": 2.2431505187694767e-06, "loss": 0.8915, "step": 65475 }, { "epoch": 0.94, "grad_norm": 0.55859375, "learning_rate": 2.2378805140435778e-06, "loss": 0.9509, "step": 65480 }, { "epoch": 0.94, "grad_norm": 0.546875, "learning_rate": 2.232616637158669e-06, "loss": 0.9404, "step": 65485 }, { "epoch": 0.94, "grad_norm": 0.5703125, "learning_rate": 2.227358888444686e-06, "loss": 0.9962, "step": 65490 }, { "epoch": 0.94, "grad_norm": 0.47265625, "learning_rate": 2.2221072682312103e-06, "loss": 0.9881, "step": 65495 }, { "epoch": 0.94, "grad_norm": 0.5625, "learning_rate": 2.216861776847412e-06, "loss": 1.0974, "step": 65500 }, { "epoch": 0.94, "grad_norm": 0.494140625, "learning_rate": 2.211622414622072e-06, "loss": 1.0143, "step": 65505 }, { "epoch": 0.94, "grad_norm": 0.5703125, "learning_rate": 2.206389181883628e-06, "loss": 0.9016, "step": 65510 }, { "epoch": 0.94, "grad_norm": 0.515625, "learning_rate": 2.201162078960073e-06, "loss": 1.0309, "step": 65515 }, { "epoch": 0.94, "grad_norm": 0.59375, "learning_rate": 2.195941106179067e-06, "loss": 0.9086, "step": 65520 }, { "epoch": 0.94, "grad_norm": 0.46484375, "learning_rate": 2.1907262638678705e-06, "loss": 0.9846, "step": 65525 }, { "epoch": 0.94, "grad_norm": 0.63671875, "learning_rate": 2.1855175523533443e-06, "loss": 0.8841, "step": 65530 }, { "epoch": 0.94, "grad_norm": 0.52734375, "learning_rate": 2.180314971961972e-06, "loss": 0.8873, "step": 65535 }, { "epoch": 0.94, "grad_norm": 0.53125, "learning_rate": 2.175118523019859e-06, "loss": 0.9228, "step": 65540 }, { "epoch": 0.94, "grad_norm": 0.52734375, "learning_rate": 2.169928205852745e-06, "loss": 0.8389, "step": 65545 }, { "epoch": 0.94, "grad_norm": 0.59375, "learning_rate": 2.164744020785936e-06, "loss": 0.9474, "step": 65550 }, { "epoch": 0.94, "grad_norm": 0.50390625, "learning_rate": 2.159565968144406e-06, "loss": 0.7416, "step": 65555 }, { "epoch": 0.94, "grad_norm": 0.54296875, "learning_rate": 2.154394048252717e-06, "loss": 0.8988, "step": 65560 }, { "epoch": 0.94, "grad_norm": 0.56640625, "learning_rate": 2.1492282614350323e-06, "loss": 0.9856, "step": 65565 }, { "epoch": 0.94, "grad_norm": 0.5390625, "learning_rate": 2.1440686080151708e-06, "loss": 0.9949, "step": 65570 }, { "epoch": 0.94, "grad_norm": 0.58203125, "learning_rate": 2.1389150883165397e-06, "loss": 0.9206, "step": 65575 }, { "epoch": 0.94, "grad_norm": 0.56640625, "learning_rate": 2.1337677026621707e-06, "loss": 0.8943, "step": 65580 }, { "epoch": 0.94, "grad_norm": 0.56640625, "learning_rate": 2.1286264513746822e-06, "loss": 0.9125, "step": 65585 }, { "epoch": 0.94, "grad_norm": 0.486328125, "learning_rate": 2.123491334776373e-06, "loss": 0.9924, "step": 65590 }, { "epoch": 0.94, "grad_norm": 0.4921875, "learning_rate": 2.1183623531890963e-06, "loss": 0.9117, "step": 65595 }, { "epoch": 0.94, "grad_norm": 0.625, "learning_rate": 2.113239506934328e-06, "loss": 0.9869, "step": 65600 }, { "epoch": 0.94, "grad_norm": 0.58984375, "learning_rate": 2.1081227963332116e-06, "loss": 0.9346, "step": 65605 }, { "epoch": 0.94, "grad_norm": 0.5859375, "learning_rate": 2.103012221706435e-06, "loss": 0.9464, "step": 65610 }, { "epoch": 0.94, "grad_norm": 0.57421875, "learning_rate": 2.0979077833743532e-06, "loss": 0.9183, "step": 65615 }, { "epoch": 0.94, "grad_norm": 0.53125, "learning_rate": 2.09280948165691e-06, "loss": 0.9272, "step": 65620 }, { "epoch": 0.94, "grad_norm": 0.5625, "learning_rate": 2.087717316873683e-06, "loss": 0.9441, "step": 65625 }, { "epoch": 0.94, "grad_norm": 0.546875, "learning_rate": 2.082631289343828e-06, "loss": 0.8887, "step": 65630 }, { "epoch": 0.94, "grad_norm": 0.57421875, "learning_rate": 2.0775513993861905e-06, "loss": 0.941, "step": 65635 }, { "epoch": 0.94, "grad_norm": 0.55859375, "learning_rate": 2.0724776473191374e-06, "loss": 0.9125, "step": 65640 }, { "epoch": 0.94, "grad_norm": 0.55859375, "learning_rate": 2.067410033460726e-06, "loss": 0.9141, "step": 65645 }, { "epoch": 0.94, "grad_norm": 0.546875, "learning_rate": 2.0623485581285795e-06, "loss": 1.0082, "step": 65650 }, { "epoch": 0.94, "grad_norm": 0.54296875, "learning_rate": 2.0572932216399665e-06, "loss": 1.0309, "step": 65655 }, { "epoch": 0.94, "grad_norm": 0.5546875, "learning_rate": 2.052244024311767e-06, "loss": 0.931, "step": 65660 }, { "epoch": 0.94, "grad_norm": 0.61328125, "learning_rate": 2.0472009664604606e-06, "loss": 1.0014, "step": 65665 }, { "epoch": 0.94, "grad_norm": 0.6015625, "learning_rate": 2.042164048402162e-06, "loss": 0.996, "step": 65670 }, { "epoch": 0.94, "grad_norm": 0.55859375, "learning_rate": 2.0371332704525847e-06, "loss": 0.9044, "step": 65675 }, { "epoch": 0.94, "grad_norm": 0.5390625, "learning_rate": 2.0321086329270547e-06, "loss": 0.854, "step": 65680 }, { "epoch": 0.94, "grad_norm": 0.5546875, "learning_rate": 2.0270901361405412e-06, "loss": 1.0305, "step": 65685 }, { "epoch": 0.94, "grad_norm": 0.58203125, "learning_rate": 2.0220777804076053e-06, "loss": 0.9537, "step": 65690 }, { "epoch": 0.94, "grad_norm": 0.5390625, "learning_rate": 2.0170715660424166e-06, "loss": 0.7839, "step": 65695 }, { "epoch": 0.94, "grad_norm": 0.51953125, "learning_rate": 2.0120714933587803e-06, "loss": 1.1057, "step": 65700 }, { "epoch": 0.94, "grad_norm": 0.65234375, "learning_rate": 2.00707756267009e-06, "loss": 0.9495, "step": 65705 }, { "epoch": 0.94, "grad_norm": 0.58984375, "learning_rate": 2.002089774289384e-06, "loss": 0.9803, "step": 65710 }, { "epoch": 0.94, "grad_norm": 0.6640625, "learning_rate": 1.9971081285293013e-06, "loss": 0.9487, "step": 65715 }, { "epoch": 0.94, "grad_norm": 0.546875, "learning_rate": 1.9921326257021035e-06, "loss": 1.0803, "step": 65720 }, { "epoch": 0.94, "grad_norm": 0.64453125, "learning_rate": 1.9871632661196406e-06, "loss": 0.9914, "step": 65725 }, { "epoch": 0.94, "grad_norm": 0.49609375, "learning_rate": 1.98220005009343e-06, "loss": 0.8314, "step": 65730 }, { "epoch": 0.94, "grad_norm": 0.62890625, "learning_rate": 1.9772429779345457e-06, "loss": 0.8738, "step": 65735 }, { "epoch": 0.94, "grad_norm": 0.59375, "learning_rate": 1.9722920499537168e-06, "loss": 0.9574, "step": 65740 }, { "epoch": 0.94, "grad_norm": 0.578125, "learning_rate": 1.96734726646125e-06, "loss": 1.0062, "step": 65745 }, { "epoch": 0.94, "grad_norm": 0.5234375, "learning_rate": 1.9624086277671206e-06, "loss": 0.8644, "step": 65750 }, { "epoch": 0.94, "grad_norm": 0.5703125, "learning_rate": 1.957476134180869e-06, "loss": 1.0087, "step": 65755 }, { "epoch": 0.94, "grad_norm": 0.470703125, "learning_rate": 1.9525497860116703e-06, "loss": 0.7952, "step": 65760 }, { "epoch": 0.94, "grad_norm": 0.4765625, "learning_rate": 1.9476295835683223e-06, "loss": 0.9089, "step": 65765 }, { "epoch": 0.94, "grad_norm": 0.50390625, "learning_rate": 1.9427155271592224e-06, "loss": 0.8125, "step": 65770 }, { "epoch": 0.94, "grad_norm": 0.63671875, "learning_rate": 1.937807617092391e-06, "loss": 1.1625, "step": 65775 }, { "epoch": 0.94, "grad_norm": 0.71484375, "learning_rate": 1.9329058536754598e-06, "loss": 0.9517, "step": 65780 }, { "epoch": 0.94, "grad_norm": 0.625, "learning_rate": 1.9280102372156827e-06, "loss": 0.8917, "step": 65785 }, { "epoch": 0.94, "grad_norm": 0.54296875, "learning_rate": 1.923120768019926e-06, "loss": 0.9509, "step": 65790 }, { "epoch": 0.94, "grad_norm": 0.57421875, "learning_rate": 1.918237446394644e-06, "loss": 1.0978, "step": 65795 }, { "epoch": 0.94, "grad_norm": 0.498046875, "learning_rate": 1.913360272645959e-06, "loss": 0.8706, "step": 65800 }, { "epoch": 0.94, "grad_norm": 0.55078125, "learning_rate": 1.908489247079548e-06, "loss": 0.9537, "step": 65805 }, { "epoch": 0.94, "grad_norm": 0.5625, "learning_rate": 1.9036243700007562e-06, "loss": 0.8734, "step": 65810 }, { "epoch": 0.94, "grad_norm": 0.53125, "learning_rate": 1.8987656417145173e-06, "loss": 1.0161, "step": 65815 }, { "epoch": 0.94, "grad_norm": 0.53515625, "learning_rate": 1.8939130625253653e-06, "loss": 1.0725, "step": 65820 }, { "epoch": 0.94, "grad_norm": 0.53125, "learning_rate": 1.8890666327374795e-06, "loss": 0.8345, "step": 65825 }, { "epoch": 0.94, "grad_norm": 0.486328125, "learning_rate": 1.884226352654639e-06, "loss": 0.912, "step": 65830 }, { "epoch": 0.94, "grad_norm": 0.58203125, "learning_rate": 1.879392222580234e-06, "loss": 1.0161, "step": 65835 }, { "epoch": 0.94, "grad_norm": 0.58203125, "learning_rate": 1.8745642428172783e-06, "loss": 0.7901, "step": 65840 }, { "epoch": 0.94, "grad_norm": 0.58203125, "learning_rate": 1.8697424136683961e-06, "loss": 1.0204, "step": 65845 }, { "epoch": 0.94, "grad_norm": 0.5703125, "learning_rate": 1.8649267354358012e-06, "loss": 0.9864, "step": 65850 }, { "epoch": 0.94, "grad_norm": 0.57421875, "learning_rate": 1.8601172084213747e-06, "loss": 0.8536, "step": 65855 }, { "epoch": 0.94, "grad_norm": 0.53515625, "learning_rate": 1.855313832926575e-06, "loss": 0.9017, "step": 65860 }, { "epoch": 0.94, "grad_norm": 0.58203125, "learning_rate": 1.850516609252484e-06, "loss": 1.0248, "step": 65865 }, { "epoch": 0.94, "grad_norm": 0.4921875, "learning_rate": 1.8457255376997828e-06, "loss": 0.915, "step": 65870 }, { "epoch": 0.94, "grad_norm": 0.546875, "learning_rate": 1.8409406185688093e-06, "loss": 0.8835, "step": 65875 }, { "epoch": 0.95, "grad_norm": 0.60546875, "learning_rate": 1.8361618521594571e-06, "loss": 1.0339, "step": 65880 }, { "epoch": 0.95, "grad_norm": 0.63671875, "learning_rate": 1.831389238771286e-06, "loss": 1.0657, "step": 65885 }, { "epoch": 0.95, "grad_norm": 0.59375, "learning_rate": 1.8266227787034463e-06, "loss": 0.9807, "step": 65890 }, { "epoch": 0.95, "grad_norm": 0.55078125, "learning_rate": 1.8218624722547095e-06, "loss": 0.9648, "step": 65895 }, { "epoch": 0.95, "grad_norm": 0.54296875, "learning_rate": 1.8171083197234262e-06, "loss": 0.9703, "step": 65900 }, { "epoch": 0.95, "grad_norm": 0.54296875, "learning_rate": 1.8123603214076246e-06, "loss": 0.8993, "step": 65905 }, { "epoch": 0.95, "grad_norm": 0.58984375, "learning_rate": 1.807618477604911e-06, "loss": 1.0617, "step": 65910 }, { "epoch": 0.95, "grad_norm": 0.55078125, "learning_rate": 1.8028827886124922e-06, "loss": 0.9137, "step": 65915 }, { "epoch": 0.95, "grad_norm": 0.5859375, "learning_rate": 1.7981532547272195e-06, "loss": 0.8972, "step": 65920 }, { "epoch": 0.95, "grad_norm": 0.51171875, "learning_rate": 1.793429876245545e-06, "loss": 0.8466, "step": 65925 }, { "epoch": 0.95, "grad_norm": 0.54296875, "learning_rate": 1.7887126534635311e-06, "loss": 1.1436, "step": 65930 }, { "epoch": 0.95, "grad_norm": 0.5390625, "learning_rate": 1.7840015866768645e-06, "loss": 0.8009, "step": 65935 }, { "epoch": 0.95, "grad_norm": 0.54296875, "learning_rate": 1.7792966761808416e-06, "loss": 0.9147, "step": 65940 }, { "epoch": 0.95, "grad_norm": 0.6484375, "learning_rate": 1.7745979222703602e-06, "loss": 0.9281, "step": 65945 }, { "epoch": 0.95, "grad_norm": 0.5703125, "learning_rate": 1.769905325239951e-06, "loss": 1.0265, "step": 65950 }, { "epoch": 0.95, "grad_norm": 0.5546875, "learning_rate": 1.765218885383757e-06, "loss": 0.8913, "step": 65955 }, { "epoch": 0.95, "grad_norm": 0.498046875, "learning_rate": 1.7605386029955205e-06, "loss": 0.9417, "step": 65960 }, { "epoch": 0.95, "grad_norm": 0.578125, "learning_rate": 1.7558644783685962e-06, "loss": 0.9741, "step": 65965 }, { "epoch": 0.95, "grad_norm": 0.5234375, "learning_rate": 1.751196511795994e-06, "loss": 0.8871, "step": 65970 }, { "epoch": 0.95, "grad_norm": 0.6328125, "learning_rate": 1.7465347035702795e-06, "loss": 0.8722, "step": 65975 }, { "epoch": 0.95, "grad_norm": 0.55859375, "learning_rate": 1.741879053983675e-06, "loss": 0.9352, "step": 65980 }, { "epoch": 0.95, "grad_norm": 0.51953125, "learning_rate": 1.7372295633280023e-06, "loss": 0.8443, "step": 65985 }, { "epoch": 0.95, "grad_norm": 0.58984375, "learning_rate": 1.7325862318946951e-06, "loss": 1.0828, "step": 65990 }, { "epoch": 0.95, "grad_norm": 0.58984375, "learning_rate": 1.727949059974787e-06, "loss": 0.8578, "step": 65995 }, { "epoch": 0.95, "grad_norm": 0.6015625, "learning_rate": 1.7233180478589683e-06, "loss": 0.886, "step": 66000 }, { "epoch": 0.95, "grad_norm": 0.65625, "learning_rate": 1.7186931958374953e-06, "loss": 1.0852, "step": 66005 }, { "epoch": 0.95, "grad_norm": 0.5625, "learning_rate": 1.7140745042002694e-06, "loss": 0.9812, "step": 66010 }, { "epoch": 0.95, "grad_norm": 0.578125, "learning_rate": 1.7094619732367812e-06, "loss": 0.923, "step": 66015 }, { "epoch": 0.95, "grad_norm": 0.52734375, "learning_rate": 1.7048556032361773e-06, "loss": 0.9416, "step": 66020 }, { "epoch": 0.95, "grad_norm": 0.5703125, "learning_rate": 1.7002553944871602e-06, "loss": 0.9781, "step": 66025 }, { "epoch": 0.95, "grad_norm": 0.53515625, "learning_rate": 1.6956613472780881e-06, "loss": 0.8714, "step": 66030 }, { "epoch": 0.95, "grad_norm": 0.62890625, "learning_rate": 1.6910734618969304e-06, "loss": 0.8347, "step": 66035 }, { "epoch": 0.95, "grad_norm": 0.49609375, "learning_rate": 1.6864917386312462e-06, "loss": 0.8215, "step": 66040 }, { "epoch": 0.95, "grad_norm": 0.55859375, "learning_rate": 1.681916177768228e-06, "loss": 0.9532, "step": 66045 }, { "epoch": 0.95, "grad_norm": 0.56640625, "learning_rate": 1.6773467795946796e-06, "loss": 0.9491, "step": 66050 }, { "epoch": 0.95, "grad_norm": 0.5859375, "learning_rate": 1.6727835443970275e-06, "loss": 1.0347, "step": 66055 }, { "epoch": 0.95, "grad_norm": 0.53515625, "learning_rate": 1.6682264724612762e-06, "loss": 0.9638, "step": 66060 }, { "epoch": 0.95, "grad_norm": 0.53515625, "learning_rate": 1.6636755640730862e-06, "loss": 0.7433, "step": 66065 }, { "epoch": 0.95, "grad_norm": 0.4765625, "learning_rate": 1.6591308195176958e-06, "loss": 0.9629, "step": 66070 }, { "epoch": 0.95, "grad_norm": 0.63671875, "learning_rate": 1.6545922390799994e-06, "loss": 0.9322, "step": 66075 }, { "epoch": 0.95, "grad_norm": 0.5, "learning_rate": 1.6500598230444586e-06, "loss": 0.7957, "step": 66080 }, { "epoch": 0.95, "grad_norm": 0.53125, "learning_rate": 1.6455335716951793e-06, "loss": 0.9208, "step": 66085 }, { "epoch": 0.95, "grad_norm": 0.54296875, "learning_rate": 1.6410134853158786e-06, "loss": 0.9595, "step": 66090 }, { "epoch": 0.95, "grad_norm": 0.546875, "learning_rate": 1.636499564189875e-06, "loss": 0.8989, "step": 66095 }, { "epoch": 0.95, "grad_norm": 0.4921875, "learning_rate": 1.6319918086000974e-06, "loss": 0.8626, "step": 66100 }, { "epoch": 0.95, "grad_norm": 0.55859375, "learning_rate": 1.6274902188291197e-06, "loss": 0.8508, "step": 66105 }, { "epoch": 0.95, "grad_norm": 0.546875, "learning_rate": 1.6229947951590718e-06, "loss": 0.9814, "step": 66110 }, { "epoch": 0.95, "grad_norm": 0.58203125, "learning_rate": 1.6185055378717728e-06, "loss": 0.954, "step": 66115 }, { "epoch": 0.95, "grad_norm": 0.5859375, "learning_rate": 1.6140224472485755e-06, "loss": 1.0494, "step": 66120 }, { "epoch": 0.95, "grad_norm": 0.51953125, "learning_rate": 1.6095455235705104e-06, "loss": 0.913, "step": 66125 }, { "epoch": 0.95, "grad_norm": 0.5234375, "learning_rate": 1.6050747671181864e-06, "loss": 0.9894, "step": 66130 }, { "epoch": 0.95, "grad_norm": 0.515625, "learning_rate": 1.6006101781718463e-06, "loss": 0.7572, "step": 66135 }, { "epoch": 0.95, "grad_norm": 0.5703125, "learning_rate": 1.5961517570113215e-06, "loss": 1.012, "step": 66140 }, { "epoch": 0.95, "grad_norm": 0.56640625, "learning_rate": 1.5916995039160775e-06, "loss": 0.8833, "step": 66145 }, { "epoch": 0.95, "grad_norm": 0.58203125, "learning_rate": 1.58725341916518e-06, "loss": 1.0355, "step": 66150 }, { "epoch": 0.95, "grad_norm": 0.45703125, "learning_rate": 1.5828135030373282e-06, "loss": 0.8036, "step": 66155 }, { "epoch": 0.95, "grad_norm": 0.58984375, "learning_rate": 1.5783797558108326e-06, "loss": 0.9307, "step": 66160 }, { "epoch": 0.95, "grad_norm": 0.55078125, "learning_rate": 1.5739521777635601e-06, "loss": 0.9648, "step": 66165 }, { "epoch": 0.95, "grad_norm": 0.5703125, "learning_rate": 1.5695307691730665e-06, "loss": 0.9466, "step": 66170 }, { "epoch": 0.95, "grad_norm": 0.58203125, "learning_rate": 1.5651155303164967e-06, "loss": 1.1032, "step": 66175 }, { "epoch": 0.95, "grad_norm": 0.6484375, "learning_rate": 1.5607064614705846e-06, "loss": 0.763, "step": 66180 }, { "epoch": 0.95, "grad_norm": 0.68359375, "learning_rate": 1.5563035629117095e-06, "loss": 0.9432, "step": 66185 }, { "epoch": 0.95, "grad_norm": 0.5546875, "learning_rate": 1.5519068349158505e-06, "loss": 0.8592, "step": 66190 }, { "epoch": 0.95, "grad_norm": 0.56640625, "learning_rate": 1.547516277758576e-06, "loss": 1.0989, "step": 66195 }, { "epoch": 0.95, "grad_norm": 0.5703125, "learning_rate": 1.5431318917151216e-06, "loss": 0.9865, "step": 66200 }, { "epoch": 0.95, "grad_norm": 0.5546875, "learning_rate": 1.5387536770603007e-06, "loss": 0.7946, "step": 66205 }, { "epoch": 0.95, "grad_norm": 0.6015625, "learning_rate": 1.5343816340685268e-06, "loss": 0.9329, "step": 66210 }, { "epoch": 0.95, "grad_norm": 0.6015625, "learning_rate": 1.530015763013859e-06, "loss": 1.0905, "step": 66215 }, { "epoch": 0.95, "grad_norm": 0.62890625, "learning_rate": 1.5256560641699446e-06, "loss": 1.2853, "step": 66220 }, { "epoch": 0.95, "grad_norm": 0.515625, "learning_rate": 1.5213025378100654e-06, "loss": 0.9044, "step": 66225 }, { "epoch": 0.95, "grad_norm": 0.65234375, "learning_rate": 1.5169551842071028e-06, "loss": 0.838, "step": 66230 }, { "epoch": 0.95, "grad_norm": 0.5234375, "learning_rate": 1.5126140036335502e-06, "loss": 0.9923, "step": 66235 }, { "epoch": 0.95, "grad_norm": 0.5625, "learning_rate": 1.5082789963615228e-06, "loss": 1.1155, "step": 66240 }, { "epoch": 0.95, "grad_norm": 0.5703125, "learning_rate": 1.5039501626627372e-06, "loss": 0.8375, "step": 66245 }, { "epoch": 0.95, "grad_norm": 0.53125, "learning_rate": 1.4996275028085205e-06, "loss": 0.9562, "step": 66250 }, { "epoch": 0.95, "grad_norm": 0.62109375, "learning_rate": 1.4953110170698558e-06, "loss": 0.9143, "step": 66255 }, { "epoch": 0.95, "grad_norm": 0.55078125, "learning_rate": 1.4910007057172714e-06, "loss": 0.8973, "step": 66260 }, { "epoch": 0.95, "grad_norm": 0.46875, "learning_rate": 1.4866965690209622e-06, "loss": 0.8352, "step": 66265 }, { "epoch": 0.95, "grad_norm": 0.57421875, "learning_rate": 1.4823986072507123e-06, "loss": 1.0024, "step": 66270 }, { "epoch": 0.95, "grad_norm": 0.53125, "learning_rate": 1.4781068206759063e-06, "loss": 0.8752, "step": 66275 }, { "epoch": 0.95, "grad_norm": 0.671875, "learning_rate": 1.4738212095655846e-06, "loss": 1.0038, "step": 66280 }, { "epoch": 0.95, "grad_norm": 0.58203125, "learning_rate": 1.4695417741883543e-06, "loss": 0.8395, "step": 66285 }, { "epoch": 0.95, "grad_norm": 0.546875, "learning_rate": 1.4652685148124678e-06, "loss": 0.925, "step": 66290 }, { "epoch": 0.95, "grad_norm": 0.62890625, "learning_rate": 1.4610014317057664e-06, "loss": 0.999, "step": 66295 }, { "epoch": 0.95, "grad_norm": 0.52734375, "learning_rate": 1.4567405251357246e-06, "loss": 0.9159, "step": 66300 }, { "epoch": 0.95, "grad_norm": 0.52734375, "learning_rate": 1.4524857953694183e-06, "loss": 0.8985, "step": 66305 }, { "epoch": 0.95, "grad_norm": 0.44921875, "learning_rate": 1.4482372426735337e-06, "loss": 0.8518, "step": 66310 }, { "epoch": 0.95, "grad_norm": 0.59765625, "learning_rate": 1.4439948673143799e-06, "loss": 0.967, "step": 66315 }, { "epoch": 0.95, "grad_norm": 0.6328125, "learning_rate": 1.439758669557889e-06, "loss": 0.9035, "step": 66320 }, { "epoch": 0.95, "grad_norm": 0.56640625, "learning_rate": 1.4355286496695598e-06, "loss": 0.9379, "step": 66325 }, { "epoch": 0.95, "grad_norm": 0.5546875, "learning_rate": 1.4313048079145463e-06, "loss": 1.0857, "step": 66330 }, { "epoch": 0.95, "grad_norm": 0.60546875, "learning_rate": 1.4270871445576262e-06, "loss": 0.9745, "step": 66335 }, { "epoch": 0.95, "grad_norm": 0.5703125, "learning_rate": 1.4228756598631322e-06, "loss": 0.8944, "step": 66340 }, { "epoch": 0.95, "grad_norm": 0.58984375, "learning_rate": 1.418670354095064e-06, "loss": 0.9383, "step": 66345 }, { "epoch": 0.95, "grad_norm": 0.5234375, "learning_rate": 1.4144712275170224e-06, "loss": 0.984, "step": 66350 }, { "epoch": 0.95, "grad_norm": 0.50390625, "learning_rate": 1.4102782803921966e-06, "loss": 0.8358, "step": 66355 }, { "epoch": 0.95, "grad_norm": 0.6015625, "learning_rate": 1.4060915129834096e-06, "loss": 0.9024, "step": 66360 }, { "epoch": 0.95, "grad_norm": 0.5546875, "learning_rate": 1.401910925553107e-06, "loss": 0.9139, "step": 66365 }, { "epoch": 0.95, "grad_norm": 0.640625, "learning_rate": 1.397736518363324e-06, "loss": 0.9026, "step": 66370 }, { "epoch": 0.95, "grad_norm": 0.546875, "learning_rate": 1.3935682916756953e-06, "loss": 0.9661, "step": 66375 }, { "epoch": 0.95, "grad_norm": 0.609375, "learning_rate": 1.3894062457515233e-06, "loss": 0.8531, "step": 66380 }, { "epoch": 0.95, "grad_norm": 0.64453125, "learning_rate": 1.3852503808516771e-06, "loss": 0.9482, "step": 66385 }, { "epoch": 0.95, "grad_norm": 0.625, "learning_rate": 1.381100697236637e-06, "loss": 0.934, "step": 66390 }, { "epoch": 0.95, "grad_norm": 0.53125, "learning_rate": 1.3769571951665393e-06, "loss": 1.1681, "step": 66395 }, { "epoch": 0.95, "grad_norm": 0.56640625, "learning_rate": 1.3728198749010657e-06, "loss": 0.8876, "step": 66400 }, { "epoch": 0.95, "grad_norm": 0.478515625, "learning_rate": 1.368688736699586e-06, "loss": 0.8038, "step": 66405 }, { "epoch": 0.95, "grad_norm": 0.51171875, "learning_rate": 1.3645637808210266e-06, "loss": 0.9515, "step": 66410 }, { "epoch": 0.95, "grad_norm": 0.5625, "learning_rate": 1.360445007523936e-06, "loss": 0.914, "step": 66415 }, { "epoch": 0.95, "grad_norm": 0.5546875, "learning_rate": 1.3563324170665082e-06, "loss": 0.891, "step": 66420 }, { "epoch": 0.95, "grad_norm": 0.55078125, "learning_rate": 1.3522260097064921e-06, "loss": 1.0436, "step": 66425 }, { "epoch": 0.95, "grad_norm": 0.50390625, "learning_rate": 1.3481257857013152e-06, "loss": 1.1661, "step": 66430 }, { "epoch": 0.95, "grad_norm": 0.51953125, "learning_rate": 1.3440317453079609e-06, "loss": 0.8857, "step": 66435 }, { "epoch": 0.95, "grad_norm": 0.55859375, "learning_rate": 1.3399438887830572e-06, "loss": 0.807, "step": 66440 }, { "epoch": 0.95, "grad_norm": 0.515625, "learning_rate": 1.3358622163828438e-06, "loss": 0.9729, "step": 66445 }, { "epoch": 0.95, "grad_norm": 0.50390625, "learning_rate": 1.331786728363149e-06, "loss": 0.8429, "step": 66450 }, { "epoch": 0.95, "grad_norm": 0.5546875, "learning_rate": 1.3277174249794355e-06, "loss": 0.7673, "step": 66455 }, { "epoch": 0.95, "grad_norm": 0.578125, "learning_rate": 1.3236543064867769e-06, "loss": 0.9777, "step": 66460 }, { "epoch": 0.95, "grad_norm": 0.478515625, "learning_rate": 1.319597373139836e-06, "loss": 0.9414, "step": 66465 }, { "epoch": 0.95, "grad_norm": 0.5546875, "learning_rate": 1.3155466251929316e-06, "loss": 0.8902, "step": 66470 }, { "epoch": 0.95, "grad_norm": 0.6015625, "learning_rate": 1.3115020628999498e-06, "loss": 0.8715, "step": 66475 }, { "epoch": 0.95, "grad_norm": 0.55859375, "learning_rate": 1.3074636865144206e-06, "loss": 0.9018, "step": 66480 }, { "epoch": 0.95, "grad_norm": 0.51171875, "learning_rate": 1.3034314962894756e-06, "loss": 0.8096, "step": 66485 }, { "epoch": 0.95, "grad_norm": 0.56640625, "learning_rate": 1.2994054924778342e-06, "loss": 1.0186, "step": 66490 }, { "epoch": 0.95, "grad_norm": 0.61328125, "learning_rate": 1.2953856753318838e-06, "loss": 0.9389, "step": 66495 }, { "epoch": 0.95, "grad_norm": 0.56640625, "learning_rate": 1.291372045103556e-06, "loss": 1.0381, "step": 66500 }, { "epoch": 0.95, "grad_norm": 0.5234375, "learning_rate": 1.2873646020444608e-06, "loss": 0.8876, "step": 66505 }, { "epoch": 0.95, "grad_norm": 0.6015625, "learning_rate": 1.2833633464057748e-06, "loss": 0.9844, "step": 66510 }, { "epoch": 0.95, "grad_norm": 0.455078125, "learning_rate": 1.2793682784382977e-06, "loss": 0.9653, "step": 66515 }, { "epoch": 0.95, "grad_norm": 0.55859375, "learning_rate": 1.2753793983924622e-06, "loss": 0.9561, "step": 66520 }, { "epoch": 0.95, "grad_norm": 0.48828125, "learning_rate": 1.2713967065182797e-06, "loss": 0.9512, "step": 66525 }, { "epoch": 0.95, "grad_norm": 0.546875, "learning_rate": 1.2674202030653836e-06, "loss": 1.0748, "step": 66530 }, { "epoch": 0.95, "grad_norm": 0.490234375, "learning_rate": 1.2634498882830415e-06, "loss": 0.9455, "step": 66535 }, { "epoch": 0.95, "grad_norm": 0.55078125, "learning_rate": 1.2594857624201207e-06, "loss": 1.1208, "step": 66540 }, { "epoch": 0.95, "grad_norm": 0.50390625, "learning_rate": 1.255527825725078e-06, "loss": 0.8845, "step": 66545 }, { "epoch": 0.95, "grad_norm": 0.55078125, "learning_rate": 1.251576078446015e-06, "loss": 1.1462, "step": 66550 }, { "epoch": 0.95, "grad_norm": 0.546875, "learning_rate": 1.2476305208306226e-06, "loss": 0.9087, "step": 66555 }, { "epoch": 0.95, "grad_norm": 0.69921875, "learning_rate": 1.243691153126225e-06, "loss": 1.0049, "step": 66560 }, { "epoch": 0.95, "grad_norm": 0.55078125, "learning_rate": 1.2397579755797361e-06, "loss": 0.9512, "step": 66565 }, { "epoch": 0.95, "grad_norm": 0.609375, "learning_rate": 1.2358309884377029e-06, "loss": 0.9347, "step": 66570 }, { "epoch": 0.95, "grad_norm": 0.5234375, "learning_rate": 1.2319101919462616e-06, "loss": 0.9896, "step": 66575 }, { "epoch": 0.96, "grad_norm": 0.58203125, "learning_rate": 1.2279955863511826e-06, "loss": 0.9109, "step": 66580 }, { "epoch": 0.96, "grad_norm": 0.54296875, "learning_rate": 1.2240871718978253e-06, "loss": 0.9015, "step": 66585 }, { "epoch": 0.96, "grad_norm": 0.5625, "learning_rate": 1.2201849488311822e-06, "loss": 0.9944, "step": 66590 }, { "epoch": 0.96, "grad_norm": 0.56640625, "learning_rate": 1.216288917395847e-06, "loss": 1.1017, "step": 66595 }, { "epoch": 0.96, "grad_norm": 0.625, "learning_rate": 1.2123990778360238e-06, "loss": 0.9436, "step": 66600 }, { "epoch": 0.96, "grad_norm": 0.5703125, "learning_rate": 1.2085154303955515e-06, "loss": 0.9388, "step": 66605 }, { "epoch": 0.96, "grad_norm": 0.52734375, "learning_rate": 1.2046379753178238e-06, "loss": 0.912, "step": 66610 }, { "epoch": 0.96, "grad_norm": 0.5390625, "learning_rate": 1.2007667128459133e-06, "loss": 0.9379, "step": 66615 }, { "epoch": 0.96, "grad_norm": 0.515625, "learning_rate": 1.196901643222481e-06, "loss": 0.9229, "step": 66620 }, { "epoch": 0.96, "grad_norm": 0.546875, "learning_rate": 1.193042766689767e-06, "loss": 1.0321, "step": 66625 }, { "epoch": 0.96, "grad_norm": 0.5546875, "learning_rate": 1.1891900834896775e-06, "loss": 0.9575, "step": 66630 }, { "epoch": 0.96, "grad_norm": 0.59375, "learning_rate": 1.1853435938636858e-06, "loss": 1.1775, "step": 66635 }, { "epoch": 0.96, "grad_norm": 0.65234375, "learning_rate": 1.1815032980528885e-06, "loss": 0.9579, "step": 66640 }, { "epoch": 0.96, "grad_norm": 0.5078125, "learning_rate": 1.1776691962980146e-06, "loss": 0.9174, "step": 66645 }, { "epoch": 0.96, "grad_norm": 0.953125, "learning_rate": 1.1738412888393835e-06, "loss": 0.9915, "step": 66650 }, { "epoch": 0.96, "grad_norm": 0.55078125, "learning_rate": 1.1700195759169364e-06, "loss": 0.9272, "step": 66655 }, { "epoch": 0.96, "grad_norm": 0.5234375, "learning_rate": 1.1662040577702148e-06, "loss": 0.9615, "step": 66660 }, { "epoch": 0.96, "grad_norm": 0.498046875, "learning_rate": 1.1623947346383946e-06, "loss": 0.9528, "step": 66665 }, { "epoch": 0.96, "grad_norm": 0.462890625, "learning_rate": 1.1585916067602286e-06, "loss": 0.8361, "step": 66670 }, { "epoch": 0.96, "grad_norm": 0.578125, "learning_rate": 1.1547946743741155e-06, "loss": 1.0168, "step": 66675 }, { "epoch": 0.96, "grad_norm": 0.53125, "learning_rate": 1.1510039377180536e-06, "loss": 0.9082, "step": 66680 }, { "epoch": 0.96, "grad_norm": 0.498046875, "learning_rate": 1.1472193970296419e-06, "loss": 0.9601, "step": 66685 }, { "epoch": 0.96, "grad_norm": 0.56640625, "learning_rate": 1.1434410525461014e-06, "loss": 0.9541, "step": 66690 }, { "epoch": 0.96, "grad_norm": 0.5625, "learning_rate": 1.139668904504254e-06, "loss": 1.0303, "step": 66695 }, { "epoch": 0.96, "grad_norm": 0.61328125, "learning_rate": 1.1359029531405662e-06, "loss": 0.8633, "step": 66700 }, { "epoch": 0.96, "grad_norm": 0.515625, "learning_rate": 1.1321431986910712e-06, "loss": 0.9686, "step": 66705 }, { "epoch": 0.96, "grad_norm": 0.5703125, "learning_rate": 1.1283896413914362e-06, "loss": 0.9233, "step": 66710 }, { "epoch": 0.96, "grad_norm": 0.6640625, "learning_rate": 1.1246422814769509e-06, "loss": 1.0196, "step": 66715 }, { "epoch": 0.96, "grad_norm": 0.5, "learning_rate": 1.1209011191824935e-06, "loss": 0.9566, "step": 66720 }, { "epoch": 0.96, "grad_norm": 0.546875, "learning_rate": 1.1171661547425772e-06, "loss": 0.9021, "step": 66725 }, { "epoch": 0.96, "grad_norm": 0.5, "learning_rate": 1.1134373883913029e-06, "loss": 0.8414, "step": 66730 }, { "epoch": 0.96, "grad_norm": 0.58984375, "learning_rate": 1.1097148203623841e-06, "loss": 1.0286, "step": 66735 }, { "epoch": 0.96, "grad_norm": 0.6953125, "learning_rate": 1.1059984508891785e-06, "loss": 0.9377, "step": 66740 }, { "epoch": 0.96, "grad_norm": 0.57421875, "learning_rate": 1.1022882802046218e-06, "loss": 0.9322, "step": 66745 }, { "epoch": 0.96, "grad_norm": 0.55078125, "learning_rate": 1.0985843085412617e-06, "loss": 0.9119, "step": 66750 }, { "epoch": 0.96, "grad_norm": 0.50390625, "learning_rate": 1.0948865361312676e-06, "loss": 1.0042, "step": 66755 }, { "epoch": 0.96, "grad_norm": 0.546875, "learning_rate": 1.0911949632064434e-06, "loss": 0.9995, "step": 66760 }, { "epoch": 0.96, "grad_norm": 0.58203125, "learning_rate": 1.0875095899981702e-06, "loss": 1.1144, "step": 66765 }, { "epoch": 0.96, "grad_norm": 0.5546875, "learning_rate": 1.0838304167374302e-06, "loss": 0.8735, "step": 66770 }, { "epoch": 0.96, "grad_norm": 0.5625, "learning_rate": 1.0801574436548722e-06, "loss": 0.8851, "step": 66775 }, { "epoch": 0.96, "grad_norm": 0.51953125, "learning_rate": 1.076490670980701e-06, "loss": 0.9889, "step": 66780 }, { "epoch": 0.96, "grad_norm": 0.5390625, "learning_rate": 1.0728300989447548e-06, "loss": 1.0086, "step": 66785 }, { "epoch": 0.96, "grad_norm": 0.53515625, "learning_rate": 1.0691757277764946e-06, "loss": 0.995, "step": 66790 }, { "epoch": 0.96, "grad_norm": 0.55859375, "learning_rate": 1.0655275577049706e-06, "loss": 0.9718, "step": 66795 }, { "epoch": 0.96, "grad_norm": 0.6484375, "learning_rate": 1.0618855889588552e-06, "loss": 0.9534, "step": 66800 }, { "epoch": 0.96, "grad_norm": 0.6015625, "learning_rate": 1.0582498217664215e-06, "loss": 1.0658, "step": 66805 }, { "epoch": 0.96, "grad_norm": 0.54296875, "learning_rate": 1.0546202563555874e-06, "loss": 0.9804, "step": 66810 }, { "epoch": 0.96, "grad_norm": 0.60546875, "learning_rate": 1.0509968929538484e-06, "loss": 1.0017, "step": 66815 }, { "epoch": 0.96, "grad_norm": 0.53125, "learning_rate": 1.0473797317883005e-06, "loss": 0.9555, "step": 66820 }, { "epoch": 0.96, "grad_norm": 0.5859375, "learning_rate": 1.043768773085707e-06, "loss": 1.0213, "step": 66825 }, { "epoch": 0.96, "grad_norm": 0.498046875, "learning_rate": 1.0401640170723758e-06, "loss": 0.9763, "step": 66830 }, { "epoch": 0.96, "grad_norm": 0.60546875, "learning_rate": 1.0365654639742706e-06, "loss": 1.0952, "step": 66835 }, { "epoch": 0.96, "grad_norm": 0.4765625, "learning_rate": 1.0329731140169663e-06, "loss": 1.2155, "step": 66840 }, { "epoch": 0.96, "grad_norm": 0.546875, "learning_rate": 1.0293869674256051e-06, "loss": 0.8288, "step": 66845 }, { "epoch": 0.96, "grad_norm": 0.56640625, "learning_rate": 1.025807024424985e-06, "loss": 0.9911, "step": 66850 }, { "epoch": 0.96, "grad_norm": 0.5390625, "learning_rate": 1.0222332852395156e-06, "loss": 0.9902, "step": 66855 }, { "epoch": 0.96, "grad_norm": 0.55078125, "learning_rate": 1.018665750093184e-06, "loss": 0.83, "step": 66860 }, { "epoch": 0.96, "grad_norm": 0.58203125, "learning_rate": 1.0151044192096115e-06, "loss": 0.8954, "step": 66865 }, { "epoch": 0.96, "grad_norm": 0.55078125, "learning_rate": 1.0115492928120306e-06, "loss": 1.0404, "step": 66870 }, { "epoch": 0.96, "grad_norm": 0.5234375, "learning_rate": 1.0080003711232743e-06, "loss": 0.8523, "step": 66875 }, { "epoch": 0.96, "grad_norm": 0.58984375, "learning_rate": 1.0044576543658092e-06, "loss": 0.97, "step": 66880 }, { "epoch": 0.96, "grad_norm": 0.48828125, "learning_rate": 1.0009211427616794e-06, "loss": 0.8799, "step": 66885 }, { "epoch": 0.96, "grad_norm": 0.58984375, "learning_rate": 9.973908365325525e-07, "loss": 1.0422, "step": 66890 }, { "epoch": 0.96, "grad_norm": 0.5859375, "learning_rate": 9.9386673589974e-07, "loss": 1.0017, "step": 66895 }, { "epoch": 0.96, "grad_norm": 0.546875, "learning_rate": 9.903488410840988e-07, "loss": 0.9493, "step": 66900 }, { "epoch": 0.96, "grad_norm": 0.53515625, "learning_rate": 9.868371523061748e-07, "loss": 0.9286, "step": 66905 }, { "epoch": 0.96, "grad_norm": 0.5546875, "learning_rate": 9.833316697860474e-07, "loss": 0.9287, "step": 66910 }, { "epoch": 0.96, "grad_norm": 0.486328125, "learning_rate": 9.798323937434739e-07, "loss": 0.9732, "step": 66915 }, { "epoch": 0.96, "grad_norm": 0.53125, "learning_rate": 9.763393243977792e-07, "loss": 0.9871, "step": 66920 }, { "epoch": 0.96, "grad_norm": 0.5703125, "learning_rate": 9.728524619678991e-07, "loss": 0.8952, "step": 66925 }, { "epoch": 0.96, "grad_norm": 0.6796875, "learning_rate": 9.693718066724256e-07, "loss": 0.9673, "step": 66930 }, { "epoch": 0.96, "grad_norm": 0.68359375, "learning_rate": 9.658973587295062e-07, "loss": 0.8788, "step": 66935 }, { "epoch": 0.96, "grad_norm": 0.546875, "learning_rate": 9.624291183569334e-07, "loss": 0.8784, "step": 66940 }, { "epoch": 0.96, "grad_norm": 0.48828125, "learning_rate": 9.589670857721e-07, "loss": 1.0006, "step": 66945 }, { "epoch": 0.96, "grad_norm": 0.515625, "learning_rate": 9.555112611920104e-07, "loss": 0.9158, "step": 66950 }, { "epoch": 0.96, "grad_norm": 0.52734375, "learning_rate": 9.520616448332686e-07, "loss": 0.9912, "step": 66955 }, { "epoch": 0.96, "grad_norm": 0.57421875, "learning_rate": 9.486182369121132e-07, "loss": 0.9214, "step": 66960 }, { "epoch": 0.96, "grad_norm": 0.52734375, "learning_rate": 9.451810376443826e-07, "loss": 1.0211, "step": 66965 }, { "epoch": 0.96, "grad_norm": 0.48828125, "learning_rate": 9.417500472455043e-07, "loss": 0.9691, "step": 66970 }, { "epoch": 0.96, "grad_norm": 0.5859375, "learning_rate": 9.383252659305619e-07, "loss": 1.106, "step": 66975 }, { "epoch": 0.96, "grad_norm": 0.54296875, "learning_rate": 9.34906693914217e-07, "loss": 0.9614, "step": 66980 }, { "epoch": 0.96, "grad_norm": 0.59375, "learning_rate": 9.314943314107316e-07, "loss": 0.981, "step": 66985 }, { "epoch": 0.96, "grad_norm": 0.54296875, "learning_rate": 9.280881786340124e-07, "loss": 0.894, "step": 66990 }, { "epoch": 0.96, "grad_norm": 0.58984375, "learning_rate": 9.246882357975772e-07, "loss": 0.8769, "step": 66995 }, { "epoch": 0.96, "grad_norm": 0.74609375, "learning_rate": 9.212945031145115e-07, "loss": 1.0055, "step": 67000 }, { "epoch": 0.96, "grad_norm": 0.498046875, "learning_rate": 9.179069807975449e-07, "loss": 0.8045, "step": 67005 }, { "epoch": 0.96, "grad_norm": 0.57421875, "learning_rate": 9.145256690590076e-07, "loss": 1.0224, "step": 67010 }, { "epoch": 0.96, "grad_norm": 0.53125, "learning_rate": 9.111505681108634e-07, "loss": 0.907, "step": 67015 }, { "epoch": 0.96, "grad_norm": 0.55859375, "learning_rate": 9.077816781646431e-07, "loss": 1.1019, "step": 67020 }, { "epoch": 0.96, "grad_norm": 0.56640625, "learning_rate": 9.044189994315333e-07, "loss": 0.8189, "step": 67025 }, { "epoch": 0.96, "grad_norm": 0.55859375, "learning_rate": 9.010625321222987e-07, "loss": 1.0068, "step": 67030 }, { "epoch": 0.96, "grad_norm": 0.625, "learning_rate": 8.977122764473267e-07, "loss": 0.916, "step": 67035 }, { "epoch": 0.96, "grad_norm": 0.5703125, "learning_rate": 8.943682326166159e-07, "loss": 0.9092, "step": 67040 }, { "epoch": 0.96, "grad_norm": 0.625, "learning_rate": 8.910304008397763e-07, "loss": 0.9385, "step": 67045 }, { "epoch": 0.96, "grad_norm": 0.58984375, "learning_rate": 8.876987813260407e-07, "loss": 0.968, "step": 67050 }, { "epoch": 0.96, "grad_norm": 0.58203125, "learning_rate": 8.843733742842086e-07, "loss": 1.0223, "step": 67055 }, { "epoch": 0.96, "grad_norm": 0.5625, "learning_rate": 8.810541799227356e-07, "loss": 0.9759, "step": 67060 }, { "epoch": 0.96, "grad_norm": 0.52734375, "learning_rate": 8.777411984496775e-07, "loss": 0.9314, "step": 67065 }, { "epoch": 0.96, "grad_norm": 0.453125, "learning_rate": 8.744344300726904e-07, "loss": 0.9452, "step": 67070 }, { "epoch": 0.96, "grad_norm": 0.55078125, "learning_rate": 8.711338749990528e-07, "loss": 0.8704, "step": 67075 }, { "epoch": 0.96, "grad_norm": 0.55859375, "learning_rate": 8.678395334356437e-07, "loss": 0.8862, "step": 67080 }, { "epoch": 0.96, "grad_norm": 0.55078125, "learning_rate": 8.645514055889425e-07, "loss": 1.1477, "step": 67085 }, { "epoch": 0.96, "grad_norm": 0.6015625, "learning_rate": 8.612694916650843e-07, "loss": 0.9368, "step": 67090 }, { "epoch": 0.96, "grad_norm": 0.515625, "learning_rate": 8.579937918697489e-07, "loss": 0.9919, "step": 67095 }, { "epoch": 0.96, "grad_norm": 0.6171875, "learning_rate": 8.547243064082721e-07, "loss": 0.9392, "step": 67100 }, { "epoch": 0.96, "grad_norm": 0.5078125, "learning_rate": 8.514610354856123e-07, "loss": 0.8963, "step": 67105 }, { "epoch": 0.96, "grad_norm": 0.515625, "learning_rate": 8.482039793062835e-07, "loss": 0.9462, "step": 67110 }, { "epoch": 0.96, "grad_norm": 0.5234375, "learning_rate": 8.449531380744558e-07, "loss": 0.9711, "step": 67115 }, { "epoch": 0.96, "grad_norm": 0.57421875, "learning_rate": 8.417085119938883e-07, "loss": 0.9606, "step": 67120 }, { "epoch": 0.96, "grad_norm": 0.53515625, "learning_rate": 8.38470101267963e-07, "loss": 1.0648, "step": 67125 }, { "epoch": 0.96, "grad_norm": 0.59375, "learning_rate": 8.352379060996729e-07, "loss": 0.9065, "step": 67130 }, { "epoch": 0.96, "grad_norm": 0.57421875, "learning_rate": 8.320119266916115e-07, "loss": 0.9925, "step": 67135 }, { "epoch": 0.96, "grad_norm": 0.53125, "learning_rate": 8.287921632459838e-07, "loss": 0.8692, "step": 67140 }, { "epoch": 0.96, "grad_norm": 0.578125, "learning_rate": 8.255786159646062e-07, "loss": 1.0614, "step": 67145 }, { "epoch": 0.96, "grad_norm": 0.60546875, "learning_rate": 8.223712850489063e-07, "loss": 0.8723, "step": 67150 }, { "epoch": 0.96, "grad_norm": 0.5546875, "learning_rate": 8.191701706999344e-07, "loss": 0.9797, "step": 67155 }, { "epoch": 0.96, "grad_norm": 0.58203125, "learning_rate": 8.159752731183412e-07, "loss": 0.8765, "step": 67160 }, { "epoch": 0.96, "grad_norm": 0.56640625, "learning_rate": 8.127865925043665e-07, "loss": 0.8819, "step": 67165 }, { "epoch": 0.96, "grad_norm": 0.59375, "learning_rate": 8.096041290578948e-07, "loss": 1.06, "step": 67170 }, { "epoch": 0.96, "grad_norm": 0.48046875, "learning_rate": 8.064278829784111e-07, "loss": 1.0899, "step": 67175 }, { "epoch": 0.96, "grad_norm": 0.55078125, "learning_rate": 8.032578544650004e-07, "loss": 1.0346, "step": 67180 }, { "epoch": 0.96, "grad_norm": 0.51171875, "learning_rate": 8.000940437163595e-07, "loss": 0.9365, "step": 67185 }, { "epoch": 0.96, "grad_norm": 0.6015625, "learning_rate": 7.969364509308075e-07, "loss": 0.9604, "step": 67190 }, { "epoch": 0.96, "grad_norm": 0.56640625, "learning_rate": 7.937850763062527e-07, "loss": 1.0253, "step": 67195 }, { "epoch": 0.96, "grad_norm": 0.58984375, "learning_rate": 7.90639920040237e-07, "loss": 1.0417, "step": 67200 }, { "epoch": 0.96, "grad_norm": 0.55078125, "learning_rate": 7.875009823299029e-07, "loss": 0.7986, "step": 67205 }, { "epoch": 0.96, "grad_norm": 0.59765625, "learning_rate": 7.84368263372004e-07, "loss": 0.8278, "step": 67210 }, { "epoch": 0.96, "grad_norm": 0.55859375, "learning_rate": 7.812417633628943e-07, "loss": 0.9306, "step": 67215 }, { "epoch": 0.96, "grad_norm": 0.56640625, "learning_rate": 7.781214824985617e-07, "loss": 0.9969, "step": 67220 }, { "epoch": 0.96, "grad_norm": 0.478515625, "learning_rate": 7.750074209745717e-07, "loss": 0.8955, "step": 67225 }, { "epoch": 0.96, "grad_norm": 0.51171875, "learning_rate": 7.718995789861238e-07, "loss": 0.9978, "step": 67230 }, { "epoch": 0.96, "grad_norm": 0.6328125, "learning_rate": 7.68797956728029e-07, "loss": 0.9923, "step": 67235 }, { "epoch": 0.96, "grad_norm": 0.58984375, "learning_rate": 7.65702554394676e-07, "loss": 1.0853, "step": 67240 }, { "epoch": 0.96, "grad_norm": 0.5078125, "learning_rate": 7.62613372180121e-07, "loss": 0.9365, "step": 67245 }, { "epoch": 0.96, "grad_norm": 0.55859375, "learning_rate": 7.595304102779754e-07, "loss": 0.8599, "step": 67250 }, { "epoch": 0.96, "grad_norm": 0.515625, "learning_rate": 7.564536688814849e-07, "loss": 1.0178, "step": 67255 }, { "epoch": 0.96, "grad_norm": 0.56640625, "learning_rate": 7.533831481835174e-07, "loss": 0.9237, "step": 67260 }, { "epoch": 0.96, "grad_norm": 0.439453125, "learning_rate": 7.503188483765189e-07, "loss": 1.0412, "step": 67265 }, { "epoch": 0.96, "grad_norm": 0.490234375, "learning_rate": 7.472607696525801e-07, "loss": 0.848, "step": 67270 }, { "epoch": 0.97, "grad_norm": 0.6484375, "learning_rate": 7.442089122033702e-07, "loss": 0.8946, "step": 67275 }, { "epoch": 0.97, "grad_norm": 0.5234375, "learning_rate": 7.411632762201804e-07, "loss": 0.8829, "step": 67280 }, { "epoch": 0.97, "grad_norm": 0.53125, "learning_rate": 7.381238618939357e-07, "loss": 1.0094, "step": 67285 }, { "epoch": 0.97, "grad_norm": 0.609375, "learning_rate": 7.350906694151283e-07, "loss": 1.258, "step": 67290 }, { "epoch": 0.97, "grad_norm": 0.5078125, "learning_rate": 7.32063698973895e-07, "loss": 0.8682, "step": 67295 }, { "epoch": 0.97, "grad_norm": 0.57421875, "learning_rate": 7.290429507599616e-07, "loss": 0.9659, "step": 67300 }, { "epoch": 0.97, "grad_norm": 0.640625, "learning_rate": 7.26028424962677e-07, "loss": 0.9875, "step": 67305 }, { "epoch": 0.97, "grad_norm": 0.53125, "learning_rate": 7.230201217710009e-07, "loss": 0.8878, "step": 67310 }, { "epoch": 0.97, "grad_norm": 0.5078125, "learning_rate": 7.200180413734936e-07, "loss": 0.9329, "step": 67315 }, { "epoch": 0.97, "grad_norm": 0.56640625, "learning_rate": 7.170221839583158e-07, "loss": 0.8614, "step": 67320 }, { "epoch": 0.97, "grad_norm": 0.5234375, "learning_rate": 7.140325497132616e-07, "loss": 0.9333, "step": 67325 }, { "epoch": 0.97, "grad_norm": 0.546875, "learning_rate": 7.110491388257367e-07, "loss": 0.7535, "step": 67330 }, { "epoch": 0.97, "grad_norm": 0.515625, "learning_rate": 7.080719514827139e-07, "loss": 1.0076, "step": 67335 }, { "epoch": 0.97, "grad_norm": 0.6796875, "learning_rate": 7.051009878708326e-07, "loss": 0.916, "step": 67340 }, { "epoch": 0.97, "grad_norm": 0.5390625, "learning_rate": 7.021362481763106e-07, "loss": 0.8847, "step": 67345 }, { "epoch": 0.97, "grad_norm": 0.51953125, "learning_rate": 6.991777325849769e-07, "loss": 0.981, "step": 67350 }, { "epoch": 0.97, "grad_norm": 0.59375, "learning_rate": 6.962254412822833e-07, "loss": 0.9348, "step": 67355 }, { "epoch": 0.97, "grad_norm": 0.53125, "learning_rate": 6.932793744532707e-07, "loss": 0.9446, "step": 67360 }, { "epoch": 0.97, "grad_norm": 0.55078125, "learning_rate": 6.903395322826134e-07, "loss": 0.9172, "step": 67365 }, { "epoch": 0.97, "grad_norm": 0.5, "learning_rate": 6.874059149545753e-07, "loss": 0.7879, "step": 67370 }, { "epoch": 0.97, "grad_norm": 0.6171875, "learning_rate": 6.844785226530426e-07, "loss": 0.9321, "step": 67375 }, { "epoch": 0.97, "grad_norm": 0.53125, "learning_rate": 6.815573555615018e-07, "loss": 0.8774, "step": 67380 }, { "epoch": 0.97, "grad_norm": 0.53515625, "learning_rate": 6.78642413863062e-07, "loss": 0.8636, "step": 67385 }, { "epoch": 0.97, "grad_norm": 0.65234375, "learning_rate": 6.757336977404327e-07, "loss": 0.9958, "step": 67390 }, { "epoch": 0.97, "grad_norm": 0.447265625, "learning_rate": 6.728312073759458e-07, "loss": 0.9543, "step": 67395 }, { "epoch": 0.97, "grad_norm": 0.51953125, "learning_rate": 6.699349429515111e-07, "loss": 0.8073, "step": 67400 }, { "epoch": 0.97, "grad_norm": 0.45703125, "learning_rate": 6.670449046486837e-07, "loss": 0.7714, "step": 67405 }, { "epoch": 0.97, "grad_norm": 0.5625, "learning_rate": 6.641610926486186e-07, "loss": 1.0004, "step": 67410 }, { "epoch": 0.97, "grad_norm": 0.5390625, "learning_rate": 6.612835071320711e-07, "loss": 0.9713, "step": 67415 }, { "epoch": 0.97, "grad_norm": 0.56640625, "learning_rate": 6.584121482794082e-07, "loss": 1.0336, "step": 67420 }, { "epoch": 0.97, "grad_norm": 0.578125, "learning_rate": 6.55547016270619e-07, "loss": 0.9656, "step": 67425 }, { "epoch": 0.97, "grad_norm": 0.55078125, "learning_rate": 6.526881112852711e-07, "loss": 0.8656, "step": 67430 }, { "epoch": 0.97, "grad_norm": 0.5390625, "learning_rate": 6.498354335025881e-07, "loss": 0.9085, "step": 67435 }, { "epoch": 0.97, "grad_norm": 0.59765625, "learning_rate": 6.469889831013709e-07, "loss": 1.0413, "step": 67440 }, { "epoch": 0.97, "grad_norm": 0.4921875, "learning_rate": 6.441487602600327e-07, "loss": 0.8726, "step": 67445 }, { "epoch": 0.97, "grad_norm": 0.5234375, "learning_rate": 6.413147651566088e-07, "loss": 1.0768, "step": 67450 }, { "epoch": 0.97, "grad_norm": 0.58984375, "learning_rate": 6.384869979687347e-07, "loss": 0.9141, "step": 67455 }, { "epoch": 0.97, "grad_norm": 0.54296875, "learning_rate": 6.356654588736688e-07, "loss": 0.9616, "step": 67460 }, { "epoch": 0.97, "grad_norm": 0.578125, "learning_rate": 6.328501480482474e-07, "loss": 1.0049, "step": 67465 }, { "epoch": 0.97, "grad_norm": 0.59765625, "learning_rate": 6.300410656689515e-07, "loss": 0.9068, "step": 67470 }, { "epoch": 0.97, "grad_norm": 0.66796875, "learning_rate": 6.272382119118625e-07, "loss": 1.0419, "step": 67475 }, { "epoch": 0.97, "grad_norm": 0.5390625, "learning_rate": 6.244415869526398e-07, "loss": 0.9802, "step": 67480 }, { "epoch": 0.97, "grad_norm": 0.640625, "learning_rate": 6.216511909666101e-07, "loss": 1.0195, "step": 67485 }, { "epoch": 0.97, "grad_norm": 0.6015625, "learning_rate": 6.188670241286665e-07, "loss": 1.1275, "step": 67490 }, { "epoch": 0.97, "grad_norm": 0.60546875, "learning_rate": 6.16089086613325e-07, "loss": 0.9283, "step": 67495 }, { "epoch": 0.97, "grad_norm": 0.5546875, "learning_rate": 6.133173785947022e-07, "loss": 1.1244, "step": 67500 }, { "epoch": 0.97, "grad_norm": 0.6328125, "learning_rate": 6.105519002465365e-07, "loss": 0.9914, "step": 67505 }, { "epoch": 0.97, "grad_norm": 0.462890625, "learning_rate": 6.077926517421784e-07, "loss": 0.7442, "step": 67510 }, { "epoch": 0.97, "grad_norm": 0.57421875, "learning_rate": 6.050396332545782e-07, "loss": 0.9948, "step": 67515 }, { "epoch": 0.97, "grad_norm": 0.63671875, "learning_rate": 6.02292844956287e-07, "loss": 0.9302, "step": 67520 }, { "epoch": 0.97, "grad_norm": 0.4921875, "learning_rate": 5.995522870194891e-07, "loss": 1.0059, "step": 67525 }, { "epoch": 0.97, "grad_norm": 0.5390625, "learning_rate": 5.968179596159584e-07, "loss": 0.9251, "step": 67530 }, { "epoch": 0.97, "grad_norm": 0.56640625, "learning_rate": 5.940898629171021e-07, "loss": 0.9118, "step": 67535 }, { "epoch": 0.97, "grad_norm": 0.546875, "learning_rate": 5.913679970938946e-07, "loss": 1.0089, "step": 67540 }, { "epoch": 0.97, "grad_norm": 0.5625, "learning_rate": 5.886523623169548e-07, "loss": 1.1221, "step": 67545 }, { "epoch": 0.97, "grad_norm": 0.546875, "learning_rate": 5.859429587565136e-07, "loss": 0.9205, "step": 67550 }, { "epoch": 0.97, "grad_norm": 0.515625, "learning_rate": 5.832397865823791e-07, "loss": 0.9345, "step": 67555 }, { "epoch": 0.97, "grad_norm": 0.53125, "learning_rate": 5.805428459640161e-07, "loss": 0.9954, "step": 67560 }, { "epoch": 0.97, "grad_norm": 0.55859375, "learning_rate": 5.778521370704448e-07, "loss": 0.9087, "step": 67565 }, { "epoch": 0.97, "grad_norm": 0.5625, "learning_rate": 5.751676600703415e-07, "loss": 0.8738, "step": 67570 }, { "epoch": 0.97, "grad_norm": 0.609375, "learning_rate": 5.724894151319604e-07, "loss": 0.8994, "step": 67575 }, { "epoch": 0.97, "grad_norm": 0.6484375, "learning_rate": 5.698174024231895e-07, "loss": 0.9824, "step": 67580 }, { "epoch": 0.97, "grad_norm": 0.625, "learning_rate": 5.671516221114947e-07, "loss": 0.8964, "step": 67585 }, { "epoch": 0.97, "grad_norm": 0.57421875, "learning_rate": 5.64492074363987e-07, "loss": 0.9816, "step": 67590 }, { "epoch": 0.97, "grad_norm": 0.66015625, "learning_rate": 5.61838759347355e-07, "loss": 0.9253, "step": 67595 }, { "epoch": 0.97, "grad_norm": 0.5078125, "learning_rate": 5.591916772279326e-07, "loss": 0.9326, "step": 67600 }, { "epoch": 0.97, "grad_norm": 0.6328125, "learning_rate": 5.565508281716203e-07, "loss": 0.9426, "step": 67605 }, { "epoch": 0.97, "grad_norm": 0.6171875, "learning_rate": 5.539162123439634e-07, "loss": 1.0343, "step": 67610 }, { "epoch": 0.97, "grad_norm": 0.59375, "learning_rate": 5.512878299100966e-07, "loss": 0.9433, "step": 67615 }, { "epoch": 0.97, "grad_norm": 0.5703125, "learning_rate": 5.486656810347657e-07, "loss": 0.9195, "step": 67620 }, { "epoch": 0.97, "grad_norm": 0.54296875, "learning_rate": 5.460497658823393e-07, "loss": 1.1784, "step": 67625 }, { "epoch": 0.97, "grad_norm": 0.515625, "learning_rate": 5.434400846167864e-07, "loss": 0.9351, "step": 67630 }, { "epoch": 0.97, "grad_norm": 0.60546875, "learning_rate": 5.40836637401676e-07, "loss": 0.9793, "step": 67635 }, { "epoch": 0.97, "grad_norm": 0.578125, "learning_rate": 5.382394244001998e-07, "loss": 0.9489, "step": 67640 }, { "epoch": 0.97, "grad_norm": 0.66796875, "learning_rate": 5.356484457751609e-07, "loss": 0.9235, "step": 67645 }, { "epoch": 0.97, "grad_norm": 0.6796875, "learning_rate": 5.330637016889517e-07, "loss": 0.9436, "step": 67650 }, { "epoch": 0.97, "grad_norm": 0.55078125, "learning_rate": 5.30485192303587e-07, "loss": 1.1746, "step": 67655 }, { "epoch": 0.97, "grad_norm": 0.51171875, "learning_rate": 5.27912917780704e-07, "loss": 0.9042, "step": 67660 }, { "epoch": 0.97, "grad_norm": 0.62890625, "learning_rate": 5.253468782815296e-07, "loss": 0.9448, "step": 67665 }, { "epoch": 0.97, "grad_norm": 0.515625, "learning_rate": 5.227870739669017e-07, "loss": 0.8279, "step": 67670 }, { "epoch": 0.97, "grad_norm": 0.51171875, "learning_rate": 5.202335049972806e-07, "loss": 0.969, "step": 67675 }, { "epoch": 0.97, "grad_norm": 0.57421875, "learning_rate": 5.176861715327163e-07, "loss": 0.8491, "step": 67680 }, { "epoch": 0.97, "grad_norm": 0.58984375, "learning_rate": 5.151450737328811e-07, "loss": 0.9091, "step": 67685 }, { "epoch": 0.97, "grad_norm": 0.578125, "learning_rate": 5.126102117570586e-07, "loss": 0.8733, "step": 67690 }, { "epoch": 0.97, "grad_norm": 0.54296875, "learning_rate": 5.100815857641439e-07, "loss": 1.0445, "step": 67695 }, { "epoch": 0.97, "grad_norm": 0.57421875, "learning_rate": 5.075591959126103e-07, "loss": 0.9023, "step": 67700 }, { "epoch": 0.97, "grad_norm": 0.54296875, "learning_rate": 5.050430423605868e-07, "loss": 0.7164, "step": 67705 }, { "epoch": 0.97, "grad_norm": 0.57421875, "learning_rate": 5.025331252657806e-07, "loss": 0.7707, "step": 67710 }, { "epoch": 0.97, "grad_norm": 0.5390625, "learning_rate": 5.000294447855103e-07, "loss": 1.0261, "step": 67715 }, { "epoch": 0.97, "grad_norm": 0.53125, "learning_rate": 4.975320010767171e-07, "loss": 0.8371, "step": 67720 }, { "epoch": 0.97, "grad_norm": 0.54296875, "learning_rate": 4.950407942959534e-07, "loss": 0.9907, "step": 67725 }, { "epoch": 0.97, "grad_norm": 0.5078125, "learning_rate": 4.9255582459935e-07, "loss": 0.9361, "step": 67730 }, { "epoch": 0.97, "grad_norm": 0.5390625, "learning_rate": 4.900770921426712e-07, "loss": 0.874, "step": 67735 }, { "epoch": 0.97, "grad_norm": 0.49609375, "learning_rate": 4.876045970813037e-07, "loss": 0.9589, "step": 67740 }, { "epoch": 0.97, "grad_norm": 0.61328125, "learning_rate": 4.851383395702125e-07, "loss": 1.0567, "step": 67745 }, { "epoch": 0.97, "grad_norm": 0.46484375, "learning_rate": 4.82678319763985e-07, "loss": 0.8231, "step": 67750 }, { "epoch": 0.97, "grad_norm": 0.5390625, "learning_rate": 4.802245378168202e-07, "loss": 1.0487, "step": 67755 }, { "epoch": 0.97, "grad_norm": 0.55078125, "learning_rate": 4.777769938825283e-07, "loss": 0.9396, "step": 67760 }, { "epoch": 0.97, "grad_norm": 0.58984375, "learning_rate": 4.753356881145199e-07, "loss": 0.9664, "step": 67765 }, { "epoch": 0.97, "grad_norm": 0.52734375, "learning_rate": 4.729006206658171e-07, "loss": 1.0031, "step": 67770 }, { "epoch": 0.97, "grad_norm": 0.53125, "learning_rate": 4.704717916890533e-07, "loss": 0.8784, "step": 67775 }, { "epoch": 0.97, "grad_norm": 0.56640625, "learning_rate": 4.6804920133647343e-07, "loss": 0.8679, "step": 67780 }, { "epoch": 0.97, "grad_norm": 0.67578125, "learning_rate": 4.656328497599338e-07, "loss": 1.0123, "step": 67785 }, { "epoch": 0.97, "grad_norm": 0.466796875, "learning_rate": 4.6322273711089103e-07, "loss": 0.8113, "step": 67790 }, { "epoch": 0.97, "grad_norm": 0.5078125, "learning_rate": 4.6081886354040207e-07, "loss": 0.9404, "step": 67795 }, { "epoch": 0.97, "grad_norm": 0.54296875, "learning_rate": 4.584212291991463e-07, "loss": 0.9276, "step": 67800 }, { "epoch": 0.97, "grad_norm": 0.5390625, "learning_rate": 4.5602983423742584e-07, "loss": 0.999, "step": 67805 }, { "epoch": 0.97, "grad_norm": 0.55078125, "learning_rate": 4.5364467880512076e-07, "loss": 0.9367, "step": 67810 }, { "epoch": 0.97, "grad_norm": 0.62890625, "learning_rate": 4.5126576305174474e-07, "loss": 1.0302, "step": 67815 }, { "epoch": 0.97, "grad_norm": 0.57421875, "learning_rate": 4.488930871264008e-07, "loss": 0.9275, "step": 67820 }, { "epoch": 0.97, "grad_norm": 0.53125, "learning_rate": 4.4652665117782545e-07, "loss": 1.0678, "step": 67825 }, { "epoch": 0.97, "grad_norm": 0.55078125, "learning_rate": 4.441664553543334e-07, "loss": 0.9375, "step": 67830 }, { "epoch": 0.97, "grad_norm": 0.5859375, "learning_rate": 4.4181249980388416e-07, "loss": 0.9544, "step": 67835 }, { "epoch": 0.97, "grad_norm": 0.62890625, "learning_rate": 4.394647846740041e-07, "loss": 0.8992, "step": 67840 }, { "epoch": 0.97, "grad_norm": 0.58203125, "learning_rate": 4.3712331011186433e-07, "loss": 1.0277, "step": 67845 }, { "epoch": 0.97, "grad_norm": 0.58203125, "learning_rate": 4.3478807626422536e-07, "loss": 1.0133, "step": 67850 }, { "epoch": 0.97, "grad_norm": 0.55859375, "learning_rate": 4.3245908327747e-07, "loss": 1.0299, "step": 67855 }, { "epoch": 0.97, "grad_norm": 0.6640625, "learning_rate": 4.3013633129758146e-07, "loss": 0.9226, "step": 67860 }, { "epoch": 0.97, "grad_norm": 0.6953125, "learning_rate": 4.2781982047014337e-07, "loss": 0.8341, "step": 67865 }, { "epoch": 0.97, "grad_norm": 0.5703125, "learning_rate": 4.255095509403617e-07, "loss": 0.9564, "step": 67870 }, { "epoch": 0.97, "grad_norm": 0.546875, "learning_rate": 4.2320552285304296e-07, "loss": 0.8604, "step": 67875 }, { "epoch": 0.97, "grad_norm": 0.5546875, "learning_rate": 4.209077363526159e-07, "loss": 0.9471, "step": 67880 }, { "epoch": 0.97, "grad_norm": 0.53515625, "learning_rate": 4.1861619158309883e-07, "loss": 0.9266, "step": 67885 }, { "epoch": 0.97, "grad_norm": 0.5546875, "learning_rate": 4.163308886881323e-07, "loss": 0.8708, "step": 67890 }, { "epoch": 0.97, "grad_norm": 0.51953125, "learning_rate": 4.140518278109684e-07, "loss": 0.9368, "step": 67895 }, { "epoch": 0.97, "grad_norm": 0.5, "learning_rate": 4.1177900909445953e-07, "loss": 0.9397, "step": 67900 }, { "epoch": 0.97, "grad_norm": 0.470703125, "learning_rate": 4.095124326810473e-07, "loss": 0.9737, "step": 67905 }, { "epoch": 0.97, "grad_norm": 0.5234375, "learning_rate": 4.072520987128292e-07, "loss": 0.8897, "step": 67910 }, { "epoch": 0.97, "grad_norm": 0.5703125, "learning_rate": 4.0499800733148074e-07, "loss": 0.895, "step": 67915 }, { "epoch": 0.97, "grad_norm": 0.5546875, "learning_rate": 4.027501586782778e-07, "loss": 1.0609, "step": 67920 }, { "epoch": 0.97, "grad_norm": 0.52734375, "learning_rate": 4.005085528941299e-07, "loss": 0.8732, "step": 67925 }, { "epoch": 0.97, "grad_norm": 0.60546875, "learning_rate": 3.982731901195358e-07, "loss": 0.9002, "step": 67930 }, { "epoch": 0.97, "grad_norm": 0.6328125, "learning_rate": 3.9604407049461667e-07, "loss": 0.9728, "step": 67935 }, { "epoch": 0.97, "grad_norm": 0.5078125, "learning_rate": 3.9382119415909413e-07, "loss": 0.8315, "step": 67940 }, { "epoch": 0.97, "grad_norm": 0.578125, "learning_rate": 3.916045612523123e-07, "loss": 0.9386, "step": 67945 }, { "epoch": 0.97, "grad_norm": 0.625, "learning_rate": 3.8939417191319327e-07, "loss": 1.23, "step": 67950 }, { "epoch": 0.97, "grad_norm": 0.578125, "learning_rate": 3.87190026280293e-07, "loss": 0.8856, "step": 67955 }, { "epoch": 0.97, "grad_norm": 0.5625, "learning_rate": 3.8499212449176757e-07, "loss": 0.987, "step": 67960 }, { "epoch": 0.97, "grad_norm": 0.5859375, "learning_rate": 3.828004666853957e-07, "loss": 0.8913, "step": 67965 }, { "epoch": 0.97, "grad_norm": 0.55078125, "learning_rate": 3.8061505299854525e-07, "loss": 0.9654, "step": 67970 }, { "epoch": 0.98, "grad_norm": 0.6171875, "learning_rate": 3.784358835681956e-07, "loss": 1.0292, "step": 67975 }, { "epoch": 0.98, "grad_norm": 0.6328125, "learning_rate": 3.762629585309374e-07, "loss": 0.9346, "step": 67980 }, { "epoch": 0.98, "grad_norm": 0.56640625, "learning_rate": 3.740962780229951e-07, "loss": 0.9542, "step": 67985 }, { "epoch": 0.98, "grad_norm": 0.50390625, "learning_rate": 3.7193584218014886e-07, "loss": 0.9173, "step": 67990 }, { "epoch": 0.98, "grad_norm": 0.58203125, "learning_rate": 3.697816511378349e-07, "loss": 0.949, "step": 67995 }, { "epoch": 0.98, "grad_norm": 0.5390625, "learning_rate": 3.6763370503107855e-07, "loss": 0.8603, "step": 68000 }, { "epoch": 0.98, "grad_norm": 0.5625, "learning_rate": 3.6549200399451646e-07, "loss": 0.9199, "step": 68005 }, { "epoch": 0.98, "grad_norm": 0.6484375, "learning_rate": 3.633565481623857e-07, "loss": 1.018, "step": 68010 }, { "epoch": 0.98, "grad_norm": 0.5078125, "learning_rate": 3.61227337668546e-07, "loss": 0.953, "step": 68015 }, { "epoch": 0.98, "grad_norm": 0.53125, "learning_rate": 3.59104372646446e-07, "loss": 0.9271, "step": 68020 }, { "epoch": 0.98, "grad_norm": 0.55078125, "learning_rate": 3.5698765322917935e-07, "loss": 1.0202, "step": 68025 }, { "epoch": 0.98, "grad_norm": 0.609375, "learning_rate": 3.5487717954939547e-07, "loss": 0.9461, "step": 68030 }, { "epoch": 0.98, "grad_norm": 0.5234375, "learning_rate": 3.527729517394107e-07, "loss": 0.9437, "step": 68035 }, { "epoch": 0.98, "grad_norm": 0.64453125, "learning_rate": 3.506749699310974e-07, "loss": 0.9535, "step": 68040 }, { "epoch": 0.98, "grad_norm": 0.57421875, "learning_rate": 3.485832342559725e-07, "loss": 0.8304, "step": 68045 }, { "epoch": 0.98, "grad_norm": 0.65625, "learning_rate": 3.464977448451423e-07, "loss": 1.1197, "step": 68050 }, { "epoch": 0.98, "grad_norm": 0.609375, "learning_rate": 3.444185018293244e-07, "loss": 1.1238, "step": 68055 }, { "epoch": 0.98, "grad_norm": 0.54296875, "learning_rate": 3.423455053388591e-07, "loss": 1.0381, "step": 68060 }, { "epoch": 0.98, "grad_norm": 0.578125, "learning_rate": 3.402787555036757e-07, "loss": 1.038, "step": 68065 }, { "epoch": 0.98, "grad_norm": 0.58203125, "learning_rate": 3.382182524533262e-07, "loss": 0.9861, "step": 68070 }, { "epoch": 0.98, "grad_norm": 0.609375, "learning_rate": 3.361639963169627e-07, "loss": 1.0142, "step": 68075 }, { "epoch": 0.98, "grad_norm": 0.5546875, "learning_rate": 3.341159872233379e-07, "loss": 0.8862, "step": 68080 }, { "epoch": 0.98, "grad_norm": 0.5625, "learning_rate": 3.32074225300838e-07, "loss": 0.9435, "step": 68085 }, { "epoch": 0.98, "grad_norm": 0.55859375, "learning_rate": 3.300387106774383e-07, "loss": 0.9784, "step": 68090 }, { "epoch": 0.98, "grad_norm": 0.53515625, "learning_rate": 3.280094434807257e-07, "loss": 0.9197, "step": 68095 }, { "epoch": 0.98, "grad_norm": 0.546875, "learning_rate": 3.2598642383789846e-07, "loss": 0.9625, "step": 68100 }, { "epoch": 0.98, "grad_norm": 0.53125, "learning_rate": 3.239696518757662e-07, "loss": 0.9105, "step": 68105 }, { "epoch": 0.98, "grad_norm": 0.59375, "learning_rate": 3.21959127720739e-07, "loss": 0.8981, "step": 68110 }, { "epoch": 0.98, "grad_norm": 0.7421875, "learning_rate": 3.199548514988271e-07, "loss": 0.9995, "step": 68115 }, { "epoch": 0.98, "grad_norm": 0.69140625, "learning_rate": 3.1795682333567443e-07, "loss": 1.0054, "step": 68120 }, { "epoch": 0.98, "grad_norm": 0.51953125, "learning_rate": 3.1596504335652533e-07, "loss": 0.9117, "step": 68125 }, { "epoch": 0.98, "grad_norm": 0.5, "learning_rate": 3.1397951168620207e-07, "loss": 0.8921, "step": 68130 }, { "epoch": 0.98, "grad_norm": 0.53125, "learning_rate": 3.120002284491941e-07, "loss": 0.9583, "step": 68135 }, { "epoch": 0.98, "grad_norm": 0.490234375, "learning_rate": 3.100271937695354e-07, "loss": 1.0138, "step": 68140 }, { "epoch": 0.98, "grad_norm": 0.490234375, "learning_rate": 3.080604077709048e-07, "loss": 0.8712, "step": 68145 }, { "epoch": 0.98, "grad_norm": 0.57421875, "learning_rate": 3.0609987057660383e-07, "loss": 1.0037, "step": 68150 }, { "epoch": 0.98, "grad_norm": 0.640625, "learning_rate": 3.0414558230948963e-07, "loss": 0.9395, "step": 68155 }, { "epoch": 0.98, "grad_norm": 0.55078125, "learning_rate": 3.021975430920865e-07, "loss": 0.9111, "step": 68160 }, { "epoch": 0.98, "grad_norm": 0.53515625, "learning_rate": 3.002557530464745e-07, "loss": 0.8204, "step": 68165 }, { "epoch": 0.98, "grad_norm": 0.5625, "learning_rate": 2.9832021229438955e-07, "loss": 0.9531, "step": 68170 }, { "epoch": 0.98, "grad_norm": 0.62109375, "learning_rate": 2.963909209571458e-07, "loss": 0.8763, "step": 68175 }, { "epoch": 0.98, "grad_norm": 0.59375, "learning_rate": 2.9446787915565765e-07, "loss": 0.9364, "step": 68180 }, { "epoch": 0.98, "grad_norm": 0.55859375, "learning_rate": 2.9255108701049525e-07, "loss": 1.0371, "step": 68185 }, { "epoch": 0.98, "grad_norm": 0.55859375, "learning_rate": 2.906405446417848e-07, "loss": 0.919, "step": 68190 }, { "epoch": 0.98, "grad_norm": 0.61328125, "learning_rate": 2.887362521692749e-07, "loss": 0.8907, "step": 68195 }, { "epoch": 0.98, "grad_norm": 0.5390625, "learning_rate": 2.868382097123479e-07, "loss": 0.9184, "step": 68200 }, { "epoch": 0.98, "grad_norm": 0.6171875, "learning_rate": 2.8494641738996406e-07, "loss": 0.9982, "step": 68205 }, { "epoch": 0.98, "grad_norm": 0.546875, "learning_rate": 2.8306087532069535e-07, "loss": 1.0517, "step": 68210 }, { "epoch": 0.98, "grad_norm": 0.65234375, "learning_rate": 2.8118158362275827e-07, "loss": 0.9018, "step": 68215 }, { "epoch": 0.98, "grad_norm": 0.65625, "learning_rate": 2.793085424139141e-07, "loss": 1.1069, "step": 68220 }, { "epoch": 0.98, "grad_norm": 0.64453125, "learning_rate": 2.7744175181158015e-07, "loss": 0.936, "step": 68225 }, { "epoch": 0.98, "grad_norm": 0.60546875, "learning_rate": 2.7558121193278496e-07, "loss": 0.9315, "step": 68230 }, { "epoch": 0.98, "grad_norm": 0.546875, "learning_rate": 2.737269228941242e-07, "loss": 1.0781, "step": 68235 }, { "epoch": 0.98, "grad_norm": 0.53515625, "learning_rate": 2.718788848118381e-07, "loss": 0.9661, "step": 68240 }, { "epoch": 0.98, "grad_norm": 0.56640625, "learning_rate": 2.700370978017785e-07, "loss": 0.9609, "step": 68245 }, { "epoch": 0.98, "grad_norm": 0.67578125, "learning_rate": 2.682015619793643e-07, "loss": 1.0629, "step": 68250 }, { "epoch": 0.98, "grad_norm": 0.546875, "learning_rate": 2.6637227745965886e-07, "loss": 0.9062, "step": 68255 }, { "epoch": 0.98, "grad_norm": 0.6015625, "learning_rate": 2.645492443573372e-07, "loss": 1.0408, "step": 68260 }, { "epoch": 0.98, "grad_norm": 0.515625, "learning_rate": 2.627324627866523e-07, "loss": 0.9313, "step": 68265 }, { "epoch": 0.98, "grad_norm": 0.55859375, "learning_rate": 2.609219328614909e-07, "loss": 0.9128, "step": 68270 }, { "epoch": 0.98, "grad_norm": 0.6484375, "learning_rate": 2.591176546953289e-07, "loss": 1.0647, "step": 68275 }, { "epoch": 0.98, "grad_norm": 0.52734375, "learning_rate": 2.573196284012758e-07, "loss": 0.9276, "step": 68280 }, { "epoch": 0.98, "grad_norm": 0.6015625, "learning_rate": 2.555278540920192e-07, "loss": 1.0192, "step": 68285 }, { "epoch": 0.98, "grad_norm": 0.578125, "learning_rate": 2.537423318798804e-07, "loss": 0.8791, "step": 68290 }, { "epoch": 0.98, "grad_norm": 0.5, "learning_rate": 2.51963061876781e-07, "loss": 1.1994, "step": 68295 }, { "epoch": 0.98, "grad_norm": 0.52734375, "learning_rate": 2.501900441942207e-07, "loss": 0.9692, "step": 68300 }, { "epoch": 0.98, "grad_norm": 0.6015625, "learning_rate": 2.4842327894336603e-07, "loss": 0.9326, "step": 68305 }, { "epoch": 0.98, "grad_norm": 0.51953125, "learning_rate": 2.466627662349508e-07, "loss": 1.069, "step": 68310 }, { "epoch": 0.98, "grad_norm": 0.59765625, "learning_rate": 2.449085061793199e-07, "loss": 0.9998, "step": 68315 }, { "epoch": 0.98, "grad_norm": 0.61328125, "learning_rate": 2.4316049888643e-07, "loss": 0.9248, "step": 68320 }, { "epoch": 0.98, "grad_norm": 0.73828125, "learning_rate": 2.4141874446585997e-07, "loss": 0.8767, "step": 68325 }, { "epoch": 0.98, "grad_norm": 0.6171875, "learning_rate": 2.3968324302677815e-07, "loss": 0.8412, "step": 68330 }, { "epoch": 0.98, "grad_norm": 0.5625, "learning_rate": 2.3795399467796408e-07, "loss": 1.0728, "step": 68335 }, { "epoch": 0.98, "grad_norm": 0.55859375, "learning_rate": 2.3623099952782002e-07, "loss": 0.9397, "step": 68340 }, { "epoch": 0.98, "grad_norm": 0.53515625, "learning_rate": 2.3451425768432621e-07, "loss": 0.8171, "step": 68345 }, { "epoch": 0.98, "grad_norm": 0.58203125, "learning_rate": 2.3280376925511881e-07, "loss": 1.077, "step": 68350 }, { "epoch": 0.98, "grad_norm": 0.474609375, "learning_rate": 2.3109953434737875e-07, "loss": 0.8722, "step": 68355 }, { "epoch": 0.98, "grad_norm": 0.53515625, "learning_rate": 2.2940155306796497e-07, "loss": 0.8994, "step": 68360 }, { "epoch": 0.98, "grad_norm": 0.578125, "learning_rate": 2.2770982552328125e-07, "loss": 1.0112, "step": 68365 }, { "epoch": 0.98, "grad_norm": 0.60546875, "learning_rate": 2.260243518193761e-07, "loss": 0.9184, "step": 68370 }, { "epoch": 0.98, "grad_norm": 0.60546875, "learning_rate": 2.2434513206189833e-07, "loss": 0.9368, "step": 68375 }, { "epoch": 0.98, "grad_norm": 0.68359375, "learning_rate": 2.2267216635609711e-07, "loss": 1.0861, "step": 68380 }, { "epoch": 0.98, "grad_norm": 0.625, "learning_rate": 2.210054548068552e-07, "loss": 0.9625, "step": 68385 }, { "epoch": 0.98, "grad_norm": 0.609375, "learning_rate": 2.1934499751862236e-07, "loss": 0.8768, "step": 68390 }, { "epoch": 0.98, "grad_norm": 0.5859375, "learning_rate": 2.1769079459548204e-07, "loss": 0.9296, "step": 68395 }, { "epoch": 0.98, "grad_norm": 0.482421875, "learning_rate": 2.1604284614112902e-07, "loss": 0.856, "step": 68400 }, { "epoch": 0.98, "grad_norm": 0.5078125, "learning_rate": 2.144011522588585e-07, "loss": 0.8969, "step": 68405 }, { "epoch": 0.98, "grad_norm": 0.546875, "learning_rate": 2.127657130515548e-07, "loss": 0.8386, "step": 68410 }, { "epoch": 0.98, "grad_norm": 0.6796875, "learning_rate": 2.1113652862175816e-07, "loss": 0.8712, "step": 68415 }, { "epoch": 0.98, "grad_norm": 0.58984375, "learning_rate": 2.0951359907157575e-07, "loss": 0.8142, "step": 68420 }, { "epoch": 0.98, "grad_norm": 0.58203125, "learning_rate": 2.0789692450272624e-07, "loss": 0.9764, "step": 68425 }, { "epoch": 0.98, "grad_norm": 0.546875, "learning_rate": 2.062865050165508e-07, "loss": 0.8942, "step": 68430 }, { "epoch": 0.98, "grad_norm": 0.58984375, "learning_rate": 2.0468234071400194e-07, "loss": 0.95, "step": 68435 }, { "epoch": 0.98, "grad_norm": 0.59765625, "learning_rate": 2.0308443169561042e-07, "loss": 1.1128, "step": 68440 }, { "epoch": 0.98, "grad_norm": 0.54296875, "learning_rate": 2.0149277806155164e-07, "loss": 0.9583, "step": 68445 }, { "epoch": 0.98, "grad_norm": 0.48828125, "learning_rate": 1.9990737991159024e-07, "loss": 1.0166, "step": 68450 }, { "epoch": 0.98, "grad_norm": 0.51953125, "learning_rate": 1.983282373450801e-07, "loss": 0.8655, "step": 68455 }, { "epoch": 0.98, "grad_norm": 0.4765625, "learning_rate": 1.9675535046104198e-07, "loss": 0.9587, "step": 68460 }, { "epoch": 0.98, "grad_norm": 0.4609375, "learning_rate": 1.951887193580304e-07, "loss": 0.9855, "step": 68465 }, { "epoch": 0.98, "grad_norm": 0.515625, "learning_rate": 1.936283441342668e-07, "loss": 0.8688, "step": 68470 }, { "epoch": 0.98, "grad_norm": 0.5859375, "learning_rate": 1.9207422488753957e-07, "loss": 1.0159, "step": 68475 }, { "epoch": 0.98, "grad_norm": 0.57421875, "learning_rate": 1.9052636171528193e-07, "loss": 1.0702, "step": 68480 }, { "epoch": 0.98, "grad_norm": 0.546875, "learning_rate": 1.8898475471449405e-07, "loss": 1.0368, "step": 68485 }, { "epoch": 0.98, "grad_norm": 0.57421875, "learning_rate": 1.8744940398182088e-07, "loss": 1.0001, "step": 68490 }, { "epoch": 0.98, "grad_norm": 0.58203125, "learning_rate": 1.8592030961349648e-07, "loss": 1.0321, "step": 68495 }, { "epoch": 0.98, "grad_norm": 0.53515625, "learning_rate": 1.843974717053776e-07, "loss": 1.0103, "step": 68500 }, { "epoch": 0.98, "grad_norm": 0.4375, "learning_rate": 1.828808903528878e-07, "loss": 0.9373, "step": 68505 }, { "epoch": 0.98, "grad_norm": 0.6171875, "learning_rate": 1.8137056565111777e-07, "loss": 0.9273, "step": 68510 }, { "epoch": 0.98, "grad_norm": 0.6328125, "learning_rate": 1.7986649769471398e-07, "loss": 1.1433, "step": 68515 }, { "epoch": 0.98, "grad_norm": 0.470703125, "learning_rate": 1.7836868657797878e-07, "loss": 1.0121, "step": 68520 }, { "epoch": 0.98, "grad_norm": 0.66015625, "learning_rate": 1.7687713239477043e-07, "loss": 0.9412, "step": 68525 }, { "epoch": 0.98, "grad_norm": 0.58984375, "learning_rate": 1.7539183523859192e-07, "loss": 0.9708, "step": 68530 }, { "epoch": 0.98, "grad_norm": 0.53125, "learning_rate": 1.7391279520254654e-07, "loss": 0.8521, "step": 68535 }, { "epoch": 0.98, "grad_norm": 0.546875, "learning_rate": 1.7244001237933793e-07, "loss": 0.8871, "step": 68540 }, { "epoch": 0.98, "grad_norm": 0.61328125, "learning_rate": 1.709734868612922e-07, "loss": 1.0056, "step": 68545 }, { "epoch": 0.98, "grad_norm": 0.57421875, "learning_rate": 1.6951321874031367e-07, "loss": 0.9889, "step": 68550 }, { "epoch": 0.98, "grad_norm": 0.5859375, "learning_rate": 1.6805920810795128e-07, "loss": 0.8824, "step": 68555 }, { "epoch": 0.98, "grad_norm": 0.640625, "learning_rate": 1.6661145505533215e-07, "loss": 0.9732, "step": 68560 }, { "epoch": 0.98, "grad_norm": 0.578125, "learning_rate": 1.6516995967320593e-07, "loss": 1.0555, "step": 68565 }, { "epoch": 0.98, "grad_norm": 0.55859375, "learning_rate": 1.6373472205193363e-07, "loss": 0.9849, "step": 68570 }, { "epoch": 0.98, "grad_norm": 0.66015625, "learning_rate": 1.6230574228146557e-07, "loss": 0.9778, "step": 68575 }, { "epoch": 0.98, "grad_norm": 0.6640625, "learning_rate": 1.608830204513856e-07, "loss": 1.0558, "step": 68580 }, { "epoch": 0.98, "grad_norm": 0.609375, "learning_rate": 1.5946655665086685e-07, "loss": 1.0651, "step": 68585 }, { "epoch": 0.98, "grad_norm": 0.5390625, "learning_rate": 1.5805635096869387e-07, "loss": 0.8778, "step": 68590 }, { "epoch": 0.98, "grad_norm": 0.4921875, "learning_rate": 1.566524034932515e-07, "loss": 0.9079, "step": 68595 }, { "epoch": 0.98, "grad_norm": 0.50390625, "learning_rate": 1.5525471431254713e-07, "loss": 1.0091, "step": 68600 }, { "epoch": 0.98, "grad_norm": 0.5703125, "learning_rate": 1.5386328351419953e-07, "loss": 0.9609, "step": 68605 }, { "epoch": 0.98, "grad_norm": 0.55859375, "learning_rate": 1.5247811118541676e-07, "loss": 0.9568, "step": 68610 }, { "epoch": 0.98, "grad_norm": 0.54296875, "learning_rate": 1.5109919741301826e-07, "loss": 0.9626, "step": 68615 }, { "epoch": 0.98, "grad_norm": 0.490234375, "learning_rate": 1.4972654228343486e-07, "loss": 0.9385, "step": 68620 }, { "epoch": 0.98, "grad_norm": 0.484375, "learning_rate": 1.4836014588271996e-07, "loss": 0.8519, "step": 68625 }, { "epoch": 0.98, "grad_norm": 0.52734375, "learning_rate": 1.4700000829651617e-07, "loss": 0.9085, "step": 68630 }, { "epoch": 0.98, "grad_norm": 0.53125, "learning_rate": 1.456461296100664e-07, "loss": 0.9853, "step": 68635 }, { "epoch": 0.98, "grad_norm": 0.55078125, "learning_rate": 1.4429850990824723e-07, "loss": 0.9127, "step": 68640 }, { "epoch": 0.98, "grad_norm": 0.6484375, "learning_rate": 1.429571492755244e-07, "loss": 0.9672, "step": 68645 }, { "epoch": 0.98, "grad_norm": 0.62109375, "learning_rate": 1.416220477959751e-07, "loss": 0.9399, "step": 68650 }, { "epoch": 0.98, "grad_norm": 0.57421875, "learning_rate": 1.4029320555327684e-07, "loss": 0.9394, "step": 68655 }, { "epoch": 0.98, "grad_norm": 0.5859375, "learning_rate": 1.389706226307408e-07, "loss": 1.0412, "step": 68660 }, { "epoch": 0.98, "grad_norm": 0.5234375, "learning_rate": 1.3765429911124507e-07, "loss": 0.9976, "step": 68665 }, { "epoch": 0.99, "grad_norm": 0.56640625, "learning_rate": 1.3634423507732362e-07, "loss": 0.8278, "step": 68670 }, { "epoch": 0.99, "grad_norm": 0.578125, "learning_rate": 1.3504043061107752e-07, "loss": 1.0012, "step": 68675 }, { "epoch": 0.99, "grad_norm": 0.51953125, "learning_rate": 1.3374288579424132e-07, "loss": 0.848, "step": 68680 }, { "epoch": 0.99, "grad_norm": 0.5859375, "learning_rate": 1.3245160070812778e-07, "loss": 1.0536, "step": 68685 }, { "epoch": 0.99, "grad_norm": 0.58984375, "learning_rate": 1.3116657543369436e-07, "loss": 1.0768, "step": 68690 }, { "epoch": 0.99, "grad_norm": 0.6171875, "learning_rate": 1.2988781005147666e-07, "loss": 1.0061, "step": 68695 }, { "epoch": 0.99, "grad_norm": 0.57421875, "learning_rate": 1.2861530464163273e-07, "loss": 0.88, "step": 68700 }, { "epoch": 0.99, "grad_norm": 0.6796875, "learning_rate": 1.2734905928393215e-07, "loss": 1.0937, "step": 68705 }, { "epoch": 0.99, "grad_norm": 0.57421875, "learning_rate": 1.2608907405773362e-07, "loss": 1.0532, "step": 68710 }, { "epoch": 0.99, "grad_norm": 0.54296875, "learning_rate": 1.2483534904200734e-07, "loss": 0.9389, "step": 68715 }, { "epoch": 0.99, "grad_norm": 0.55859375, "learning_rate": 1.2358788431536816e-07, "loss": 0.9631, "step": 68720 }, { "epoch": 0.99, "grad_norm": 0.53125, "learning_rate": 1.2234667995598693e-07, "loss": 1.0137, "step": 68725 }, { "epoch": 0.99, "grad_norm": 0.490234375, "learning_rate": 1.2111173604165692e-07, "loss": 0.9438, "step": 68730 }, { "epoch": 0.99, "grad_norm": 0.53125, "learning_rate": 1.1988305264980516e-07, "loss": 1.0743, "step": 68735 }, { "epoch": 0.99, "grad_norm": 0.5859375, "learning_rate": 1.1866062985743664e-07, "loss": 0.9187, "step": 68740 }, { "epoch": 0.99, "grad_norm": 0.6171875, "learning_rate": 1.1744446774116791e-07, "loss": 0.9324, "step": 68745 }, { "epoch": 0.99, "grad_norm": 0.65625, "learning_rate": 1.1623456637723795e-07, "loss": 0.9166, "step": 68750 }, { "epoch": 0.99, "grad_norm": 0.6953125, "learning_rate": 1.1503092584148611e-07, "loss": 1.0688, "step": 68755 }, { "epoch": 0.99, "grad_norm": 0.51953125, "learning_rate": 1.1383354620936315e-07, "loss": 0.8598, "step": 68760 }, { "epoch": 0.99, "grad_norm": 0.56640625, "learning_rate": 1.1264242755590903e-07, "loss": 1.0886, "step": 68765 }, { "epoch": 0.99, "grad_norm": 0.60546875, "learning_rate": 1.1145756995578626e-07, "loss": 0.9614, "step": 68770 }, { "epoch": 0.99, "grad_norm": 0.5546875, "learning_rate": 1.1027897348326877e-07, "loss": 0.8253, "step": 68775 }, { "epoch": 0.99, "grad_norm": 0.45703125, "learning_rate": 1.0910663821221967e-07, "loss": 1.011, "step": 68780 }, { "epoch": 0.99, "grad_norm": 0.5, "learning_rate": 1.0794056421614684e-07, "loss": 0.9053, "step": 68785 }, { "epoch": 0.99, "grad_norm": 0.54296875, "learning_rate": 1.0678075156812517e-07, "loss": 1.0008, "step": 68790 }, { "epoch": 0.99, "grad_norm": 0.51171875, "learning_rate": 1.0562720034085205e-07, "loss": 0.9327, "step": 68795 }, { "epoch": 0.99, "grad_norm": 0.53515625, "learning_rate": 1.0447991060663631e-07, "loss": 0.9186, "step": 68800 }, { "epoch": 0.99, "grad_norm": 0.7734375, "learning_rate": 1.0333888243738709e-07, "loss": 0.856, "step": 68805 }, { "epoch": 0.99, "grad_norm": 0.57421875, "learning_rate": 1.0220411590463607e-07, "loss": 0.9629, "step": 68810 }, { "epoch": 0.99, "grad_norm": 0.515625, "learning_rate": 1.0107561107950414e-07, "loss": 0.9257, "step": 68815 }, { "epoch": 0.99, "grad_norm": 0.59375, "learning_rate": 9.995336803272359e-08, "loss": 0.933, "step": 68820 }, { "epoch": 0.99, "grad_norm": 0.53125, "learning_rate": 9.883738683464927e-08, "loss": 0.8716, "step": 68825 }, { "epoch": 0.99, "grad_norm": 0.59375, "learning_rate": 9.772766755522522e-08, "loss": 0.7858, "step": 68830 }, { "epoch": 0.99, "grad_norm": 0.6328125, "learning_rate": 9.662421026400692e-08, "loss": 0.8859, "step": 68835 }, { "epoch": 0.99, "grad_norm": 0.58203125, "learning_rate": 9.552701503016126e-08, "loss": 0.9141, "step": 68840 }, { "epoch": 0.99, "grad_norm": 0.68359375, "learning_rate": 9.443608192246655e-08, "loss": 0.7986, "step": 68845 }, { "epoch": 0.99, "grad_norm": 0.61328125, "learning_rate": 9.335141100930145e-08, "loss": 0.8627, "step": 68850 }, { "epoch": 0.99, "grad_norm": 0.51953125, "learning_rate": 9.227300235865599e-08, "loss": 1.0326, "step": 68855 }, { "epoch": 0.99, "grad_norm": 0.5703125, "learning_rate": 9.120085603812056e-08, "loss": 0.8892, "step": 68860 }, { "epoch": 0.99, "grad_norm": 0.7421875, "learning_rate": 9.013497211489696e-08, "loss": 0.8939, "step": 68865 }, { "epoch": 0.99, "grad_norm": 0.609375, "learning_rate": 8.907535065580952e-08, "loss": 0.8497, "step": 68870 }, { "epoch": 0.99, "grad_norm": 0.5859375, "learning_rate": 8.802199172726066e-08, "loss": 1.0038, "step": 68875 }, { "epoch": 0.99, "grad_norm": 0.5234375, "learning_rate": 8.697489539529757e-08, "loss": 0.9418, "step": 68880 }, { "epoch": 0.99, "grad_norm": 0.53515625, "learning_rate": 8.593406172552332e-08, "loss": 0.8043, "step": 68885 }, { "epoch": 0.99, "grad_norm": 0.609375, "learning_rate": 8.489949078320792e-08, "loss": 0.9, "step": 68890 }, { "epoch": 0.99, "grad_norm": 0.51171875, "learning_rate": 8.387118263317728e-08, "loss": 0.934, "step": 68895 }, { "epoch": 0.99, "grad_norm": 0.546875, "learning_rate": 8.284913733990208e-08, "loss": 1.1524, "step": 68900 }, { "epoch": 0.99, "grad_norm": 0.546875, "learning_rate": 8.183335496743105e-08, "loss": 0.9928, "step": 68905 }, { "epoch": 0.99, "grad_norm": 0.58984375, "learning_rate": 8.082383557944662e-08, "loss": 0.9562, "step": 68910 }, { "epoch": 0.99, "grad_norm": 0.546875, "learning_rate": 7.982057923922037e-08, "loss": 1.0204, "step": 68915 }, { "epoch": 0.99, "grad_norm": 0.5546875, "learning_rate": 7.882358600964646e-08, "loss": 0.9144, "step": 68920 }, { "epoch": 0.99, "grad_norm": 0.5546875, "learning_rate": 7.783285595320822e-08, "loss": 0.8462, "step": 68925 }, { "epoch": 0.99, "grad_norm": 0.59765625, "learning_rate": 7.684838913200043e-08, "loss": 0.9862, "step": 68930 }, { "epoch": 0.99, "grad_norm": 0.5703125, "learning_rate": 7.58701856077515e-08, "loss": 0.8919, "step": 68935 }, { "epoch": 0.99, "grad_norm": 0.578125, "learning_rate": 7.489824544175683e-08, "loss": 0.9889, "step": 68940 }, { "epoch": 0.99, "grad_norm": 0.6484375, "learning_rate": 7.393256869494547e-08, "loss": 1.0822, "step": 68945 }, { "epoch": 0.99, "grad_norm": 0.5390625, "learning_rate": 7.297315542784678e-08, "loss": 0.8859, "step": 68950 }, { "epoch": 0.99, "grad_norm": 0.478515625, "learning_rate": 7.202000570060153e-08, "loss": 0.9169, "step": 68955 }, { "epoch": 0.99, "grad_norm": 0.625, "learning_rate": 7.107311957293971e-08, "loss": 0.9741, "step": 68960 }, { "epoch": 0.99, "grad_norm": 0.54296875, "learning_rate": 7.013249710423608e-08, "loss": 0.9086, "step": 68965 }, { "epoch": 0.99, "grad_norm": 0.53515625, "learning_rate": 6.919813835343237e-08, "loss": 0.8493, "step": 68970 }, { "epoch": 0.99, "grad_norm": 0.5859375, "learning_rate": 6.827004337910391e-08, "loss": 0.9377, "step": 68975 }, { "epoch": 0.99, "grad_norm": 0.62890625, "learning_rate": 6.734821223941535e-08, "loss": 0.9256, "step": 68980 }, { "epoch": 0.99, "grad_norm": 0.62109375, "learning_rate": 6.643264499216483e-08, "loss": 0.9293, "step": 68985 }, { "epoch": 0.99, "grad_norm": 0.55859375, "learning_rate": 6.552334169472874e-08, "loss": 0.992, "step": 68990 }, { "epoch": 0.99, "grad_norm": 0.63671875, "learning_rate": 6.462030240409478e-08, "loss": 0.8916, "step": 68995 }, { "epoch": 0.99, "grad_norm": 0.5234375, "learning_rate": 6.372352717688434e-08, "loss": 1.0083, "step": 69000 }, { "epoch": 0.99, "grad_norm": 0.55078125, "learning_rate": 6.283301606930803e-08, "loss": 0.8847, "step": 69005 }, { "epoch": 0.99, "grad_norm": 0.57421875, "learning_rate": 6.194876913716563e-08, "loss": 0.9906, "step": 69010 }, { "epoch": 0.99, "grad_norm": 0.52734375, "learning_rate": 6.107078643590169e-08, "loss": 1.1935, "step": 69015 }, { "epoch": 0.99, "grad_norm": 0.53515625, "learning_rate": 6.019906802053887e-08, "loss": 0.8415, "step": 69020 }, { "epoch": 0.99, "grad_norm": 0.55078125, "learning_rate": 5.9333613945722344e-08, "loss": 0.9834, "step": 69025 }, { "epoch": 0.99, "grad_norm": 0.58984375, "learning_rate": 5.84744242656976e-08, "loss": 0.9222, "step": 69030 }, { "epoch": 0.99, "grad_norm": 0.53515625, "learning_rate": 5.762149903432157e-08, "loss": 1.1191, "step": 69035 }, { "epoch": 0.99, "grad_norm": 0.546875, "learning_rate": 5.677483830505148e-08, "loss": 0.9117, "step": 69040 }, { "epoch": 0.99, "grad_norm": 0.53125, "learning_rate": 5.5934442130956e-08, "loss": 0.9156, "step": 69045 }, { "epoch": 0.99, "grad_norm": 0.58984375, "learning_rate": 5.510031056472631e-08, "loss": 0.877, "step": 69050 }, { "epoch": 0.99, "grad_norm": 0.5234375, "learning_rate": 5.4272443658631714e-08, "loss": 1.0652, "step": 69055 }, { "epoch": 0.99, "grad_norm": 0.54296875, "learning_rate": 5.3450841464564027e-08, "loss": 0.9143, "step": 69060 }, { "epoch": 0.99, "grad_norm": 0.55859375, "learning_rate": 5.263550403402651e-08, "loss": 0.9403, "step": 69065 }, { "epoch": 0.99, "grad_norm": 0.58203125, "learning_rate": 5.1826431418133816e-08, "loss": 0.9235, "step": 69070 }, { "epoch": 0.99, "grad_norm": 0.55859375, "learning_rate": 5.1023623667589834e-08, "loss": 0.8941, "step": 69075 }, { "epoch": 0.99, "grad_norm": 0.66015625, "learning_rate": 5.0227080832720985e-08, "loss": 0.9721, "step": 69080 }, { "epoch": 0.99, "grad_norm": 0.61328125, "learning_rate": 4.9436802963442886e-08, "loss": 1.0108, "step": 69085 }, { "epoch": 0.99, "grad_norm": 0.546875, "learning_rate": 4.865279010930479e-08, "loss": 0.9499, "step": 69090 }, { "epoch": 0.99, "grad_norm": 0.66796875, "learning_rate": 4.787504231944517e-08, "loss": 1.0272, "step": 69095 }, { "epoch": 0.99, "grad_norm": 0.5625, "learning_rate": 4.710355964262503e-08, "loss": 0.9583, "step": 69100 }, { "epoch": 0.99, "grad_norm": 0.56640625, "learning_rate": 4.633834212717236e-08, "loss": 0.9453, "step": 69105 }, { "epoch": 0.99, "grad_norm": 0.5703125, "learning_rate": 4.5579389821082117e-08, "loss": 1.0074, "step": 69110 }, { "epoch": 0.99, "grad_norm": 0.50390625, "learning_rate": 4.482670277190515e-08, "loss": 0.8634, "step": 69115 }, { "epoch": 0.99, "grad_norm": 0.5390625, "learning_rate": 4.4080281026837036e-08, "loss": 0.8821, "step": 69120 }, { "epoch": 0.99, "grad_norm": 0.4921875, "learning_rate": 4.3340124632651465e-08, "loss": 0.8397, "step": 69125 }, { "epoch": 0.99, "grad_norm": 0.59375, "learning_rate": 4.2606233635755775e-08, "loss": 1.0284, "step": 69130 }, { "epoch": 0.99, "grad_norm": 0.58203125, "learning_rate": 4.1878608082135397e-08, "loss": 0.9987, "step": 69135 }, { "epoch": 0.99, "grad_norm": 0.5078125, "learning_rate": 4.115724801740939e-08, "loss": 0.7926, "step": 69140 }, { "epoch": 0.99, "grad_norm": 0.56640625, "learning_rate": 4.044215348678604e-08, "loss": 0.8805, "step": 69145 }, { "epoch": 0.99, "grad_norm": 0.5625, "learning_rate": 3.973332453509615e-08, "loss": 0.9702, "step": 69150 }, { "epoch": 0.99, "grad_norm": 0.59375, "learning_rate": 3.903076120677085e-08, "loss": 0.995, "step": 69155 }, { "epoch": 0.99, "grad_norm": 0.515625, "learning_rate": 3.833446354584158e-08, "loss": 0.8779, "step": 69160 }, { "epoch": 0.99, "grad_norm": 0.53125, "learning_rate": 3.7644431595962314e-08, "loss": 0.8399, "step": 69165 }, { "epoch": 0.99, "grad_norm": 0.53125, "learning_rate": 3.6960665400365136e-08, "loss": 0.7854, "step": 69170 }, { "epoch": 0.99, "grad_norm": 0.51171875, "learning_rate": 3.628316500192686e-08, "loss": 0.9082, "step": 69175 }, { "epoch": 0.99, "grad_norm": 0.58203125, "learning_rate": 3.56119304431024e-08, "loss": 1.0525, "step": 69180 }, { "epoch": 0.99, "grad_norm": 0.55078125, "learning_rate": 3.494696176598034e-08, "loss": 0.8938, "step": 69185 }, { "epoch": 0.99, "grad_norm": 0.5625, "learning_rate": 3.428825901222732e-08, "loss": 0.9169, "step": 69190 }, { "epoch": 0.99, "grad_norm": 0.609375, "learning_rate": 3.363582222314365e-08, "loss": 0.9887, "step": 69195 }, { "epoch": 0.99, "grad_norm": 0.5390625, "learning_rate": 3.298965143961885e-08, "loss": 0.952, "step": 69200 }, { "epoch": 0.99, "grad_norm": 0.53125, "learning_rate": 3.234974670215385e-08, "loss": 1.1259, "step": 69205 }, { "epoch": 0.99, "grad_norm": 0.671875, "learning_rate": 3.1716108050861005e-08, "loss": 0.9038, "step": 69210 }, { "epoch": 0.99, "grad_norm": 0.57421875, "learning_rate": 3.1088735525453e-08, "loss": 1.0463, "step": 69215 }, { "epoch": 0.99, "grad_norm": 0.52734375, "learning_rate": 3.0467629165265025e-08, "loss": 0.9903, "step": 69220 }, { "epoch": 0.99, "grad_norm": 0.64453125, "learning_rate": 2.98527890092104e-08, "loss": 1.0275, "step": 69225 }, { "epoch": 0.99, "grad_norm": 0.6015625, "learning_rate": 2.9244215095847184e-08, "loss": 1.0958, "step": 69230 }, { "epoch": 0.99, "grad_norm": 0.50390625, "learning_rate": 2.8641907463311524e-08, "loss": 0.9467, "step": 69235 }, { "epoch": 0.99, "grad_norm": 0.58203125, "learning_rate": 2.804586614936211e-08, "loss": 0.9539, "step": 69240 }, { "epoch": 0.99, "grad_norm": 0.56640625, "learning_rate": 2.7456091191357945e-08, "loss": 0.9948, "step": 69245 }, { "epoch": 0.99, "grad_norm": 0.5546875, "learning_rate": 2.6872582626258357e-08, "loss": 1.0394, "step": 69250 }, { "epoch": 0.99, "grad_norm": 0.52734375, "learning_rate": 2.6295340490645192e-08, "loss": 1.0524, "step": 69255 }, { "epoch": 0.99, "grad_norm": 0.59375, "learning_rate": 2.5724364820711722e-08, "loss": 0.9632, "step": 69260 }, { "epoch": 0.99, "grad_norm": 0.56640625, "learning_rate": 2.515965565222933e-08, "loss": 0.9515, "step": 69265 }, { "epoch": 0.99, "grad_norm": 0.4921875, "learning_rate": 2.4601213020591928e-08, "loss": 0.8043, "step": 69270 }, { "epoch": 0.99, "grad_norm": 0.58984375, "learning_rate": 2.4049036960827053e-08, "loss": 0.8569, "step": 69275 }, { "epoch": 0.99, "grad_norm": 0.5234375, "learning_rate": 2.3503127507518153e-08, "loss": 0.8868, "step": 69280 }, { "epoch": 0.99, "grad_norm": 0.51953125, "learning_rate": 2.2963484694904504e-08, "loss": 0.8159, "step": 69285 }, { "epoch": 0.99, "grad_norm": 0.6484375, "learning_rate": 2.24301085568146e-08, "loss": 1.0896, "step": 69290 }, { "epoch": 0.99, "grad_norm": 0.5, "learning_rate": 2.1902999126655054e-08, "loss": 0.9988, "step": 69295 }, { "epoch": 0.99, "grad_norm": 0.5625, "learning_rate": 2.138215643748831e-08, "loss": 0.7695, "step": 69300 }, { "epoch": 0.99, "grad_norm": 0.53515625, "learning_rate": 2.086758052194382e-08, "loss": 1.0433, "step": 69305 }, { "epoch": 0.99, "grad_norm": 0.609375, "learning_rate": 2.0359271412295766e-08, "loss": 0.9876, "step": 69310 }, { "epoch": 0.99, "grad_norm": 0.5859375, "learning_rate": 1.985722914039645e-08, "loss": 0.9633, "step": 69315 }, { "epoch": 0.99, "grad_norm": 0.5546875, "learning_rate": 1.936145373770959e-08, "loss": 1.1043, "step": 69320 }, { "epoch": 0.99, "grad_norm": 0.640625, "learning_rate": 1.887194523532143e-08, "loss": 1.0057, "step": 69325 }, { "epoch": 0.99, "grad_norm": 0.578125, "learning_rate": 1.8388703663907436e-08, "loss": 1.0526, "step": 69330 }, { "epoch": 0.99, "grad_norm": 0.5703125, "learning_rate": 1.791172905375449e-08, "loss": 0.9354, "step": 69335 }, { "epoch": 0.99, "grad_norm": 0.55859375, "learning_rate": 1.7441021434760896e-08, "loss": 0.8695, "step": 69340 }, { "epoch": 0.99, "grad_norm": 0.5546875, "learning_rate": 1.6976580836436385e-08, "loss": 0.9798, "step": 69345 }, { "epoch": 0.99, "grad_norm": 0.51953125, "learning_rate": 1.6518407287902105e-08, "loss": 0.8781, "step": 69350 }, { "epoch": 0.99, "grad_norm": 0.50390625, "learning_rate": 1.606650081785732e-08, "loss": 0.9757, "step": 69355 }, { "epoch": 0.99, "grad_norm": 0.57421875, "learning_rate": 1.562086145463493e-08, "loss": 0.9517, "step": 69360 }, { "epoch": 1.0, "grad_norm": 0.490234375, "learning_rate": 1.5181489226168132e-08, "loss": 0.8726, "step": 69365 }, { "epoch": 1.0, "grad_norm": 0.55078125, "learning_rate": 1.4748384160001571e-08, "loss": 1.044, "step": 69370 }, { "epoch": 1.0, "grad_norm": 0.5625, "learning_rate": 1.4321546283280197e-08, "loss": 0.9414, "step": 69375 }, { "epoch": 1.0, "grad_norm": 0.578125, "learning_rate": 1.3900975622760381e-08, "loss": 0.9418, "step": 69380 }, { "epoch": 1.0, "grad_norm": 0.462890625, "learning_rate": 1.3486672204798822e-08, "loss": 1.0239, "step": 69385 }, { "epoch": 1.0, "grad_norm": 0.61328125, "learning_rate": 1.3078636055374738e-08, "loss": 1.046, "step": 69390 }, { "epoch": 1.0, "grad_norm": 0.58203125, "learning_rate": 1.2676867200045461e-08, "loss": 0.9348, "step": 69395 }, { "epoch": 1.0, "grad_norm": 0.52734375, "learning_rate": 1.2281365664013056e-08, "loss": 0.8424, "step": 69400 }, { "epoch": 1.0, "grad_norm": 0.58203125, "learning_rate": 1.1892131472068801e-08, "loss": 1.0332, "step": 69405 }, { "epoch": 1.0, "grad_norm": 0.5234375, "learning_rate": 1.1509164648593195e-08, "loss": 1.0344, "step": 69410 }, { "epoch": 1.0, "grad_norm": 0.53515625, "learning_rate": 1.1132465217600363e-08, "loss": 1.0642, "step": 69415 }, { "epoch": 1.0, "grad_norm": 0.50390625, "learning_rate": 1.0762033202704746e-08, "loss": 0.9868, "step": 69420 }, { "epoch": 1.0, "grad_norm": 0.54296875, "learning_rate": 1.0397868627121111e-08, "loss": 0.951, "step": 69425 }, { "epoch": 1.0, "grad_norm": 0.6328125, "learning_rate": 1.0039971513675638e-08, "loss": 0.8531, "step": 69430 }, { "epoch": 1.0, "grad_norm": 0.50390625, "learning_rate": 9.68834188480594e-09, "loss": 1.0586, "step": 69435 }, { "epoch": 1.0, "grad_norm": 0.486328125, "learning_rate": 9.342979762549942e-09, "loss": 0.8869, "step": 69440 }, { "epoch": 1.0, "grad_norm": 0.54296875, "learning_rate": 9.003885168556991e-09, "loss": 0.9267, "step": 69445 }, { "epoch": 1.0, "grad_norm": 0.515625, "learning_rate": 8.67105812408786e-09, "loss": 1.0981, "step": 69450 }, { "epoch": 1.0, "grad_norm": 0.6328125, "learning_rate": 8.344498649981436e-09, "loss": 0.8177, "step": 69455 }, { "epoch": 1.0, "grad_norm": 0.61328125, "learning_rate": 8.024206766732435e-09, "loss": 1.07, "step": 69460 }, { "epoch": 1.0, "grad_norm": 0.515625, "learning_rate": 7.710182494413687e-09, "loss": 0.9687, "step": 69465 }, { "epoch": 1.0, "grad_norm": 0.578125, "learning_rate": 7.402425852687245e-09, "loss": 0.9563, "step": 69470 }, { "epoch": 1.0, "grad_norm": 0.59375, "learning_rate": 7.100936860870988e-09, "loss": 1.0244, "step": 69475 }, { "epoch": 1.0, "grad_norm": 0.5234375, "learning_rate": 6.8057155378498105e-09, "loss": 0.9991, "step": 69480 }, { "epoch": 1.0, "grad_norm": 0.5703125, "learning_rate": 6.5167619021200274e-09, "loss": 1.0204, "step": 69485 }, { "epoch": 1.0, "grad_norm": 0.55859375, "learning_rate": 6.2340759718115815e-09, "loss": 1.0564, "step": 69490 }, { "epoch": 1.0, "grad_norm": 0.498046875, "learning_rate": 5.957657764632529e-09, "loss": 0.9994, "step": 69495 }, { "epoch": 1.0, "grad_norm": 0.56640625, "learning_rate": 5.687507297913452e-09, "loss": 0.9983, "step": 69500 }, { "epoch": 1.0, "grad_norm": 0.5546875, "learning_rate": 5.42362458857415e-09, "loss": 0.8027, "step": 69505 }, { "epoch": 1.0, "grad_norm": 0.54296875, "learning_rate": 5.166009653179149e-09, "loss": 0.9545, "step": 69510 }, { "epoch": 1.0, "grad_norm": 0.498046875, "learning_rate": 4.9146625078599906e-09, "loss": 0.9419, "step": 69515 }, { "epoch": 1.0, "grad_norm": 0.515625, "learning_rate": 4.669583168370739e-09, "loss": 1.0382, "step": 69520 }, { "epoch": 1.0, "grad_norm": 0.5546875, "learning_rate": 4.4307716500768816e-09, "loss": 0.9006, "step": 69525 }, { "epoch": 1.0, "grad_norm": 0.5703125, "learning_rate": 4.198227967944224e-09, "loss": 0.8615, "step": 69530 }, { "epoch": 1.0, "grad_norm": 0.4921875, "learning_rate": 3.9719521365610964e-09, "loss": 1.0192, "step": 69535 }, { "epoch": 1.0, "grad_norm": 0.53125, "learning_rate": 3.751944170105048e-09, "loss": 1.0237, "step": 69540 }, { "epoch": 1.0, "grad_norm": 0.54296875, "learning_rate": 3.5382040823539464e-09, "loss": 0.9387, "step": 69545 }, { "epoch": 1.0, "grad_norm": 0.578125, "learning_rate": 3.330731886719285e-09, "loss": 0.8864, "step": 69550 }, { "epoch": 1.0, "grad_norm": 0.51171875, "learning_rate": 3.1295275961906733e-09, "loss": 0.9047, "step": 69555 }, { "epoch": 1.0, "grad_norm": 0.53515625, "learning_rate": 2.93459122340245e-09, "loss": 0.8942, "step": 69560 }, { "epoch": 1.0, "grad_norm": 0.53515625, "learning_rate": 2.745922780555965e-09, "loss": 0.8649, "step": 69565 }, { "epoch": 1.0, "grad_norm": 0.55078125, "learning_rate": 2.563522279486197e-09, "loss": 1.0669, "step": 69570 }, { "epoch": 1.0, "grad_norm": 0.5234375, "learning_rate": 2.3873897316173397e-09, "loss": 0.9068, "step": 69575 }, { "epoch": 1.0, "grad_norm": 0.57421875, "learning_rate": 2.2175251480072156e-09, "loss": 0.8953, "step": 69580 }, { "epoch": 1.0, "grad_norm": 0.56640625, "learning_rate": 2.0539285392806584e-09, "loss": 0.9438, "step": 69585 }, { "epoch": 1.0, "grad_norm": 0.59375, "learning_rate": 1.896599915696129e-09, "loss": 1.0569, "step": 69590 }, { "epoch": 1.0, "grad_norm": 0.58984375, "learning_rate": 1.74553928712351e-09, "loss": 0.9274, "step": 69595 }, { "epoch": 1.0, "grad_norm": 0.5625, "learning_rate": 1.6007466630330036e-09, "loss": 0.8161, "step": 69600 }, { "epoch": 1.0, "grad_norm": 0.55859375, "learning_rate": 1.4622220524951324e-09, "loss": 1.098, "step": 69605 }, { "epoch": 1.0, "grad_norm": 0.5859375, "learning_rate": 1.3299654642029425e-09, "loss": 0.9883, "step": 69610 }, { "epoch": 1.0, "grad_norm": 0.52734375, "learning_rate": 1.2039769064275952e-09, "loss": 0.9939, "step": 69615 }, { "epoch": 1.0, "grad_norm": 0.57421875, "learning_rate": 1.0842563870738788e-09, "loss": 0.9728, "step": 69620 }, { "epoch": 1.0, "grad_norm": 0.5234375, "learning_rate": 9.708039136580028e-10, "loss": 0.8537, "step": 69625 }, { "epoch": 1.0, "grad_norm": 0.51171875, "learning_rate": 8.636194932742925e-10, "loss": 0.9281, "step": 69630 }, { "epoch": 1.0, "grad_norm": 0.55859375, "learning_rate": 7.627031326395973e-10, "loss": 0.9818, "step": 69635 }, { "epoch": 1.0, "grad_norm": 0.59765625, "learning_rate": 6.680548381043927e-10, "loss": 0.979, "step": 69640 }, { "epoch": 1.0, "grad_norm": 0.56640625, "learning_rate": 5.796746155750654e-10, "loss": 0.9615, "step": 69645 }, { "epoch": 1.0, "grad_norm": 0.52734375, "learning_rate": 4.975624706027305e-10, "loss": 0.932, "step": 69650 }, { "epoch": 1.0, "grad_norm": 0.5390625, "learning_rate": 4.2171840833882257e-10, "loss": 0.9145, "step": 69655 }, { "epoch": 1.0, "grad_norm": 0.63671875, "learning_rate": 3.5214243353509646e-10, "loss": 1.0162, "step": 69660 }, { "epoch": 1.0, "grad_norm": 0.45703125, "learning_rate": 2.888345505436263e-10, "loss": 1.0425, "step": 69665 }, { "epoch": 1.0, "grad_norm": 0.5390625, "learning_rate": 2.3179476332790827e-10, "loss": 0.9054, "step": 69670 }, { "epoch": 1.0, "grad_norm": 0.466796875, "learning_rate": 1.8102307548506503e-10, "loss": 0.9386, "step": 69675 }, { "epoch": 1.0, "grad_norm": 0.55078125, "learning_rate": 1.3651949017923216e-10, "loss": 0.9464, "step": 69680 }, { "epoch": 1.0, "grad_norm": 0.5390625, "learning_rate": 9.828401021927392e-11, "loss": 0.9135, "step": 69685 }, { "epoch": 1.0, "grad_norm": 0.5390625, "learning_rate": 6.631663798106758e-11, "loss": 0.8707, "step": 69690 }, { "epoch": 1.0, "grad_norm": 0.5234375, "learning_rate": 4.061737547411681e-11, "loss": 0.9513, "step": 69695 }, { "epoch": 1.0, "grad_norm": 0.56640625, "learning_rate": 2.1186224308245018e-11, "loss": 0.9132, "step": 69700 }, { "epoch": 1.0, "grad_norm": 0.5625, "learning_rate": 8.023185715799742e-12, "loss": 1.0332, "step": 69705 }, { "epoch": 1.0, "grad_norm": 0.54296875, "learning_rate": 1.1282605072437947e-12, "loss": 0.8443, "step": 69710 }, { "epoch": 1.0, "eval_loss": 0.9567210078239441, "eval_runtime": 6401.306, "eval_samples_per_second": 2.411, "eval_steps_per_second": 1.205, "step": 69713 }, { "epoch": 1.0, "step": 69713, "total_flos": 1.225431715317547e+19, "train_loss": 0.9623549959262275, "train_runtime": 219590.8488, "train_samples_per_second": 0.635, "train_steps_per_second": 0.317 } ], "logging_steps": 5, "max_steps": 69713, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 1.225431715317547e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }