diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,97639 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 69713, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "grad_norm": 0.5, + "learning_rate": 2.8686173264486516e-08, + "loss": 0.859, + "step": 1 + }, + { + "epoch": 0.0, + "grad_norm": 0.56640625, + "learning_rate": 1.434308663224326e-07, + "loss": 1.2508, + "step": 5 + }, + { + "epoch": 0.0, + "grad_norm": 0.62109375, + "learning_rate": 2.868617326448652e-07, + "loss": 1.1806, + "step": 10 + }, + { + "epoch": 0.0, + "grad_norm": 0.62890625, + "learning_rate": 4.3029259896729773e-07, + "loss": 1.1581, + "step": 15 + }, + { + "epoch": 0.0, + "grad_norm": 0.58984375, + "learning_rate": 5.737234652897304e-07, + "loss": 1.0476, + "step": 20 + }, + { + "epoch": 0.0, + "grad_norm": 0.98828125, + "learning_rate": 7.17154331612163e-07, + "loss": 1.126, + "step": 25 + }, + { + "epoch": 0.0, + "grad_norm": 0.72265625, + "learning_rate": 8.605851979345955e-07, + "loss": 1.1032, + "step": 30 + }, + { + "epoch": 0.0, + "grad_norm": 0.6171875, + "learning_rate": 1.0040160642570282e-06, + "loss": 1.1629, + "step": 35 + }, + { + "epoch": 0.0, + "grad_norm": 0.455078125, + "learning_rate": 1.1474469305794607e-06, + "loss": 1.2222, + "step": 40 + }, + { + "epoch": 0.0, + "grad_norm": 0.498046875, + "learning_rate": 1.2908777969018933e-06, + "loss": 1.307, + "step": 45 + }, + { + "epoch": 0.0, + "grad_norm": 0.56640625, + "learning_rate": 1.434308663224326e-06, + "loss": 1.0781, + "step": 50 + }, + { + "epoch": 0.0, + "grad_norm": 0.703125, + "learning_rate": 1.5777395295467586e-06, + "loss": 1.2634, + "step": 55 + }, + { + "epoch": 0.0, + "grad_norm": 0.546875, + "learning_rate": 1.721170395869191e-06, + "loss": 1.0647, + "step": 60 + }, + { + "epoch": 0.0, + "grad_norm": 0.6484375, + "learning_rate": 1.8646012621916239e-06, + "loss": 1.1197, + "step": 65 + }, + { + "epoch": 0.0, + "grad_norm": 0.451171875, + "learning_rate": 2.0080321285140564e-06, + "loss": 1.0787, + "step": 70 + }, + { + "epoch": 0.0, + "grad_norm": 0.6875, + "learning_rate": 2.151462994836489e-06, + "loss": 1.0495, + "step": 75 + }, + { + "epoch": 0.0, + "grad_norm": 0.5234375, + "learning_rate": 2.2948938611589215e-06, + "loss": 1.1097, + "step": 80 + }, + { + "epoch": 0.0, + "grad_norm": 0.64453125, + "learning_rate": 2.438324727481354e-06, + "loss": 1.0675, + "step": 85 + }, + { + "epoch": 0.0, + "grad_norm": 0.546875, + "learning_rate": 2.5817555938037866e-06, + "loss": 1.2212, + "step": 90 + }, + { + "epoch": 0.0, + "grad_norm": 0.60546875, + "learning_rate": 2.725186460126219e-06, + "loss": 1.0741, + "step": 95 + }, + { + "epoch": 0.0, + "grad_norm": 0.49609375, + "learning_rate": 2.868617326448652e-06, + "loss": 1.2163, + "step": 100 + }, + { + "epoch": 0.0, + "grad_norm": 0.6171875, + "learning_rate": 3.0120481927710846e-06, + "loss": 1.1092, + "step": 105 + }, + { + "epoch": 0.0, + "grad_norm": 0.55859375, + "learning_rate": 3.155479059093517e-06, + "loss": 1.1085, + "step": 110 + }, + { + "epoch": 0.0, + "grad_norm": 0.54296875, + "learning_rate": 3.2989099254159493e-06, + "loss": 0.9502, + "step": 115 + }, + { + "epoch": 0.0, + "grad_norm": 0.4921875, + "learning_rate": 3.442340791738382e-06, + "loss": 1.1783, + "step": 120 + }, + { + "epoch": 0.0, + "grad_norm": 0.466796875, + "learning_rate": 3.585771658060815e-06, + "loss": 1.1416, + "step": 125 + }, + { + "epoch": 0.0, + "grad_norm": 0.51171875, + "learning_rate": 3.7292025243832477e-06, + "loss": 1.0175, + "step": 130 + }, + { + "epoch": 0.0, + "grad_norm": 0.609375, + "learning_rate": 3.87263339070568e-06, + "loss": 1.1436, + "step": 135 + }, + { + "epoch": 0.0, + "grad_norm": 0.51953125, + "learning_rate": 4.016064257028113e-06, + "loss": 1.0344, + "step": 140 + }, + { + "epoch": 0.0, + "grad_norm": 0.49609375, + "learning_rate": 4.159495123350545e-06, + "loss": 1.1485, + "step": 145 + }, + { + "epoch": 0.0, + "grad_norm": 0.4921875, + "learning_rate": 4.302925989672978e-06, + "loss": 1.2148, + "step": 150 + }, + { + "epoch": 0.0, + "grad_norm": 0.62109375, + "learning_rate": 4.4463568559954104e-06, + "loss": 0.9588, + "step": 155 + }, + { + "epoch": 0.0, + "grad_norm": 0.55859375, + "learning_rate": 4.589787722317843e-06, + "loss": 1.1672, + "step": 160 + }, + { + "epoch": 0.0, + "grad_norm": 0.53515625, + "learning_rate": 4.7332185886402755e-06, + "loss": 1.0145, + "step": 165 + }, + { + "epoch": 0.0, + "grad_norm": 0.44140625, + "learning_rate": 4.876649454962708e-06, + "loss": 1.1636, + "step": 170 + }, + { + "epoch": 0.0, + "grad_norm": 0.490234375, + "learning_rate": 5.020080321285141e-06, + "loss": 1.1657, + "step": 175 + }, + { + "epoch": 0.0, + "grad_norm": 0.4609375, + "learning_rate": 5.163511187607573e-06, + "loss": 1.053, + "step": 180 + }, + { + "epoch": 0.0, + "grad_norm": 0.51953125, + "learning_rate": 5.306942053930006e-06, + "loss": 1.0516, + "step": 185 + }, + { + "epoch": 0.0, + "grad_norm": 0.56640625, + "learning_rate": 5.450372920252438e-06, + "loss": 1.0715, + "step": 190 + }, + { + "epoch": 0.0, + "grad_norm": 0.44921875, + "learning_rate": 5.593803786574872e-06, + "loss": 0.9784, + "step": 195 + }, + { + "epoch": 0.0, + "grad_norm": 0.5546875, + "learning_rate": 5.737234652897304e-06, + "loss": 1.0953, + "step": 200 + }, + { + "epoch": 0.0, + "grad_norm": 0.466796875, + "learning_rate": 5.880665519219737e-06, + "loss": 0.9477, + "step": 205 + }, + { + "epoch": 0.0, + "grad_norm": 0.5234375, + "learning_rate": 6.024096385542169e-06, + "loss": 1.1993, + "step": 210 + }, + { + "epoch": 0.0, + "grad_norm": 0.546875, + "learning_rate": 6.167527251864602e-06, + "loss": 1.1235, + "step": 215 + }, + { + "epoch": 0.0, + "grad_norm": 0.6015625, + "learning_rate": 6.310958118187034e-06, + "loss": 1.0081, + "step": 220 + }, + { + "epoch": 0.0, + "grad_norm": 0.478515625, + "learning_rate": 6.454388984509467e-06, + "loss": 1.0265, + "step": 225 + }, + { + "epoch": 0.0, + "grad_norm": 0.44921875, + "learning_rate": 6.5978198508318986e-06, + "loss": 0.9588, + "step": 230 + }, + { + "epoch": 0.0, + "grad_norm": 0.498046875, + "learning_rate": 6.741250717154332e-06, + "loss": 1.0807, + "step": 235 + }, + { + "epoch": 0.0, + "grad_norm": 0.6015625, + "learning_rate": 6.884681583476764e-06, + "loss": 1.214, + "step": 240 + }, + { + "epoch": 0.0, + "grad_norm": 0.494140625, + "learning_rate": 7.028112449799197e-06, + "loss": 1.1833, + "step": 245 + }, + { + "epoch": 0.0, + "grad_norm": 0.50390625, + "learning_rate": 7.17154331612163e-06, + "loss": 1.0432, + "step": 250 + }, + { + "epoch": 0.0, + "grad_norm": 0.5078125, + "learning_rate": 7.314974182444062e-06, + "loss": 1.1392, + "step": 255 + }, + { + "epoch": 0.0, + "grad_norm": 0.5546875, + "learning_rate": 7.4584050487664955e-06, + "loss": 1.08, + "step": 260 + }, + { + "epoch": 0.0, + "grad_norm": 0.5859375, + "learning_rate": 7.601835915088927e-06, + "loss": 0.9535, + "step": 265 + }, + { + "epoch": 0.0, + "grad_norm": 0.6171875, + "learning_rate": 7.74526678141136e-06, + "loss": 1.1902, + "step": 270 + }, + { + "epoch": 0.0, + "grad_norm": 0.61328125, + "learning_rate": 7.888697647733792e-06, + "loss": 1.0534, + "step": 275 + }, + { + "epoch": 0.0, + "grad_norm": 0.46484375, + "learning_rate": 8.032128514056226e-06, + "loss": 0.9991, + "step": 280 + }, + { + "epoch": 0.0, + "grad_norm": 0.5234375, + "learning_rate": 8.175559380378659e-06, + "loss": 1.03, + "step": 285 + }, + { + "epoch": 0.0, + "grad_norm": 0.498046875, + "learning_rate": 8.31899024670109e-06, + "loss": 1.0939, + "step": 290 + }, + { + "epoch": 0.0, + "grad_norm": 0.53515625, + "learning_rate": 8.462421113023524e-06, + "loss": 1.1392, + "step": 295 + }, + { + "epoch": 0.0, + "grad_norm": 0.49609375, + "learning_rate": 8.605851979345956e-06, + "loss": 0.9538, + "step": 300 + }, + { + "epoch": 0.0, + "grad_norm": 0.490234375, + "learning_rate": 8.74928284566839e-06, + "loss": 0.9449, + "step": 305 + }, + { + "epoch": 0.0, + "grad_norm": 0.51171875, + "learning_rate": 8.892713711990821e-06, + "loss": 1.1433, + "step": 310 + }, + { + "epoch": 0.0, + "grad_norm": 0.439453125, + "learning_rate": 9.036144578313253e-06, + "loss": 0.9169, + "step": 315 + }, + { + "epoch": 0.0, + "grad_norm": 0.50390625, + "learning_rate": 9.179575444635686e-06, + "loss": 0.9726, + "step": 320 + }, + { + "epoch": 0.0, + "grad_norm": 0.51171875, + "learning_rate": 9.323006310958118e-06, + "loss": 1.2255, + "step": 325 + }, + { + "epoch": 0.0, + "grad_norm": 0.60546875, + "learning_rate": 9.466437177280551e-06, + "loss": 1.2127, + "step": 330 + }, + { + "epoch": 0.0, + "grad_norm": 0.50390625, + "learning_rate": 9.609868043602983e-06, + "loss": 1.0853, + "step": 335 + }, + { + "epoch": 0.0, + "grad_norm": 0.421875, + "learning_rate": 9.753298909925416e-06, + "loss": 0.9329, + "step": 340 + }, + { + "epoch": 0.0, + "grad_norm": 0.4609375, + "learning_rate": 9.896729776247848e-06, + "loss": 0.8877, + "step": 345 + }, + { + "epoch": 0.01, + "grad_norm": 0.515625, + "learning_rate": 1.0040160642570281e-05, + "loss": 1.2255, + "step": 350 + }, + { + "epoch": 0.01, + "grad_norm": 0.70703125, + "learning_rate": 1.0183591508892715e-05, + "loss": 0.9594, + "step": 355 + }, + { + "epoch": 0.01, + "grad_norm": 0.51171875, + "learning_rate": 1.0327022375215146e-05, + "loss": 1.2497, + "step": 360 + }, + { + "epoch": 0.01, + "grad_norm": 0.498046875, + "learning_rate": 1.047045324153758e-05, + "loss": 0.9763, + "step": 365 + }, + { + "epoch": 0.01, + "grad_norm": 0.490234375, + "learning_rate": 1.0613884107860011e-05, + "loss": 0.9794, + "step": 370 + }, + { + "epoch": 0.01, + "grad_norm": 0.54296875, + "learning_rate": 1.0757314974182445e-05, + "loss": 0.9919, + "step": 375 + }, + { + "epoch": 0.01, + "grad_norm": 0.4765625, + "learning_rate": 1.0900745840504876e-05, + "loss": 0.9107, + "step": 380 + }, + { + "epoch": 0.01, + "grad_norm": 0.453125, + "learning_rate": 1.104417670682731e-05, + "loss": 1.1007, + "step": 385 + }, + { + "epoch": 0.01, + "grad_norm": 0.5546875, + "learning_rate": 1.1187607573149743e-05, + "loss": 1.1553, + "step": 390 + }, + { + "epoch": 0.01, + "grad_norm": 0.47265625, + "learning_rate": 1.1331038439472175e-05, + "loss": 0.9138, + "step": 395 + }, + { + "epoch": 0.01, + "grad_norm": 0.54296875, + "learning_rate": 1.1474469305794608e-05, + "loss": 0.9199, + "step": 400 + }, + { + "epoch": 0.01, + "grad_norm": 0.466796875, + "learning_rate": 1.161790017211704e-05, + "loss": 0.8803, + "step": 405 + }, + { + "epoch": 0.01, + "grad_norm": 0.52734375, + "learning_rate": 1.1761331038439473e-05, + "loss": 0.9837, + "step": 410 + }, + { + "epoch": 0.01, + "grad_norm": 0.416015625, + "learning_rate": 1.1904761904761905e-05, + "loss": 1.0048, + "step": 415 + }, + { + "epoch": 0.01, + "grad_norm": 0.5859375, + "learning_rate": 1.2048192771084338e-05, + "loss": 0.9014, + "step": 420 + }, + { + "epoch": 0.01, + "grad_norm": 0.498046875, + "learning_rate": 1.2191623637406772e-05, + "loss": 0.9297, + "step": 425 + }, + { + "epoch": 0.01, + "grad_norm": 0.50390625, + "learning_rate": 1.2335054503729204e-05, + "loss": 1.0564, + "step": 430 + }, + { + "epoch": 0.01, + "grad_norm": 0.486328125, + "learning_rate": 1.2478485370051635e-05, + "loss": 0.9937, + "step": 435 + }, + { + "epoch": 0.01, + "grad_norm": 0.494140625, + "learning_rate": 1.2621916236374069e-05, + "loss": 0.9515, + "step": 440 + }, + { + "epoch": 0.01, + "grad_norm": 0.51953125, + "learning_rate": 1.27653471026965e-05, + "loss": 1.1943, + "step": 445 + }, + { + "epoch": 0.01, + "grad_norm": 0.478515625, + "learning_rate": 1.2908777969018934e-05, + "loss": 1.0089, + "step": 450 + }, + { + "epoch": 0.01, + "grad_norm": 0.578125, + "learning_rate": 1.3052208835341367e-05, + "loss": 0.9401, + "step": 455 + }, + { + "epoch": 0.01, + "grad_norm": 0.5078125, + "learning_rate": 1.3195639701663797e-05, + "loss": 1.0221, + "step": 460 + }, + { + "epoch": 0.01, + "grad_norm": 0.51953125, + "learning_rate": 1.333907056798623e-05, + "loss": 0.9598, + "step": 465 + }, + { + "epoch": 0.01, + "grad_norm": 0.5390625, + "learning_rate": 1.3482501434308664e-05, + "loss": 1.02, + "step": 470 + }, + { + "epoch": 0.01, + "grad_norm": 0.498046875, + "learning_rate": 1.3625932300631097e-05, + "loss": 1.0364, + "step": 475 + }, + { + "epoch": 0.01, + "grad_norm": 0.482421875, + "learning_rate": 1.3769363166953527e-05, + "loss": 1.068, + "step": 480 + }, + { + "epoch": 0.01, + "grad_norm": 0.50390625, + "learning_rate": 1.391279403327596e-05, + "loss": 1.1302, + "step": 485 + }, + { + "epoch": 0.01, + "grad_norm": 0.44921875, + "learning_rate": 1.4056224899598394e-05, + "loss": 0.9552, + "step": 490 + }, + { + "epoch": 0.01, + "grad_norm": 0.54296875, + "learning_rate": 1.4199655765920827e-05, + "loss": 1.1418, + "step": 495 + }, + { + "epoch": 0.01, + "grad_norm": 0.474609375, + "learning_rate": 1.434308663224326e-05, + "loss": 0.9391, + "step": 500 + }, + { + "epoch": 0.01, + "grad_norm": 0.5625, + "learning_rate": 1.448651749856569e-05, + "loss": 1.0627, + "step": 505 + }, + { + "epoch": 0.01, + "grad_norm": 0.50390625, + "learning_rate": 1.4629948364888124e-05, + "loss": 1.0318, + "step": 510 + }, + { + "epoch": 0.01, + "grad_norm": 0.4921875, + "learning_rate": 1.4773379231210558e-05, + "loss": 0.9085, + "step": 515 + }, + { + "epoch": 0.01, + "grad_norm": 0.5, + "learning_rate": 1.4916810097532991e-05, + "loss": 0.9348, + "step": 520 + }, + { + "epoch": 0.01, + "grad_norm": 0.5078125, + "learning_rate": 1.5060240963855424e-05, + "loss": 1.0198, + "step": 525 + }, + { + "epoch": 0.01, + "grad_norm": 0.546875, + "learning_rate": 1.5203671830177854e-05, + "loss": 0.9923, + "step": 530 + }, + { + "epoch": 0.01, + "grad_norm": 0.462890625, + "learning_rate": 1.5347102696500288e-05, + "loss": 1.1061, + "step": 535 + }, + { + "epoch": 0.01, + "grad_norm": 0.52734375, + "learning_rate": 1.549053356282272e-05, + "loss": 0.9106, + "step": 540 + }, + { + "epoch": 0.01, + "grad_norm": 0.51171875, + "learning_rate": 1.5633964429145155e-05, + "loss": 0.9399, + "step": 545 + }, + { + "epoch": 0.01, + "grad_norm": 0.50390625, + "learning_rate": 1.5777395295467585e-05, + "loss": 0.9962, + "step": 550 + }, + { + "epoch": 0.01, + "grad_norm": 0.47265625, + "learning_rate": 1.5920826161790018e-05, + "loss": 1.0109, + "step": 555 + }, + { + "epoch": 0.01, + "grad_norm": 0.5, + "learning_rate": 1.606425702811245e-05, + "loss": 0.9186, + "step": 560 + }, + { + "epoch": 0.01, + "grad_norm": 0.474609375, + "learning_rate": 1.6207687894434885e-05, + "loss": 1.1396, + "step": 565 + }, + { + "epoch": 0.01, + "grad_norm": 0.470703125, + "learning_rate": 1.6351118760757318e-05, + "loss": 1.006, + "step": 570 + }, + { + "epoch": 0.01, + "grad_norm": 0.5078125, + "learning_rate": 1.6494549627079748e-05, + "loss": 0.9358, + "step": 575 + }, + { + "epoch": 0.01, + "grad_norm": 0.4609375, + "learning_rate": 1.663798049340218e-05, + "loss": 1.0365, + "step": 580 + }, + { + "epoch": 0.01, + "grad_norm": 0.474609375, + "learning_rate": 1.6781411359724615e-05, + "loss": 0.976, + "step": 585 + }, + { + "epoch": 0.01, + "grad_norm": 0.490234375, + "learning_rate": 1.6924842226047048e-05, + "loss": 0.9621, + "step": 590 + }, + { + "epoch": 0.01, + "grad_norm": 0.546875, + "learning_rate": 1.706827309236948e-05, + "loss": 1.0107, + "step": 595 + }, + { + "epoch": 0.01, + "grad_norm": 0.4921875, + "learning_rate": 1.721170395869191e-05, + "loss": 0.972, + "step": 600 + }, + { + "epoch": 0.01, + "grad_norm": 0.76171875, + "learning_rate": 1.7355134825014345e-05, + "loss": 1.0638, + "step": 605 + }, + { + "epoch": 0.01, + "grad_norm": 0.4921875, + "learning_rate": 1.749856569133678e-05, + "loss": 0.985, + "step": 610 + }, + { + "epoch": 0.01, + "grad_norm": 0.48046875, + "learning_rate": 1.764199655765921e-05, + "loss": 0.9469, + "step": 615 + }, + { + "epoch": 0.01, + "grad_norm": 0.57421875, + "learning_rate": 1.7785427423981642e-05, + "loss": 1.1185, + "step": 620 + }, + { + "epoch": 0.01, + "grad_norm": 0.54296875, + "learning_rate": 1.7928858290304075e-05, + "loss": 1.0834, + "step": 625 + }, + { + "epoch": 0.01, + "grad_norm": 0.5546875, + "learning_rate": 1.8072289156626505e-05, + "loss": 0.9151, + "step": 630 + }, + { + "epoch": 0.01, + "grad_norm": 0.609375, + "learning_rate": 1.821572002294894e-05, + "loss": 0.8918, + "step": 635 + }, + { + "epoch": 0.01, + "grad_norm": 0.478515625, + "learning_rate": 1.8359150889271372e-05, + "loss": 1.0947, + "step": 640 + }, + { + "epoch": 0.01, + "grad_norm": 0.52734375, + "learning_rate": 1.8502581755593802e-05, + "loss": 0.9554, + "step": 645 + }, + { + "epoch": 0.01, + "grad_norm": 0.50390625, + "learning_rate": 1.8646012621916235e-05, + "loss": 1.0884, + "step": 650 + }, + { + "epoch": 0.01, + "grad_norm": 0.546875, + "learning_rate": 1.878944348823867e-05, + "loss": 0.9896, + "step": 655 + }, + { + "epoch": 0.01, + "grad_norm": 0.51171875, + "learning_rate": 1.8932874354561102e-05, + "loss": 1.026, + "step": 660 + }, + { + "epoch": 0.01, + "grad_norm": 0.5546875, + "learning_rate": 1.9076305220883535e-05, + "loss": 1.047, + "step": 665 + }, + { + "epoch": 0.01, + "grad_norm": 0.55078125, + "learning_rate": 1.9219736087205965e-05, + "loss": 0.9945, + "step": 670 + }, + { + "epoch": 0.01, + "grad_norm": 0.6171875, + "learning_rate": 1.93631669535284e-05, + "loss": 1.0214, + "step": 675 + }, + { + "epoch": 0.01, + "grad_norm": 0.52734375, + "learning_rate": 1.9506597819850832e-05, + "loss": 0.9594, + "step": 680 + }, + { + "epoch": 0.01, + "grad_norm": 0.50390625, + "learning_rate": 1.9650028686173266e-05, + "loss": 0.9142, + "step": 685 + }, + { + "epoch": 0.01, + "grad_norm": 0.54296875, + "learning_rate": 1.9793459552495696e-05, + "loss": 1.0362, + "step": 690 + }, + { + "epoch": 0.01, + "grad_norm": 0.50390625, + "learning_rate": 1.993689041881813e-05, + "loss": 0.9183, + "step": 695 + }, + { + "epoch": 0.01, + "grad_norm": 0.52734375, + "learning_rate": 2.0080321285140562e-05, + "loss": 1.0075, + "step": 700 + }, + { + "epoch": 0.01, + "grad_norm": 0.458984375, + "learning_rate": 2.0223752151462996e-05, + "loss": 0.9477, + "step": 705 + }, + { + "epoch": 0.01, + "grad_norm": 0.640625, + "learning_rate": 2.036718301778543e-05, + "loss": 1.0359, + "step": 710 + }, + { + "epoch": 0.01, + "grad_norm": 0.49609375, + "learning_rate": 2.051061388410786e-05, + "loss": 1.0742, + "step": 715 + }, + { + "epoch": 0.01, + "grad_norm": 0.61328125, + "learning_rate": 2.0654044750430293e-05, + "loss": 0.8609, + "step": 720 + }, + { + "epoch": 0.01, + "grad_norm": 0.4921875, + "learning_rate": 2.0797475616752726e-05, + "loss": 1.1525, + "step": 725 + }, + { + "epoch": 0.01, + "grad_norm": 0.5078125, + "learning_rate": 2.094090648307516e-05, + "loss": 0.9171, + "step": 730 + }, + { + "epoch": 0.01, + "grad_norm": 0.5078125, + "learning_rate": 2.1084337349397593e-05, + "loss": 1.0555, + "step": 735 + }, + { + "epoch": 0.01, + "grad_norm": 0.470703125, + "learning_rate": 2.1227768215720023e-05, + "loss": 1.0368, + "step": 740 + }, + { + "epoch": 0.01, + "grad_norm": 0.51171875, + "learning_rate": 2.1371199082042456e-05, + "loss": 0.9625, + "step": 745 + }, + { + "epoch": 0.01, + "grad_norm": 0.5234375, + "learning_rate": 2.151462994836489e-05, + "loss": 1.0213, + "step": 750 + }, + { + "epoch": 0.01, + "grad_norm": 0.58203125, + "learning_rate": 2.1658060814687323e-05, + "loss": 1.1338, + "step": 755 + }, + { + "epoch": 0.01, + "grad_norm": 0.53515625, + "learning_rate": 2.1801491681009753e-05, + "loss": 0.9872, + "step": 760 + }, + { + "epoch": 0.01, + "grad_norm": 0.5390625, + "learning_rate": 2.1944922547332186e-05, + "loss": 1.0546, + "step": 765 + }, + { + "epoch": 0.01, + "grad_norm": 0.57421875, + "learning_rate": 2.208835341365462e-05, + "loss": 1.1086, + "step": 770 + }, + { + "epoch": 0.01, + "grad_norm": 0.515625, + "learning_rate": 2.2231784279977053e-05, + "loss": 0.936, + "step": 775 + }, + { + "epoch": 0.01, + "grad_norm": 0.55859375, + "learning_rate": 2.2375215146299486e-05, + "loss": 1.058, + "step": 780 + }, + { + "epoch": 0.01, + "grad_norm": 0.546875, + "learning_rate": 2.2518646012621916e-05, + "loss": 0.88, + "step": 785 + }, + { + "epoch": 0.01, + "grad_norm": 0.52734375, + "learning_rate": 2.266207687894435e-05, + "loss": 1.0216, + "step": 790 + }, + { + "epoch": 0.01, + "grad_norm": 0.56640625, + "learning_rate": 2.2805507745266783e-05, + "loss": 0.8847, + "step": 795 + }, + { + "epoch": 0.01, + "grad_norm": 3.5, + "learning_rate": 2.2948938611589217e-05, + "loss": 0.9893, + "step": 800 + }, + { + "epoch": 0.01, + "grad_norm": 0.52734375, + "learning_rate": 2.309236947791165e-05, + "loss": 0.9958, + "step": 805 + }, + { + "epoch": 0.01, + "grad_norm": 0.56640625, + "learning_rate": 2.323580034423408e-05, + "loss": 0.9778, + "step": 810 + }, + { + "epoch": 0.01, + "grad_norm": 0.51953125, + "learning_rate": 2.3379231210556513e-05, + "loss": 1.105, + "step": 815 + }, + { + "epoch": 0.01, + "grad_norm": 0.494140625, + "learning_rate": 2.3522662076878947e-05, + "loss": 1.0109, + "step": 820 + }, + { + "epoch": 0.01, + "grad_norm": 0.53515625, + "learning_rate": 2.366609294320138e-05, + "loss": 1.1851, + "step": 825 + }, + { + "epoch": 0.01, + "grad_norm": 0.51171875, + "learning_rate": 2.380952380952381e-05, + "loss": 0.9586, + "step": 830 + }, + { + "epoch": 0.01, + "grad_norm": 0.56640625, + "learning_rate": 2.3952954675846244e-05, + "loss": 1.0967, + "step": 835 + }, + { + "epoch": 0.01, + "grad_norm": 0.53515625, + "learning_rate": 2.4096385542168677e-05, + "loss": 1.1236, + "step": 840 + }, + { + "epoch": 0.01, + "grad_norm": 0.53515625, + "learning_rate": 2.423981640849111e-05, + "loss": 0.8882, + "step": 845 + }, + { + "epoch": 0.01, + "grad_norm": 0.478515625, + "learning_rate": 2.4383247274813544e-05, + "loss": 1.0636, + "step": 850 + }, + { + "epoch": 0.01, + "grad_norm": 0.62890625, + "learning_rate": 2.4526678141135974e-05, + "loss": 0.9651, + "step": 855 + }, + { + "epoch": 0.01, + "grad_norm": 0.52734375, + "learning_rate": 2.4670109007458407e-05, + "loss": 1.023, + "step": 860 + }, + { + "epoch": 0.01, + "grad_norm": 0.5390625, + "learning_rate": 2.481353987378084e-05, + "loss": 0.9432, + "step": 865 + }, + { + "epoch": 0.01, + "grad_norm": 0.55859375, + "learning_rate": 2.495697074010327e-05, + "loss": 1.2359, + "step": 870 + }, + { + "epoch": 0.01, + "grad_norm": 0.58984375, + "learning_rate": 2.5100401606425704e-05, + "loss": 1.031, + "step": 875 + }, + { + "epoch": 0.01, + "grad_norm": 0.5703125, + "learning_rate": 2.5243832472748137e-05, + "loss": 1.0033, + "step": 880 + }, + { + "epoch": 0.01, + "grad_norm": 0.53515625, + "learning_rate": 2.538726333907057e-05, + "loss": 0.9732, + "step": 885 + }, + { + "epoch": 0.01, + "grad_norm": 0.494140625, + "learning_rate": 2.5530694205393e-05, + "loss": 1.0595, + "step": 890 + }, + { + "epoch": 0.01, + "grad_norm": 0.4453125, + "learning_rate": 2.5674125071715434e-05, + "loss": 0.9085, + "step": 895 + }, + { + "epoch": 0.01, + "grad_norm": 0.5078125, + "learning_rate": 2.5817555938037867e-05, + "loss": 1.0979, + "step": 900 + }, + { + "epoch": 0.01, + "grad_norm": 0.55859375, + "learning_rate": 2.5960986804360297e-05, + "loss": 1.088, + "step": 905 + }, + { + "epoch": 0.01, + "grad_norm": 0.5, + "learning_rate": 2.6104417670682734e-05, + "loss": 1.0669, + "step": 910 + }, + { + "epoch": 0.01, + "grad_norm": 0.5859375, + "learning_rate": 2.6247848537005164e-05, + "loss": 0.9043, + "step": 915 + }, + { + "epoch": 0.01, + "grad_norm": 0.4765625, + "learning_rate": 2.6391279403327594e-05, + "loss": 0.9435, + "step": 920 + }, + { + "epoch": 0.01, + "grad_norm": 0.46875, + "learning_rate": 2.653471026965003e-05, + "loss": 0.9912, + "step": 925 + }, + { + "epoch": 0.01, + "grad_norm": 0.56640625, + "learning_rate": 2.667814113597246e-05, + "loss": 0.9251, + "step": 930 + }, + { + "epoch": 0.01, + "grad_norm": 0.4921875, + "learning_rate": 2.682157200229489e-05, + "loss": 0.9888, + "step": 935 + }, + { + "epoch": 0.01, + "grad_norm": 0.56640625, + "learning_rate": 2.6965002868617328e-05, + "loss": 0.9168, + "step": 940 + }, + { + "epoch": 0.01, + "grad_norm": 0.51953125, + "learning_rate": 2.7108433734939758e-05, + "loss": 0.9026, + "step": 945 + }, + { + "epoch": 0.01, + "grad_norm": 0.578125, + "learning_rate": 2.7251864601262195e-05, + "loss": 1.0875, + "step": 950 + }, + { + "epoch": 0.01, + "grad_norm": 0.55078125, + "learning_rate": 2.7395295467584625e-05, + "loss": 0.8695, + "step": 955 + }, + { + "epoch": 0.01, + "grad_norm": 0.4921875, + "learning_rate": 2.7538726333907055e-05, + "loss": 0.922, + "step": 960 + }, + { + "epoch": 0.01, + "grad_norm": 0.470703125, + "learning_rate": 2.768215720022949e-05, + "loss": 1.0311, + "step": 965 + }, + { + "epoch": 0.01, + "grad_norm": 0.5625, + "learning_rate": 2.782558806655192e-05, + "loss": 1.0643, + "step": 970 + }, + { + "epoch": 0.01, + "grad_norm": 0.5078125, + "learning_rate": 2.7969018932874358e-05, + "loss": 1.128, + "step": 975 + }, + { + "epoch": 0.01, + "grad_norm": 0.56640625, + "learning_rate": 2.8112449799196788e-05, + "loss": 1.0122, + "step": 980 + }, + { + "epoch": 0.01, + "grad_norm": 0.466796875, + "learning_rate": 2.8255880665519218e-05, + "loss": 1.1364, + "step": 985 + }, + { + "epoch": 0.01, + "grad_norm": 0.5078125, + "learning_rate": 2.8399311531841655e-05, + "loss": 1.0668, + "step": 990 + }, + { + "epoch": 0.01, + "grad_norm": 0.6015625, + "learning_rate": 2.8542742398164085e-05, + "loss": 0.9467, + "step": 995 + }, + { + "epoch": 0.01, + "grad_norm": 0.5, + "learning_rate": 2.868617326448652e-05, + "loss": 1.0225, + "step": 1000 + }, + { + "epoch": 0.01, + "grad_norm": 0.54296875, + "learning_rate": 2.882960413080895e-05, + "loss": 0.9799, + "step": 1005 + }, + { + "epoch": 0.01, + "grad_norm": 0.55859375, + "learning_rate": 2.897303499713138e-05, + "loss": 1.1023, + "step": 1010 + }, + { + "epoch": 0.01, + "grad_norm": 0.546875, + "learning_rate": 2.911646586345382e-05, + "loss": 0.9273, + "step": 1015 + }, + { + "epoch": 0.01, + "grad_norm": 0.50390625, + "learning_rate": 2.925989672977625e-05, + "loss": 1.0199, + "step": 1020 + }, + { + "epoch": 0.01, + "grad_norm": 0.48828125, + "learning_rate": 2.9403327596098685e-05, + "loss": 1.0596, + "step": 1025 + }, + { + "epoch": 0.01, + "grad_norm": 0.51953125, + "learning_rate": 2.9546758462421115e-05, + "loss": 0.8337, + "step": 1030 + }, + { + "epoch": 0.01, + "grad_norm": 0.57421875, + "learning_rate": 2.9690189328743545e-05, + "loss": 0.9971, + "step": 1035 + }, + { + "epoch": 0.01, + "grad_norm": 0.52734375, + "learning_rate": 2.9833620195065982e-05, + "loss": 0.9867, + "step": 1040 + }, + { + "epoch": 0.01, + "grad_norm": 0.5, + "learning_rate": 2.9977051061388412e-05, + "loss": 0.8743, + "step": 1045 + }, + { + "epoch": 0.02, + "grad_norm": 0.86328125, + "learning_rate": 3.012048192771085e-05, + "loss": 1.0844, + "step": 1050 + }, + { + "epoch": 0.02, + "grad_norm": 0.490234375, + "learning_rate": 3.026391279403328e-05, + "loss": 0.9911, + "step": 1055 + }, + { + "epoch": 0.02, + "grad_norm": 0.51171875, + "learning_rate": 3.040734366035571e-05, + "loss": 0.965, + "step": 1060 + }, + { + "epoch": 0.02, + "grad_norm": 0.56640625, + "learning_rate": 3.055077452667814e-05, + "loss": 0.9537, + "step": 1065 + }, + { + "epoch": 0.02, + "grad_norm": 0.53125, + "learning_rate": 3.0694205393000576e-05, + "loss": 0.9508, + "step": 1070 + }, + { + "epoch": 0.02, + "grad_norm": 0.55859375, + "learning_rate": 3.0837636259323e-05, + "loss": 1.0471, + "step": 1075 + }, + { + "epoch": 0.02, + "grad_norm": 0.51953125, + "learning_rate": 3.098106712564544e-05, + "loss": 1.1665, + "step": 1080 + }, + { + "epoch": 0.02, + "grad_norm": 0.54296875, + "learning_rate": 3.112449799196787e-05, + "loss": 0.8957, + "step": 1085 + }, + { + "epoch": 0.02, + "grad_norm": 0.53125, + "learning_rate": 3.126792885829031e-05, + "loss": 1.0339, + "step": 1090 + }, + { + "epoch": 0.02, + "grad_norm": 0.5234375, + "learning_rate": 3.1411359724612736e-05, + "loss": 0.9343, + "step": 1095 + }, + { + "epoch": 0.02, + "grad_norm": 0.65625, + "learning_rate": 3.155479059093517e-05, + "loss": 1.0931, + "step": 1100 + }, + { + "epoch": 0.02, + "grad_norm": 0.5546875, + "learning_rate": 3.16982214572576e-05, + "loss": 0.932, + "step": 1105 + }, + { + "epoch": 0.02, + "grad_norm": 0.5, + "learning_rate": 3.1841652323580036e-05, + "loss": 0.97, + "step": 1110 + }, + { + "epoch": 0.02, + "grad_norm": 0.51171875, + "learning_rate": 3.198508318990247e-05, + "loss": 1.0856, + "step": 1115 + }, + { + "epoch": 0.02, + "grad_norm": 0.59765625, + "learning_rate": 3.21285140562249e-05, + "loss": 0.8973, + "step": 1120 + }, + { + "epoch": 0.02, + "grad_norm": 0.53125, + "learning_rate": 3.227194492254733e-05, + "loss": 1.018, + "step": 1125 + }, + { + "epoch": 0.02, + "grad_norm": 0.58203125, + "learning_rate": 3.241537578886977e-05, + "loss": 1.0977, + "step": 1130 + }, + { + "epoch": 0.02, + "grad_norm": 0.53515625, + "learning_rate": 3.2558806655192196e-05, + "loss": 0.8744, + "step": 1135 + }, + { + "epoch": 0.02, + "grad_norm": 0.478515625, + "learning_rate": 3.2702237521514636e-05, + "loss": 0.8979, + "step": 1140 + }, + { + "epoch": 0.02, + "grad_norm": 0.578125, + "learning_rate": 3.284566838783706e-05, + "loss": 0.9762, + "step": 1145 + }, + { + "epoch": 0.02, + "grad_norm": 0.5234375, + "learning_rate": 3.2989099254159496e-05, + "loss": 0.9018, + "step": 1150 + }, + { + "epoch": 0.02, + "grad_norm": 0.55859375, + "learning_rate": 3.313253012048193e-05, + "loss": 1.0398, + "step": 1155 + }, + { + "epoch": 0.02, + "grad_norm": 0.51171875, + "learning_rate": 3.327596098680436e-05, + "loss": 1.0457, + "step": 1160 + }, + { + "epoch": 0.02, + "grad_norm": 0.5390625, + "learning_rate": 3.3419391853126796e-05, + "loss": 0.984, + "step": 1165 + }, + { + "epoch": 0.02, + "grad_norm": 0.478515625, + "learning_rate": 3.356282271944923e-05, + "loss": 0.8453, + "step": 1170 + }, + { + "epoch": 0.02, + "grad_norm": 0.4921875, + "learning_rate": 3.3706253585771656e-05, + "loss": 1.1258, + "step": 1175 + }, + { + "epoch": 0.02, + "grad_norm": 0.5078125, + "learning_rate": 3.3849684452094096e-05, + "loss": 0.9574, + "step": 1180 + }, + { + "epoch": 0.02, + "grad_norm": 0.53125, + "learning_rate": 3.399311531841652e-05, + "loss": 0.9937, + "step": 1185 + }, + { + "epoch": 0.02, + "grad_norm": 0.5234375, + "learning_rate": 3.413654618473896e-05, + "loss": 1.0114, + "step": 1190 + }, + { + "epoch": 0.02, + "grad_norm": 0.4609375, + "learning_rate": 3.427997705106139e-05, + "loss": 0.9564, + "step": 1195 + }, + { + "epoch": 0.02, + "grad_norm": 0.515625, + "learning_rate": 3.442340791738382e-05, + "loss": 1.1259, + "step": 1200 + }, + { + "epoch": 0.02, + "grad_norm": 0.466796875, + "learning_rate": 3.456683878370626e-05, + "loss": 0.9919, + "step": 1205 + }, + { + "epoch": 0.02, + "grad_norm": 0.58203125, + "learning_rate": 3.471026965002869e-05, + "loss": 1.0561, + "step": 1210 + }, + { + "epoch": 0.02, + "grad_norm": 0.474609375, + "learning_rate": 3.485370051635112e-05, + "loss": 0.9138, + "step": 1215 + }, + { + "epoch": 0.02, + "grad_norm": 0.51171875, + "learning_rate": 3.499713138267356e-05, + "loss": 0.9386, + "step": 1220 + }, + { + "epoch": 0.02, + "grad_norm": 0.48828125, + "learning_rate": 3.5140562248995983e-05, + "loss": 0.9422, + "step": 1225 + }, + { + "epoch": 0.02, + "grad_norm": 0.482421875, + "learning_rate": 3.528399311531842e-05, + "loss": 0.9768, + "step": 1230 + }, + { + "epoch": 0.02, + "grad_norm": 0.54296875, + "learning_rate": 3.542742398164085e-05, + "loss": 1.1266, + "step": 1235 + }, + { + "epoch": 0.02, + "grad_norm": 0.5234375, + "learning_rate": 3.5570854847963284e-05, + "loss": 1.0733, + "step": 1240 + }, + { + "epoch": 0.02, + "grad_norm": 0.5078125, + "learning_rate": 3.571428571428572e-05, + "loss": 1.001, + "step": 1245 + }, + { + "epoch": 0.02, + "grad_norm": 0.52734375, + "learning_rate": 3.585771658060815e-05, + "loss": 0.8879, + "step": 1250 + }, + { + "epoch": 0.02, + "grad_norm": 0.51953125, + "learning_rate": 3.6001147446930584e-05, + "loss": 1.0183, + "step": 1255 + }, + { + "epoch": 0.02, + "grad_norm": 0.435546875, + "learning_rate": 3.614457831325301e-05, + "loss": 0.9491, + "step": 1260 + }, + { + "epoch": 0.02, + "grad_norm": 0.53125, + "learning_rate": 3.6288009179575444e-05, + "loss": 0.8943, + "step": 1265 + }, + { + "epoch": 0.02, + "grad_norm": 0.50390625, + "learning_rate": 3.643144004589788e-05, + "loss": 1.1007, + "step": 1270 + }, + { + "epoch": 0.02, + "grad_norm": 0.48828125, + "learning_rate": 3.657487091222031e-05, + "loss": 0.9368, + "step": 1275 + }, + { + "epoch": 0.02, + "grad_norm": 0.5078125, + "learning_rate": 3.6718301778542744e-05, + "loss": 0.9788, + "step": 1280 + }, + { + "epoch": 0.02, + "grad_norm": 0.404296875, + "learning_rate": 3.686173264486518e-05, + "loss": 0.9342, + "step": 1285 + }, + { + "epoch": 0.02, + "grad_norm": 0.5, + "learning_rate": 3.7005163511187604e-05, + "loss": 1.0328, + "step": 1290 + }, + { + "epoch": 0.02, + "grad_norm": 0.380859375, + "learning_rate": 3.7148594377510044e-05, + "loss": 0.8514, + "step": 1295 + }, + { + "epoch": 0.02, + "grad_norm": 0.54296875, + "learning_rate": 3.729202524383247e-05, + "loss": 1.0245, + "step": 1300 + }, + { + "epoch": 0.02, + "grad_norm": 0.51171875, + "learning_rate": 3.743545611015491e-05, + "loss": 0.9722, + "step": 1305 + }, + { + "epoch": 0.02, + "grad_norm": 0.5546875, + "learning_rate": 3.757888697647734e-05, + "loss": 1.0261, + "step": 1310 + }, + { + "epoch": 0.02, + "grad_norm": 0.5703125, + "learning_rate": 3.772231784279977e-05, + "loss": 0.9276, + "step": 1315 + }, + { + "epoch": 0.02, + "grad_norm": 0.5, + "learning_rate": 3.7865748709122204e-05, + "loss": 1.0778, + "step": 1320 + }, + { + "epoch": 0.02, + "grad_norm": 0.482421875, + "learning_rate": 3.800917957544464e-05, + "loss": 1.0856, + "step": 1325 + }, + { + "epoch": 0.02, + "grad_norm": 0.4921875, + "learning_rate": 3.815261044176707e-05, + "loss": 0.928, + "step": 1330 + }, + { + "epoch": 0.02, + "grad_norm": 0.53125, + "learning_rate": 3.8296041308089504e-05, + "loss": 0.9267, + "step": 1335 + }, + { + "epoch": 0.02, + "grad_norm": 0.453125, + "learning_rate": 3.843947217441193e-05, + "loss": 0.9876, + "step": 1340 + }, + { + "epoch": 0.02, + "grad_norm": 0.6015625, + "learning_rate": 3.858290304073437e-05, + "loss": 1.0872, + "step": 1345 + }, + { + "epoch": 0.02, + "grad_norm": 0.4921875, + "learning_rate": 3.87263339070568e-05, + "loss": 0.8564, + "step": 1350 + }, + { + "epoch": 0.02, + "grad_norm": 0.51953125, + "learning_rate": 3.886976477337924e-05, + "loss": 1.1186, + "step": 1355 + }, + { + "epoch": 0.02, + "grad_norm": 0.498046875, + "learning_rate": 3.9013195639701665e-05, + "loss": 0.9905, + "step": 1360 + }, + { + "epoch": 0.02, + "grad_norm": 0.6015625, + "learning_rate": 3.91566265060241e-05, + "loss": 1.0624, + "step": 1365 + }, + { + "epoch": 0.02, + "grad_norm": 0.5, + "learning_rate": 3.930005737234653e-05, + "loss": 0.9626, + "step": 1370 + }, + { + "epoch": 0.02, + "grad_norm": 0.5234375, + "learning_rate": 3.9443488238668965e-05, + "loss": 0.9534, + "step": 1375 + }, + { + "epoch": 0.02, + "grad_norm": 0.515625, + "learning_rate": 3.958691910499139e-05, + "loss": 1.0367, + "step": 1380 + }, + { + "epoch": 0.02, + "grad_norm": 0.51953125, + "learning_rate": 3.973034997131383e-05, + "loss": 0.9886, + "step": 1385 + }, + { + "epoch": 0.02, + "grad_norm": 0.4765625, + "learning_rate": 3.987378083763626e-05, + "loss": 1.1053, + "step": 1390 + }, + { + "epoch": 0.02, + "grad_norm": 0.5, + "learning_rate": 4.00172117039587e-05, + "loss": 0.9696, + "step": 1395 + }, + { + "epoch": 0.02, + "grad_norm": 0.62890625, + "learning_rate": 4.0160642570281125e-05, + "loss": 1.128, + "step": 1400 + }, + { + "epoch": 0.02, + "grad_norm": 0.486328125, + "learning_rate": 4.030407343660356e-05, + "loss": 0.9621, + "step": 1405 + }, + { + "epoch": 0.02, + "grad_norm": 0.49609375, + "learning_rate": 4.044750430292599e-05, + "loss": 0.9351, + "step": 1410 + }, + { + "epoch": 0.02, + "grad_norm": 0.4140625, + "learning_rate": 4.0590935169248425e-05, + "loss": 0.9294, + "step": 1415 + }, + { + "epoch": 0.02, + "grad_norm": 0.51171875, + "learning_rate": 4.073436603557086e-05, + "loss": 1.0204, + "step": 1420 + }, + { + "epoch": 0.02, + "grad_norm": 0.5703125, + "learning_rate": 4.087779690189329e-05, + "loss": 1.0157, + "step": 1425 + }, + { + "epoch": 0.02, + "grad_norm": 0.5, + "learning_rate": 4.102122776821572e-05, + "loss": 0.9373, + "step": 1430 + }, + { + "epoch": 0.02, + "grad_norm": 0.546875, + "learning_rate": 4.116465863453816e-05, + "loss": 1.138, + "step": 1435 + }, + { + "epoch": 0.02, + "grad_norm": 0.5, + "learning_rate": 4.1308089500860585e-05, + "loss": 0.8953, + "step": 1440 + }, + { + "epoch": 0.02, + "grad_norm": 0.484375, + "learning_rate": 4.1451520367183025e-05, + "loss": 1.0174, + "step": 1445 + }, + { + "epoch": 0.02, + "grad_norm": 0.55078125, + "learning_rate": 4.159495123350545e-05, + "loss": 0.9657, + "step": 1450 + }, + { + "epoch": 0.02, + "grad_norm": 0.45703125, + "learning_rate": 4.1738382099827885e-05, + "loss": 1.029, + "step": 1455 + }, + { + "epoch": 0.02, + "grad_norm": 0.515625, + "learning_rate": 4.188181296615032e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.02, + "grad_norm": 0.5390625, + "learning_rate": 4.202524383247275e-05, + "loss": 0.9021, + "step": 1465 + }, + { + "epoch": 0.02, + "grad_norm": 0.515625, + "learning_rate": 4.2168674698795186e-05, + "loss": 0.8547, + "step": 1470 + }, + { + "epoch": 0.02, + "grad_norm": 0.490234375, + "learning_rate": 4.231210556511762e-05, + "loss": 0.9427, + "step": 1475 + }, + { + "epoch": 0.02, + "grad_norm": 0.4765625, + "learning_rate": 4.2455536431440046e-05, + "loss": 0.9815, + "step": 1480 + }, + { + "epoch": 0.02, + "grad_norm": 0.49609375, + "learning_rate": 4.259896729776248e-05, + "loss": 0.9458, + "step": 1485 + }, + { + "epoch": 0.02, + "grad_norm": 0.48046875, + "learning_rate": 4.274239816408491e-05, + "loss": 1.0863, + "step": 1490 + }, + { + "epoch": 0.02, + "grad_norm": 0.58203125, + "learning_rate": 4.2885829030407346e-05, + "loss": 0.9778, + "step": 1495 + }, + { + "epoch": 0.02, + "grad_norm": 0.51953125, + "learning_rate": 4.302925989672978e-05, + "loss": 1.0188, + "step": 1500 + }, + { + "epoch": 0.02, + "grad_norm": 0.486328125, + "learning_rate": 4.317269076305221e-05, + "loss": 0.8671, + "step": 1505 + }, + { + "epoch": 0.02, + "grad_norm": 0.46484375, + "learning_rate": 4.3316121629374646e-05, + "loss": 0.9179, + "step": 1510 + }, + { + "epoch": 0.02, + "grad_norm": 0.47265625, + "learning_rate": 4.345955249569707e-05, + "loss": 1.0784, + "step": 1515 + }, + { + "epoch": 0.02, + "grad_norm": 0.435546875, + "learning_rate": 4.3602983362019506e-05, + "loss": 0.9223, + "step": 1520 + }, + { + "epoch": 0.02, + "grad_norm": 0.515625, + "learning_rate": 4.374641422834194e-05, + "loss": 1.0345, + "step": 1525 + }, + { + "epoch": 0.02, + "grad_norm": 0.482421875, + "learning_rate": 4.388984509466437e-05, + "loss": 1.042, + "step": 1530 + }, + { + "epoch": 0.02, + "grad_norm": 0.51953125, + "learning_rate": 4.4033275960986806e-05, + "loss": 0.9997, + "step": 1535 + }, + { + "epoch": 0.02, + "grad_norm": 0.52734375, + "learning_rate": 4.417670682730924e-05, + "loss": 1.0368, + "step": 1540 + }, + { + "epoch": 0.02, + "grad_norm": 0.51953125, + "learning_rate": 4.4320137693631666e-05, + "loss": 0.9121, + "step": 1545 + }, + { + "epoch": 0.02, + "grad_norm": 0.484375, + "learning_rate": 4.4463568559954106e-05, + "loss": 0.9296, + "step": 1550 + }, + { + "epoch": 0.02, + "grad_norm": 0.4296875, + "learning_rate": 4.460699942627653e-05, + "loss": 0.9765, + "step": 1555 + }, + { + "epoch": 0.02, + "grad_norm": 0.48046875, + "learning_rate": 4.475043029259897e-05, + "loss": 1.0278, + "step": 1560 + }, + { + "epoch": 0.02, + "grad_norm": 0.53125, + "learning_rate": 4.48938611589214e-05, + "loss": 1.0368, + "step": 1565 + }, + { + "epoch": 0.02, + "grad_norm": 0.4921875, + "learning_rate": 4.503729202524383e-05, + "loss": 0.9063, + "step": 1570 + }, + { + "epoch": 0.02, + "grad_norm": 0.53515625, + "learning_rate": 4.5180722891566266e-05, + "loss": 0.9922, + "step": 1575 + }, + { + "epoch": 0.02, + "grad_norm": 0.45703125, + "learning_rate": 4.53241537578887e-05, + "loss": 0.9436, + "step": 1580 + }, + { + "epoch": 0.02, + "grad_norm": 0.5078125, + "learning_rate": 4.546758462421113e-05, + "loss": 1.1216, + "step": 1585 + }, + { + "epoch": 0.02, + "grad_norm": 0.50390625, + "learning_rate": 4.5611015490533566e-05, + "loss": 1.095, + "step": 1590 + }, + { + "epoch": 0.02, + "grad_norm": 0.474609375, + "learning_rate": 4.575444635685599e-05, + "loss": 1.0015, + "step": 1595 + }, + { + "epoch": 0.02, + "grad_norm": 0.466796875, + "learning_rate": 4.589787722317843e-05, + "loss": 0.9483, + "step": 1600 + }, + { + "epoch": 0.02, + "grad_norm": 0.490234375, + "learning_rate": 4.604130808950086e-05, + "loss": 0.9021, + "step": 1605 + }, + { + "epoch": 0.02, + "grad_norm": 0.494140625, + "learning_rate": 4.61847389558233e-05, + "loss": 0.9578, + "step": 1610 + }, + { + "epoch": 0.02, + "grad_norm": 0.484375, + "learning_rate": 4.632816982214573e-05, + "loss": 1.0509, + "step": 1615 + }, + { + "epoch": 0.02, + "grad_norm": 0.51171875, + "learning_rate": 4.647160068846816e-05, + "loss": 0.9595, + "step": 1620 + }, + { + "epoch": 0.02, + "grad_norm": 0.5078125, + "learning_rate": 4.6615031554790593e-05, + "loss": 1.0539, + "step": 1625 + }, + { + "epoch": 0.02, + "grad_norm": 0.427734375, + "learning_rate": 4.675846242111303e-05, + "loss": 1.0104, + "step": 1630 + }, + { + "epoch": 0.02, + "grad_norm": 0.54296875, + "learning_rate": 4.690189328743546e-05, + "loss": 1.0165, + "step": 1635 + }, + { + "epoch": 0.02, + "grad_norm": 0.478515625, + "learning_rate": 4.7045324153757894e-05, + "loss": 0.9361, + "step": 1640 + }, + { + "epoch": 0.02, + "grad_norm": 0.458984375, + "learning_rate": 4.718875502008032e-05, + "loss": 0.928, + "step": 1645 + }, + { + "epoch": 0.02, + "grad_norm": 0.470703125, + "learning_rate": 4.733218588640276e-05, + "loss": 0.9363, + "step": 1650 + }, + { + "epoch": 0.02, + "grad_norm": 0.52734375, + "learning_rate": 4.747561675272519e-05, + "loss": 0.9143, + "step": 1655 + }, + { + "epoch": 0.02, + "grad_norm": 0.48828125, + "learning_rate": 4.761904761904762e-05, + "loss": 0.9955, + "step": 1660 + }, + { + "epoch": 0.02, + "grad_norm": 0.4921875, + "learning_rate": 4.7762478485370054e-05, + "loss": 0.9988, + "step": 1665 + }, + { + "epoch": 0.02, + "grad_norm": 0.458984375, + "learning_rate": 4.790590935169249e-05, + "loss": 0.9563, + "step": 1670 + }, + { + "epoch": 0.02, + "grad_norm": 0.5703125, + "learning_rate": 4.804934021801492e-05, + "loss": 1.0039, + "step": 1675 + }, + { + "epoch": 0.02, + "grad_norm": 0.462890625, + "learning_rate": 4.8192771084337354e-05, + "loss": 1.1524, + "step": 1680 + }, + { + "epoch": 0.02, + "grad_norm": 0.50390625, + "learning_rate": 4.833620195065978e-05, + "loss": 1.0741, + "step": 1685 + }, + { + "epoch": 0.02, + "grad_norm": 0.546875, + "learning_rate": 4.847963281698222e-05, + "loss": 0.9556, + "step": 1690 + }, + { + "epoch": 0.02, + "grad_norm": 0.4375, + "learning_rate": 4.862306368330465e-05, + "loss": 0.8395, + "step": 1695 + }, + { + "epoch": 0.02, + "grad_norm": 0.515625, + "learning_rate": 4.876649454962709e-05, + "loss": 1.0093, + "step": 1700 + }, + { + "epoch": 0.02, + "grad_norm": 0.4375, + "learning_rate": 4.8909925415949514e-05, + "loss": 0.9998, + "step": 1705 + }, + { + "epoch": 0.02, + "grad_norm": 0.5234375, + "learning_rate": 4.905335628227195e-05, + "loss": 1.0079, + "step": 1710 + }, + { + "epoch": 0.02, + "grad_norm": 0.50390625, + "learning_rate": 4.919678714859438e-05, + "loss": 0.9187, + "step": 1715 + }, + { + "epoch": 0.02, + "grad_norm": 0.4375, + "learning_rate": 4.9340218014916814e-05, + "loss": 1.0315, + "step": 1720 + }, + { + "epoch": 0.02, + "grad_norm": 0.474609375, + "learning_rate": 4.948364888123925e-05, + "loss": 0.833, + "step": 1725 + }, + { + "epoch": 0.02, + "grad_norm": 0.50390625, + "learning_rate": 4.962707974756168e-05, + "loss": 1.0263, + "step": 1730 + }, + { + "epoch": 0.02, + "grad_norm": 0.51953125, + "learning_rate": 4.977051061388411e-05, + "loss": 0.9684, + "step": 1735 + }, + { + "epoch": 0.02, + "grad_norm": 0.4921875, + "learning_rate": 4.991394148020654e-05, + "loss": 0.969, + "step": 1740 + }, + { + "epoch": 0.03, + "grad_norm": 0.474609375, + "learning_rate": 5.0057372346528974e-05, + "loss": 1.0403, + "step": 1745 + }, + { + "epoch": 0.03, + "grad_norm": 0.462890625, + "learning_rate": 5.020080321285141e-05, + "loss": 0.9688, + "step": 1750 + }, + { + "epoch": 0.03, + "grad_norm": 0.47265625, + "learning_rate": 5.0344234079173834e-05, + "loss": 0.928, + "step": 1755 + }, + { + "epoch": 0.03, + "grad_norm": 0.478515625, + "learning_rate": 5.0487664945496275e-05, + "loss": 0.8397, + "step": 1760 + }, + { + "epoch": 0.03, + "grad_norm": 0.494140625, + "learning_rate": 5.063109581181871e-05, + "loss": 1.1001, + "step": 1765 + }, + { + "epoch": 0.03, + "grad_norm": 0.53515625, + "learning_rate": 5.077452667814114e-05, + "loss": 0.9803, + "step": 1770 + }, + { + "epoch": 0.03, + "grad_norm": 0.4453125, + "learning_rate": 5.091795754446357e-05, + "loss": 1.1502, + "step": 1775 + }, + { + "epoch": 0.03, + "grad_norm": 0.48046875, + "learning_rate": 5.1061388410786e-05, + "loss": 0.9695, + "step": 1780 + }, + { + "epoch": 0.03, + "grad_norm": 0.5078125, + "learning_rate": 5.120481927710844e-05, + "loss": 0.969, + "step": 1785 + }, + { + "epoch": 0.03, + "grad_norm": 0.48828125, + "learning_rate": 5.134825014343087e-05, + "loss": 1.1124, + "step": 1790 + }, + { + "epoch": 0.03, + "grad_norm": 0.515625, + "learning_rate": 5.14916810097533e-05, + "loss": 0.9795, + "step": 1795 + }, + { + "epoch": 0.03, + "grad_norm": 0.53515625, + "learning_rate": 5.1635111876075735e-05, + "loss": 1.0894, + "step": 1800 + }, + { + "epoch": 0.03, + "grad_norm": 0.48828125, + "learning_rate": 5.177854274239816e-05, + "loss": 0.9983, + "step": 1805 + }, + { + "epoch": 0.03, + "grad_norm": 0.462890625, + "learning_rate": 5.1921973608720595e-05, + "loss": 0.9976, + "step": 1810 + }, + { + "epoch": 0.03, + "grad_norm": 0.474609375, + "learning_rate": 5.2065404475043035e-05, + "loss": 1.0442, + "step": 1815 + }, + { + "epoch": 0.03, + "grad_norm": 0.5, + "learning_rate": 5.220883534136547e-05, + "loss": 1.1065, + "step": 1820 + }, + { + "epoch": 0.03, + "grad_norm": 0.458984375, + "learning_rate": 5.2352266207687895e-05, + "loss": 0.9084, + "step": 1825 + }, + { + "epoch": 0.03, + "grad_norm": 0.45703125, + "learning_rate": 5.249569707401033e-05, + "loss": 1.0342, + "step": 1830 + }, + { + "epoch": 0.03, + "grad_norm": 0.58203125, + "learning_rate": 5.263912794033277e-05, + "loss": 1.0483, + "step": 1835 + }, + { + "epoch": 0.03, + "grad_norm": 0.439453125, + "learning_rate": 5.278255880665519e-05, + "loss": 0.9213, + "step": 1840 + }, + { + "epoch": 0.03, + "grad_norm": 0.451171875, + "learning_rate": 5.292598967297763e-05, + "loss": 1.0645, + "step": 1845 + }, + { + "epoch": 0.03, + "grad_norm": 0.6171875, + "learning_rate": 5.306942053930006e-05, + "loss": 1.0508, + "step": 1850 + }, + { + "epoch": 0.03, + "grad_norm": 0.427734375, + "learning_rate": 5.321285140562249e-05, + "loss": 0.8046, + "step": 1855 + }, + { + "epoch": 0.03, + "grad_norm": 0.50390625, + "learning_rate": 5.335628227194492e-05, + "loss": 0.8508, + "step": 1860 + }, + { + "epoch": 0.03, + "grad_norm": 0.4765625, + "learning_rate": 5.349971313826736e-05, + "loss": 1.0346, + "step": 1865 + }, + { + "epoch": 0.03, + "grad_norm": 0.4375, + "learning_rate": 5.364314400458978e-05, + "loss": 0.8905, + "step": 1870 + }, + { + "epoch": 0.03, + "grad_norm": 0.4921875, + "learning_rate": 5.378657487091222e-05, + "loss": 1.133, + "step": 1875 + }, + { + "epoch": 0.03, + "grad_norm": 0.435546875, + "learning_rate": 5.3930005737234656e-05, + "loss": 1.1266, + "step": 1880 + }, + { + "epoch": 0.03, + "grad_norm": 0.4140625, + "learning_rate": 5.4073436603557096e-05, + "loss": 1.0006, + "step": 1885 + }, + { + "epoch": 0.03, + "grad_norm": 0.451171875, + "learning_rate": 5.4216867469879516e-05, + "loss": 0.8921, + "step": 1890 + }, + { + "epoch": 0.03, + "grad_norm": 0.5078125, + "learning_rate": 5.4360298336201956e-05, + "loss": 1.0013, + "step": 1895 + }, + { + "epoch": 0.03, + "grad_norm": 0.49609375, + "learning_rate": 5.450372920252439e-05, + "loss": 0.9136, + "step": 1900 + }, + { + "epoch": 0.03, + "grad_norm": 0.44921875, + "learning_rate": 5.4647160068846816e-05, + "loss": 0.8111, + "step": 1905 + }, + { + "epoch": 0.03, + "grad_norm": 0.4765625, + "learning_rate": 5.479059093516925e-05, + "loss": 0.9691, + "step": 1910 + }, + { + "epoch": 0.03, + "grad_norm": 0.359375, + "learning_rate": 5.493402180149169e-05, + "loss": 0.8102, + "step": 1915 + }, + { + "epoch": 0.03, + "grad_norm": 0.47265625, + "learning_rate": 5.507745266781411e-05, + "loss": 0.9446, + "step": 1920 + }, + { + "epoch": 0.03, + "grad_norm": 0.5234375, + "learning_rate": 5.522088353413655e-05, + "loss": 0.9894, + "step": 1925 + }, + { + "epoch": 0.03, + "grad_norm": 0.48046875, + "learning_rate": 5.536431440045898e-05, + "loss": 0.9952, + "step": 1930 + }, + { + "epoch": 0.03, + "grad_norm": 0.482421875, + "learning_rate": 5.5507745266781416e-05, + "loss": 0.8921, + "step": 1935 + }, + { + "epoch": 0.03, + "grad_norm": 0.50390625, + "learning_rate": 5.565117613310384e-05, + "loss": 1.0289, + "step": 1940 + }, + { + "epoch": 0.03, + "grad_norm": 0.5078125, + "learning_rate": 5.579460699942628e-05, + "loss": 1.0987, + "step": 1945 + }, + { + "epoch": 0.03, + "grad_norm": 0.455078125, + "learning_rate": 5.5938037865748716e-05, + "loss": 0.8982, + "step": 1950 + }, + { + "epoch": 0.03, + "grad_norm": 0.50390625, + "learning_rate": 5.608146873207114e-05, + "loss": 0.9589, + "step": 1955 + }, + { + "epoch": 0.03, + "grad_norm": 0.427734375, + "learning_rate": 5.6224899598393576e-05, + "loss": 1.0076, + "step": 1960 + }, + { + "epoch": 0.03, + "grad_norm": 0.4765625, + "learning_rate": 5.636833046471601e-05, + "loss": 1.0098, + "step": 1965 + }, + { + "epoch": 0.03, + "grad_norm": 0.4140625, + "learning_rate": 5.6511761331038436e-05, + "loss": 0.9846, + "step": 1970 + }, + { + "epoch": 0.03, + "grad_norm": 0.5078125, + "learning_rate": 5.6655192197360876e-05, + "loss": 1.0777, + "step": 1975 + }, + { + "epoch": 0.03, + "grad_norm": 0.421875, + "learning_rate": 5.679862306368331e-05, + "loss": 0.9375, + "step": 1980 + }, + { + "epoch": 0.03, + "grad_norm": 0.48046875, + "learning_rate": 5.694205393000574e-05, + "loss": 1.0631, + "step": 1985 + }, + { + "epoch": 0.03, + "grad_norm": 0.4609375, + "learning_rate": 5.708548479632817e-05, + "loss": 0.9297, + "step": 1990 + }, + { + "epoch": 0.03, + "grad_norm": 0.4140625, + "learning_rate": 5.72289156626506e-05, + "loss": 0.7953, + "step": 1995 + }, + { + "epoch": 0.03, + "grad_norm": 0.46875, + "learning_rate": 5.737234652897304e-05, + "loss": 1.0055, + "step": 2000 + }, + { + "epoch": 0.03, + "grad_norm": 0.435546875, + "learning_rate": 5.751577739529547e-05, + "loss": 1.0526, + "step": 2005 + }, + { + "epoch": 0.03, + "grad_norm": 0.515625, + "learning_rate": 5.76592082616179e-05, + "loss": 0.9813, + "step": 2010 + }, + { + "epoch": 0.03, + "grad_norm": 0.47265625, + "learning_rate": 5.780263912794034e-05, + "loss": 1.0185, + "step": 2015 + }, + { + "epoch": 0.03, + "grad_norm": 0.474609375, + "learning_rate": 5.794606999426276e-05, + "loss": 0.9883, + "step": 2020 + }, + { + "epoch": 0.03, + "grad_norm": 0.412109375, + "learning_rate": 5.80895008605852e-05, + "loss": 0.9408, + "step": 2025 + }, + { + "epoch": 0.03, + "grad_norm": 0.462890625, + "learning_rate": 5.823293172690764e-05, + "loss": 1.0352, + "step": 2030 + }, + { + "epoch": 0.03, + "grad_norm": 0.462890625, + "learning_rate": 5.8376362593230063e-05, + "loss": 0.8128, + "step": 2035 + }, + { + "epoch": 0.03, + "grad_norm": 0.45703125, + "learning_rate": 5.85197934595525e-05, + "loss": 1.0551, + "step": 2040 + }, + { + "epoch": 0.03, + "grad_norm": 0.44921875, + "learning_rate": 5.866322432587493e-05, + "loss": 0.9347, + "step": 2045 + }, + { + "epoch": 0.03, + "grad_norm": 0.462890625, + "learning_rate": 5.880665519219737e-05, + "loss": 0.8477, + "step": 2050 + }, + { + "epoch": 0.03, + "grad_norm": 0.57421875, + "learning_rate": 5.895008605851979e-05, + "loss": 0.9384, + "step": 2055 + }, + { + "epoch": 0.03, + "grad_norm": 0.478515625, + "learning_rate": 5.909351692484223e-05, + "loss": 1.0149, + "step": 2060 + }, + { + "epoch": 0.03, + "grad_norm": 0.439453125, + "learning_rate": 5.9236947791164664e-05, + "loss": 1.0872, + "step": 2065 + }, + { + "epoch": 0.03, + "grad_norm": 0.443359375, + "learning_rate": 5.938037865748709e-05, + "loss": 0.8893, + "step": 2070 + }, + { + "epoch": 0.03, + "grad_norm": 0.40625, + "learning_rate": 5.9523809523809524e-05, + "loss": 0.9899, + "step": 2075 + }, + { + "epoch": 0.03, + "grad_norm": 0.447265625, + "learning_rate": 5.9667240390131964e-05, + "loss": 0.9359, + "step": 2080 + }, + { + "epoch": 0.03, + "grad_norm": 0.4296875, + "learning_rate": 5.9810671256454384e-05, + "loss": 0.9945, + "step": 2085 + }, + { + "epoch": 0.03, + "grad_norm": 0.486328125, + "learning_rate": 5.9954102122776824e-05, + "loss": 0.8843, + "step": 2090 + }, + { + "epoch": 0.03, + "grad_norm": 0.423828125, + "learning_rate": 6.009753298909926e-05, + "loss": 0.9516, + "step": 2095 + }, + { + "epoch": 0.03, + "grad_norm": 0.484375, + "learning_rate": 6.02409638554217e-05, + "loss": 0.9624, + "step": 2100 + }, + { + "epoch": 0.03, + "grad_norm": 0.43359375, + "learning_rate": 6.038439472174412e-05, + "loss": 0.8447, + "step": 2105 + }, + { + "epoch": 0.03, + "grad_norm": 0.53515625, + "learning_rate": 6.052782558806656e-05, + "loss": 0.979, + "step": 2110 + }, + { + "epoch": 0.03, + "grad_norm": 0.46875, + "learning_rate": 6.067125645438899e-05, + "loss": 0.9816, + "step": 2115 + }, + { + "epoch": 0.03, + "grad_norm": 0.453125, + "learning_rate": 6.081468732071142e-05, + "loss": 1.0292, + "step": 2120 + }, + { + "epoch": 0.03, + "grad_norm": 0.408203125, + "learning_rate": 6.095811818703385e-05, + "loss": 1.0244, + "step": 2125 + }, + { + "epoch": 0.03, + "grad_norm": 0.484375, + "learning_rate": 6.110154905335628e-05, + "loss": 1.1965, + "step": 2130 + }, + { + "epoch": 0.03, + "grad_norm": 0.4296875, + "learning_rate": 6.124497991967871e-05, + "loss": 0.9508, + "step": 2135 + }, + { + "epoch": 0.03, + "grad_norm": 0.421875, + "learning_rate": 6.138841078600115e-05, + "loss": 1.2246, + "step": 2140 + }, + { + "epoch": 0.03, + "grad_norm": 0.3984375, + "learning_rate": 6.153184165232359e-05, + "loss": 0.9361, + "step": 2145 + }, + { + "epoch": 0.03, + "grad_norm": 0.46484375, + "learning_rate": 6.1675272518646e-05, + "loss": 1.0158, + "step": 2150 + }, + { + "epoch": 0.03, + "grad_norm": 0.47265625, + "learning_rate": 6.181870338496844e-05, + "loss": 0.885, + "step": 2155 + }, + { + "epoch": 0.03, + "grad_norm": 0.458984375, + "learning_rate": 6.196213425129088e-05, + "loss": 0.8979, + "step": 2160 + }, + { + "epoch": 0.03, + "grad_norm": 0.54296875, + "learning_rate": 6.210556511761332e-05, + "loss": 1.0512, + "step": 2165 + }, + { + "epoch": 0.03, + "grad_norm": 0.404296875, + "learning_rate": 6.224899598393574e-05, + "loss": 0.9623, + "step": 2170 + }, + { + "epoch": 0.03, + "grad_norm": 0.486328125, + "learning_rate": 6.239242685025818e-05, + "loss": 0.8594, + "step": 2175 + }, + { + "epoch": 0.03, + "grad_norm": 0.4453125, + "learning_rate": 6.253585771658062e-05, + "loss": 0.9633, + "step": 2180 + }, + { + "epoch": 0.03, + "grad_norm": 0.470703125, + "learning_rate": 6.267928858290304e-05, + "loss": 0.9947, + "step": 2185 + }, + { + "epoch": 0.03, + "grad_norm": 0.4609375, + "learning_rate": 6.282271944922547e-05, + "loss": 0.8975, + "step": 2190 + }, + { + "epoch": 0.03, + "grad_norm": 0.41796875, + "learning_rate": 6.296615031554791e-05, + "loss": 0.9396, + "step": 2195 + }, + { + "epoch": 0.03, + "grad_norm": 0.490234375, + "learning_rate": 6.310958118187034e-05, + "loss": 1.024, + "step": 2200 + }, + { + "epoch": 0.03, + "grad_norm": 0.39453125, + "learning_rate": 6.325301204819278e-05, + "loss": 1.0322, + "step": 2205 + }, + { + "epoch": 0.03, + "grad_norm": 0.5, + "learning_rate": 6.33964429145152e-05, + "loss": 0.9946, + "step": 2210 + }, + { + "epoch": 0.03, + "grad_norm": 0.421875, + "learning_rate": 6.353987378083765e-05, + "loss": 1.0883, + "step": 2215 + }, + { + "epoch": 0.03, + "grad_norm": 0.455078125, + "learning_rate": 6.368330464716007e-05, + "loss": 1.0424, + "step": 2220 + }, + { + "epoch": 0.03, + "grad_norm": 0.44140625, + "learning_rate": 6.382673551348251e-05, + "loss": 0.8524, + "step": 2225 + }, + { + "epoch": 0.03, + "grad_norm": 0.40234375, + "learning_rate": 6.397016637980494e-05, + "loss": 0.9997, + "step": 2230 + }, + { + "epoch": 0.03, + "grad_norm": 0.5546875, + "learning_rate": 6.411359724612737e-05, + "loss": 0.824, + "step": 2235 + }, + { + "epoch": 0.03, + "grad_norm": 0.435546875, + "learning_rate": 6.42570281124498e-05, + "loss": 1.0312, + "step": 2240 + }, + { + "epoch": 0.03, + "grad_norm": 0.451171875, + "learning_rate": 6.440045897877223e-05, + "loss": 0.92, + "step": 2245 + }, + { + "epoch": 0.03, + "grad_norm": 0.423828125, + "learning_rate": 6.454388984509466e-05, + "loss": 0.879, + "step": 2250 + }, + { + "epoch": 0.03, + "grad_norm": 0.451171875, + "learning_rate": 6.46873207114171e-05, + "loss": 1.0229, + "step": 2255 + }, + { + "epoch": 0.03, + "grad_norm": 0.3828125, + "learning_rate": 6.483075157773954e-05, + "loss": 1.0514, + "step": 2260 + }, + { + "epoch": 0.03, + "grad_norm": 0.408203125, + "learning_rate": 6.497418244406197e-05, + "loss": 0.9588, + "step": 2265 + }, + { + "epoch": 0.03, + "grad_norm": 0.470703125, + "learning_rate": 6.511761331038439e-05, + "loss": 1.0641, + "step": 2270 + }, + { + "epoch": 0.03, + "grad_norm": 0.486328125, + "learning_rate": 6.526104417670683e-05, + "loss": 0.9099, + "step": 2275 + }, + { + "epoch": 0.03, + "grad_norm": 0.462890625, + "learning_rate": 6.540447504302927e-05, + "loss": 1.0107, + "step": 2280 + }, + { + "epoch": 0.03, + "grad_norm": 0.48046875, + "learning_rate": 6.55479059093517e-05, + "loss": 1.0007, + "step": 2285 + }, + { + "epoch": 0.03, + "grad_norm": 0.462890625, + "learning_rate": 6.569133677567413e-05, + "loss": 1.0989, + "step": 2290 + }, + { + "epoch": 0.03, + "grad_norm": 0.45703125, + "learning_rate": 6.583476764199657e-05, + "loss": 1.0797, + "step": 2295 + }, + { + "epoch": 0.03, + "grad_norm": 0.466796875, + "learning_rate": 6.597819850831899e-05, + "loss": 0.8466, + "step": 2300 + }, + { + "epoch": 0.03, + "grad_norm": 0.447265625, + "learning_rate": 6.612162937464142e-05, + "loss": 1.0248, + "step": 2305 + }, + { + "epoch": 0.03, + "grad_norm": 0.447265625, + "learning_rate": 6.626506024096386e-05, + "loss": 0.9221, + "step": 2310 + }, + { + "epoch": 0.03, + "grad_norm": 0.494140625, + "learning_rate": 6.640849110728629e-05, + "loss": 0.8596, + "step": 2315 + }, + { + "epoch": 0.03, + "grad_norm": 0.4296875, + "learning_rate": 6.655192197360873e-05, + "loss": 1.0407, + "step": 2320 + }, + { + "epoch": 0.03, + "grad_norm": 0.494140625, + "learning_rate": 6.669535283993115e-05, + "loss": 1.082, + "step": 2325 + }, + { + "epoch": 0.03, + "grad_norm": 0.4453125, + "learning_rate": 6.683878370625359e-05, + "loss": 1.0036, + "step": 2330 + }, + { + "epoch": 0.03, + "grad_norm": 0.4296875, + "learning_rate": 6.698221457257602e-05, + "loss": 0.9212, + "step": 2335 + }, + { + "epoch": 0.03, + "grad_norm": 0.44921875, + "learning_rate": 6.712564543889846e-05, + "loss": 1.2177, + "step": 2340 + }, + { + "epoch": 0.03, + "grad_norm": 0.419921875, + "learning_rate": 6.726907630522089e-05, + "loss": 0.9034, + "step": 2345 + }, + { + "epoch": 0.03, + "grad_norm": 0.40625, + "learning_rate": 6.741250717154331e-05, + "loss": 0.8041, + "step": 2350 + }, + { + "epoch": 0.03, + "grad_norm": 0.46484375, + "learning_rate": 6.755593803786575e-05, + "loss": 1.0537, + "step": 2355 + }, + { + "epoch": 0.03, + "grad_norm": 0.4375, + "learning_rate": 6.769936890418819e-05, + "loss": 1.0058, + "step": 2360 + }, + { + "epoch": 0.03, + "grad_norm": 0.671875, + "learning_rate": 6.78427997705106e-05, + "loss": 0.8552, + "step": 2365 + }, + { + "epoch": 0.03, + "grad_norm": 0.443359375, + "learning_rate": 6.798623063683305e-05, + "loss": 0.9354, + "step": 2370 + }, + { + "epoch": 0.03, + "grad_norm": 0.466796875, + "learning_rate": 6.812966150315549e-05, + "loss": 0.9095, + "step": 2375 + }, + { + "epoch": 0.03, + "grad_norm": 0.41015625, + "learning_rate": 6.827309236947793e-05, + "loss": 0.9488, + "step": 2380 + }, + { + "epoch": 0.03, + "grad_norm": 0.515625, + "learning_rate": 6.841652323580034e-05, + "loss": 0.8641, + "step": 2385 + }, + { + "epoch": 0.03, + "grad_norm": 0.5078125, + "learning_rate": 6.855995410212278e-05, + "loss": 1.0436, + "step": 2390 + }, + { + "epoch": 0.03, + "grad_norm": 0.474609375, + "learning_rate": 6.870338496844522e-05, + "loss": 0.9638, + "step": 2395 + }, + { + "epoch": 0.03, + "grad_norm": 0.43359375, + "learning_rate": 6.884681583476765e-05, + "loss": 0.9359, + "step": 2400 + }, + { + "epoch": 0.03, + "grad_norm": 0.43359375, + "learning_rate": 6.899024670109007e-05, + "loss": 1.011, + "step": 2405 + }, + { + "epoch": 0.03, + "grad_norm": 0.470703125, + "learning_rate": 6.913367756741251e-05, + "loss": 1.0113, + "step": 2410 + }, + { + "epoch": 0.03, + "grad_norm": 0.4296875, + "learning_rate": 6.927710843373494e-05, + "loss": 1.056, + "step": 2415 + }, + { + "epoch": 0.03, + "grad_norm": 0.474609375, + "learning_rate": 6.942053930005738e-05, + "loss": 0.9155, + "step": 2420 + }, + { + "epoch": 0.03, + "grad_norm": 0.51171875, + "learning_rate": 6.95639701663798e-05, + "loss": 0.9205, + "step": 2425 + }, + { + "epoch": 0.03, + "grad_norm": 0.423828125, + "learning_rate": 6.970740103270223e-05, + "loss": 0.9412, + "step": 2430 + }, + { + "epoch": 0.03, + "grad_norm": 0.455078125, + "learning_rate": 6.985083189902467e-05, + "loss": 1.0245, + "step": 2435 + }, + { + "epoch": 0.04, + "grad_norm": 0.421875, + "learning_rate": 6.999426276534711e-05, + "loss": 0.9259, + "step": 2440 + }, + { + "epoch": 0.04, + "grad_norm": 0.392578125, + "learning_rate": 7.013769363166954e-05, + "loss": 0.8526, + "step": 2445 + }, + { + "epoch": 0.04, + "grad_norm": 0.447265625, + "learning_rate": 7.028112449799197e-05, + "loss": 0.8853, + "step": 2450 + }, + { + "epoch": 0.04, + "grad_norm": 0.435546875, + "learning_rate": 7.042455536431441e-05, + "loss": 0.9233, + "step": 2455 + }, + { + "epoch": 0.04, + "grad_norm": 0.421875, + "learning_rate": 7.056798623063683e-05, + "loss": 0.8997, + "step": 2460 + }, + { + "epoch": 0.04, + "grad_norm": 0.486328125, + "learning_rate": 7.071141709695926e-05, + "loss": 1.0118, + "step": 2465 + }, + { + "epoch": 0.04, + "grad_norm": 0.40234375, + "learning_rate": 7.08548479632817e-05, + "loss": 1.0767, + "step": 2470 + }, + { + "epoch": 0.04, + "grad_norm": 0.5234375, + "learning_rate": 7.099827882960414e-05, + "loss": 0.9788, + "step": 2475 + }, + { + "epoch": 0.04, + "grad_norm": 0.46484375, + "learning_rate": 7.114170969592657e-05, + "loss": 0.9593, + "step": 2480 + }, + { + "epoch": 0.04, + "grad_norm": 0.466796875, + "learning_rate": 7.1285140562249e-05, + "loss": 0.9277, + "step": 2485 + }, + { + "epoch": 0.04, + "grad_norm": 0.4453125, + "learning_rate": 7.142857142857143e-05, + "loss": 1.0196, + "step": 2490 + }, + { + "epoch": 0.04, + "grad_norm": 0.486328125, + "learning_rate": 7.157200229489387e-05, + "loss": 0.9658, + "step": 2495 + }, + { + "epoch": 0.04, + "grad_norm": 0.44921875, + "learning_rate": 7.17154331612163e-05, + "loss": 0.986, + "step": 2500 + }, + { + "epoch": 0.04, + "grad_norm": 0.412109375, + "learning_rate": 7.185886402753873e-05, + "loss": 1.0481, + "step": 2505 + }, + { + "epoch": 0.04, + "grad_norm": 0.470703125, + "learning_rate": 7.200229489386117e-05, + "loss": 1.1599, + "step": 2510 + }, + { + "epoch": 0.04, + "grad_norm": 0.408203125, + "learning_rate": 7.21457257601836e-05, + "loss": 0.8659, + "step": 2515 + }, + { + "epoch": 0.04, + "grad_norm": 0.4765625, + "learning_rate": 7.228915662650602e-05, + "loss": 1.0566, + "step": 2520 + }, + { + "epoch": 0.04, + "grad_norm": 0.41796875, + "learning_rate": 7.243258749282846e-05, + "loss": 0.8413, + "step": 2525 + }, + { + "epoch": 0.04, + "grad_norm": 0.4296875, + "learning_rate": 7.257601835915089e-05, + "loss": 0.9329, + "step": 2530 + }, + { + "epoch": 0.04, + "grad_norm": 0.484375, + "learning_rate": 7.271944922547333e-05, + "loss": 0.9888, + "step": 2535 + }, + { + "epoch": 0.04, + "grad_norm": 0.408203125, + "learning_rate": 7.286288009179575e-05, + "loss": 0.8587, + "step": 2540 + }, + { + "epoch": 0.04, + "grad_norm": 0.453125, + "learning_rate": 7.30063109581182e-05, + "loss": 0.962, + "step": 2545 + }, + { + "epoch": 0.04, + "grad_norm": 0.447265625, + "learning_rate": 7.314974182444062e-05, + "loss": 0.9628, + "step": 2550 + }, + { + "epoch": 0.04, + "grad_norm": 0.392578125, + "learning_rate": 7.329317269076306e-05, + "loss": 1.0271, + "step": 2555 + }, + { + "epoch": 0.04, + "grad_norm": 0.40234375, + "learning_rate": 7.343660355708549e-05, + "loss": 0.8804, + "step": 2560 + }, + { + "epoch": 0.04, + "grad_norm": 0.380859375, + "learning_rate": 7.358003442340791e-05, + "loss": 0.8014, + "step": 2565 + }, + { + "epoch": 0.04, + "grad_norm": 0.416015625, + "learning_rate": 7.372346528973035e-05, + "loss": 0.9217, + "step": 2570 + }, + { + "epoch": 0.04, + "grad_norm": 0.451171875, + "learning_rate": 7.38668961560528e-05, + "loss": 0.7977, + "step": 2575 + }, + { + "epoch": 0.04, + "grad_norm": 0.484375, + "learning_rate": 7.401032702237521e-05, + "loss": 1.0367, + "step": 2580 + }, + { + "epoch": 0.04, + "grad_norm": 0.4921875, + "learning_rate": 7.415375788869765e-05, + "loss": 0.9485, + "step": 2585 + }, + { + "epoch": 0.04, + "grad_norm": 0.439453125, + "learning_rate": 7.429718875502009e-05, + "loss": 1.1512, + "step": 2590 + }, + { + "epoch": 0.04, + "grad_norm": 0.41015625, + "learning_rate": 7.444061962134251e-05, + "loss": 0.7874, + "step": 2595 + }, + { + "epoch": 0.04, + "grad_norm": 0.43359375, + "learning_rate": 7.458405048766494e-05, + "loss": 1.0102, + "step": 2600 + }, + { + "epoch": 0.04, + "grad_norm": 0.404296875, + "learning_rate": 7.472748135398738e-05, + "loss": 1.0031, + "step": 2605 + }, + { + "epoch": 0.04, + "grad_norm": 0.447265625, + "learning_rate": 7.487091222030982e-05, + "loss": 0.9819, + "step": 2610 + }, + { + "epoch": 0.04, + "grad_norm": 0.46875, + "learning_rate": 7.501434308663225e-05, + "loss": 1.0839, + "step": 2615 + }, + { + "epoch": 0.04, + "grad_norm": 0.59375, + "learning_rate": 7.515777395295467e-05, + "loss": 1.0095, + "step": 2620 + }, + { + "epoch": 0.04, + "grad_norm": 0.44140625, + "learning_rate": 7.530120481927712e-05, + "loss": 0.87, + "step": 2625 + }, + { + "epoch": 0.04, + "grad_norm": 0.4609375, + "learning_rate": 7.544463568559954e-05, + "loss": 0.9675, + "step": 2630 + }, + { + "epoch": 0.04, + "grad_norm": 0.423828125, + "learning_rate": 7.558806655192198e-05, + "loss": 0.9699, + "step": 2635 + }, + { + "epoch": 0.04, + "grad_norm": 0.439453125, + "learning_rate": 7.573149741824441e-05, + "loss": 1.1126, + "step": 2640 + }, + { + "epoch": 0.04, + "grad_norm": 0.400390625, + "learning_rate": 7.587492828456684e-05, + "loss": 0.8104, + "step": 2645 + }, + { + "epoch": 0.04, + "grad_norm": 0.443359375, + "learning_rate": 7.601835915088928e-05, + "loss": 0.951, + "step": 2650 + }, + { + "epoch": 0.04, + "grad_norm": 0.39453125, + "learning_rate": 7.616179001721172e-05, + "loss": 1.0582, + "step": 2655 + }, + { + "epoch": 0.04, + "grad_norm": 0.4765625, + "learning_rate": 7.630522088353414e-05, + "loss": 1.0965, + "step": 2660 + }, + { + "epoch": 0.04, + "grad_norm": 0.439453125, + "learning_rate": 7.644865174985657e-05, + "loss": 0.9515, + "step": 2665 + }, + { + "epoch": 0.04, + "grad_norm": 0.45703125, + "learning_rate": 7.659208261617901e-05, + "loss": 1.0292, + "step": 2670 + }, + { + "epoch": 0.04, + "grad_norm": 0.443359375, + "learning_rate": 7.673551348250144e-05, + "loss": 1.0915, + "step": 2675 + }, + { + "epoch": 0.04, + "grad_norm": 0.4140625, + "learning_rate": 7.687894434882386e-05, + "loss": 1.055, + "step": 2680 + }, + { + "epoch": 0.04, + "grad_norm": 0.52734375, + "learning_rate": 7.70223752151463e-05, + "loss": 1.0428, + "step": 2685 + }, + { + "epoch": 0.04, + "grad_norm": 0.4375, + "learning_rate": 7.716580608146874e-05, + "loss": 0.9814, + "step": 2690 + }, + { + "epoch": 0.04, + "grad_norm": 0.470703125, + "learning_rate": 7.730923694779117e-05, + "loss": 0.8716, + "step": 2695 + }, + { + "epoch": 0.04, + "grad_norm": 0.4375, + "learning_rate": 7.74526678141136e-05, + "loss": 0.8861, + "step": 2700 + }, + { + "epoch": 0.04, + "grad_norm": 0.466796875, + "learning_rate": 7.759609868043604e-05, + "loss": 1.0297, + "step": 2705 + }, + { + "epoch": 0.04, + "grad_norm": 0.435546875, + "learning_rate": 7.773952954675848e-05, + "loss": 0.9579, + "step": 2710 + }, + { + "epoch": 0.04, + "grad_norm": 0.482421875, + "learning_rate": 7.78829604130809e-05, + "loss": 0.9681, + "step": 2715 + }, + { + "epoch": 0.04, + "grad_norm": 0.400390625, + "learning_rate": 7.802639127940333e-05, + "loss": 0.829, + "step": 2720 + }, + { + "epoch": 0.04, + "grad_norm": 0.388671875, + "learning_rate": 7.816982214572577e-05, + "loss": 0.9794, + "step": 2725 + }, + { + "epoch": 0.04, + "grad_norm": 0.46484375, + "learning_rate": 7.83132530120482e-05, + "loss": 1.0554, + "step": 2730 + }, + { + "epoch": 0.04, + "grad_norm": 0.41796875, + "learning_rate": 7.845668387837064e-05, + "loss": 0.8973, + "step": 2735 + }, + { + "epoch": 0.04, + "grad_norm": 0.42578125, + "learning_rate": 7.860011474469306e-05, + "loss": 1.0593, + "step": 2740 + }, + { + "epoch": 0.04, + "grad_norm": 0.4453125, + "learning_rate": 7.874354561101549e-05, + "loss": 1.0436, + "step": 2745 + }, + { + "epoch": 0.04, + "grad_norm": 0.462890625, + "learning_rate": 7.888697647733793e-05, + "loss": 1.091, + "step": 2750 + }, + { + "epoch": 0.04, + "grad_norm": 0.443359375, + "learning_rate": 7.903040734366036e-05, + "loss": 1.1119, + "step": 2755 + }, + { + "epoch": 0.04, + "grad_norm": 0.43359375, + "learning_rate": 7.917383820998278e-05, + "loss": 1.0251, + "step": 2760 + }, + { + "epoch": 0.04, + "grad_norm": 0.4453125, + "learning_rate": 7.931726907630522e-05, + "loss": 0.888, + "step": 2765 + }, + { + "epoch": 0.04, + "grad_norm": 0.458984375, + "learning_rate": 7.946069994262766e-05, + "loss": 0.9257, + "step": 2770 + }, + { + "epoch": 0.04, + "grad_norm": 0.48046875, + "learning_rate": 7.960413080895009e-05, + "loss": 0.997, + "step": 2775 + }, + { + "epoch": 0.04, + "grad_norm": 0.376953125, + "learning_rate": 7.974756167527252e-05, + "loss": 0.9598, + "step": 2780 + }, + { + "epoch": 0.04, + "grad_norm": 0.443359375, + "learning_rate": 7.989099254159496e-05, + "loss": 0.8065, + "step": 2785 + }, + { + "epoch": 0.04, + "grad_norm": 0.43359375, + "learning_rate": 8.00344234079174e-05, + "loss": 0.9678, + "step": 2790 + }, + { + "epoch": 0.04, + "grad_norm": 0.38671875, + "learning_rate": 8.017785427423982e-05, + "loss": 0.9295, + "step": 2795 + }, + { + "epoch": 0.04, + "grad_norm": 0.45703125, + "learning_rate": 8.032128514056225e-05, + "loss": 0.8496, + "step": 2800 + }, + { + "epoch": 0.04, + "grad_norm": 0.453125, + "learning_rate": 8.046471600688469e-05, + "loss": 0.9453, + "step": 2805 + }, + { + "epoch": 0.04, + "grad_norm": 0.5625, + "learning_rate": 8.060814687320712e-05, + "loss": 0.9759, + "step": 2810 + }, + { + "epoch": 0.04, + "grad_norm": 0.44140625, + "learning_rate": 8.075157773952954e-05, + "loss": 0.8887, + "step": 2815 + }, + { + "epoch": 0.04, + "grad_norm": 0.408203125, + "learning_rate": 8.089500860585198e-05, + "loss": 0.9728, + "step": 2820 + }, + { + "epoch": 0.04, + "grad_norm": 0.40625, + "learning_rate": 8.103843947217442e-05, + "loss": 0.8913, + "step": 2825 + }, + { + "epoch": 0.04, + "grad_norm": 0.365234375, + "learning_rate": 8.118187033849685e-05, + "loss": 0.9973, + "step": 2830 + }, + { + "epoch": 0.04, + "grad_norm": 0.412109375, + "learning_rate": 8.132530120481928e-05, + "loss": 1.0904, + "step": 2835 + }, + { + "epoch": 0.04, + "grad_norm": 0.43359375, + "learning_rate": 8.146873207114172e-05, + "loss": 0.8934, + "step": 2840 + }, + { + "epoch": 0.04, + "grad_norm": 0.458984375, + "learning_rate": 8.161216293746414e-05, + "loss": 1.0938, + "step": 2845 + }, + { + "epoch": 0.04, + "grad_norm": 0.41796875, + "learning_rate": 8.175559380378658e-05, + "loss": 0.9017, + "step": 2850 + }, + { + "epoch": 0.04, + "grad_norm": 0.470703125, + "learning_rate": 8.189902467010901e-05, + "loss": 1.0773, + "step": 2855 + }, + { + "epoch": 0.04, + "grad_norm": 0.39453125, + "learning_rate": 8.204245553643144e-05, + "loss": 0.9509, + "step": 2860 + }, + { + "epoch": 0.04, + "grad_norm": 0.458984375, + "learning_rate": 8.218588640275388e-05, + "loss": 0.8704, + "step": 2865 + }, + { + "epoch": 0.04, + "grad_norm": 0.416015625, + "learning_rate": 8.232931726907632e-05, + "loss": 0.9692, + "step": 2870 + }, + { + "epoch": 0.04, + "grad_norm": 0.5390625, + "learning_rate": 8.247274813539873e-05, + "loss": 1.0793, + "step": 2875 + }, + { + "epoch": 0.04, + "grad_norm": 0.46875, + "learning_rate": 8.261617900172117e-05, + "loss": 0.8818, + "step": 2880 + }, + { + "epoch": 0.04, + "grad_norm": 0.392578125, + "learning_rate": 8.275960986804361e-05, + "loss": 1.0164, + "step": 2885 + }, + { + "epoch": 0.04, + "grad_norm": 0.439453125, + "learning_rate": 8.290304073436605e-05, + "loss": 0.9334, + "step": 2890 + }, + { + "epoch": 0.04, + "grad_norm": 0.380859375, + "learning_rate": 8.304647160068846e-05, + "loss": 1.1717, + "step": 2895 + }, + { + "epoch": 0.04, + "grad_norm": 0.451171875, + "learning_rate": 8.31899024670109e-05, + "loss": 1.071, + "step": 2900 + }, + { + "epoch": 0.04, + "grad_norm": 0.482421875, + "learning_rate": 8.333333333333334e-05, + "loss": 0.92, + "step": 2905 + }, + { + "epoch": 0.04, + "grad_norm": 0.458984375, + "learning_rate": 8.347676419965577e-05, + "loss": 1.0631, + "step": 2910 + }, + { + "epoch": 0.04, + "grad_norm": 0.412109375, + "learning_rate": 8.36201950659782e-05, + "loss": 0.9019, + "step": 2915 + }, + { + "epoch": 0.04, + "grad_norm": 0.42578125, + "learning_rate": 8.376362593230064e-05, + "loss": 0.8708, + "step": 2920 + }, + { + "epoch": 0.04, + "grad_norm": 0.42578125, + "learning_rate": 8.390705679862306e-05, + "loss": 0.9173, + "step": 2925 + }, + { + "epoch": 0.04, + "grad_norm": 0.49609375, + "learning_rate": 8.40504876649455e-05, + "loss": 1.0932, + "step": 2930 + }, + { + "epoch": 0.04, + "grad_norm": 0.46875, + "learning_rate": 8.419391853126793e-05, + "loss": 0.9486, + "step": 2935 + }, + { + "epoch": 0.04, + "grad_norm": 0.4140625, + "learning_rate": 8.433734939759037e-05, + "loss": 1.0511, + "step": 2940 + }, + { + "epoch": 0.04, + "grad_norm": 0.451171875, + "learning_rate": 8.44807802639128e-05, + "loss": 0.9129, + "step": 2945 + }, + { + "epoch": 0.04, + "grad_norm": 0.3828125, + "learning_rate": 8.462421113023524e-05, + "loss": 1.0939, + "step": 2950 + }, + { + "epoch": 0.04, + "grad_norm": 0.431640625, + "learning_rate": 8.476764199655766e-05, + "loss": 0.8591, + "step": 2955 + }, + { + "epoch": 0.04, + "grad_norm": 0.384765625, + "learning_rate": 8.491107286288009e-05, + "loss": 1.0119, + "step": 2960 + }, + { + "epoch": 0.04, + "grad_norm": 0.451171875, + "learning_rate": 8.505450372920253e-05, + "loss": 1.0331, + "step": 2965 + }, + { + "epoch": 0.04, + "grad_norm": 0.462890625, + "learning_rate": 8.519793459552496e-05, + "loss": 1.0119, + "step": 2970 + }, + { + "epoch": 0.04, + "grad_norm": 0.43359375, + "learning_rate": 8.534136546184738e-05, + "loss": 0.9088, + "step": 2975 + }, + { + "epoch": 0.04, + "grad_norm": 0.423828125, + "learning_rate": 8.548479632816982e-05, + "loss": 0.9238, + "step": 2980 + }, + { + "epoch": 0.04, + "grad_norm": 0.48828125, + "learning_rate": 8.562822719449226e-05, + "loss": 0.9416, + "step": 2985 + }, + { + "epoch": 0.04, + "grad_norm": 0.41796875, + "learning_rate": 8.577165806081469e-05, + "loss": 1.0492, + "step": 2990 + }, + { + "epoch": 0.04, + "grad_norm": 0.427734375, + "learning_rate": 8.591508892713712e-05, + "loss": 1.008, + "step": 2995 + }, + { + "epoch": 0.04, + "grad_norm": 0.44921875, + "learning_rate": 8.605851979345956e-05, + "loss": 1.0736, + "step": 3000 + }, + { + "epoch": 0.04, + "grad_norm": 0.423828125, + "learning_rate": 8.6201950659782e-05, + "loss": 0.8645, + "step": 3005 + }, + { + "epoch": 0.04, + "grad_norm": 0.404296875, + "learning_rate": 8.634538152610442e-05, + "loss": 1.0537, + "step": 3010 + }, + { + "epoch": 0.04, + "grad_norm": 0.41796875, + "learning_rate": 8.648881239242685e-05, + "loss": 1.0242, + "step": 3015 + }, + { + "epoch": 0.04, + "grad_norm": 0.42578125, + "learning_rate": 8.663224325874929e-05, + "loss": 1.052, + "step": 3020 + }, + { + "epoch": 0.04, + "grad_norm": 0.484375, + "learning_rate": 8.677567412507172e-05, + "loss": 1.1053, + "step": 3025 + }, + { + "epoch": 0.04, + "grad_norm": 0.4609375, + "learning_rate": 8.691910499139414e-05, + "loss": 1.0429, + "step": 3030 + }, + { + "epoch": 0.04, + "grad_norm": 0.439453125, + "learning_rate": 8.706253585771659e-05, + "loss": 0.9297, + "step": 3035 + }, + { + "epoch": 0.04, + "grad_norm": 0.42578125, + "learning_rate": 8.720596672403901e-05, + "loss": 0.9675, + "step": 3040 + }, + { + "epoch": 0.04, + "grad_norm": 0.44921875, + "learning_rate": 8.734939759036145e-05, + "loss": 1.048, + "step": 3045 + }, + { + "epoch": 0.04, + "grad_norm": 0.453125, + "learning_rate": 8.749282845668388e-05, + "loss": 0.8463, + "step": 3050 + }, + { + "epoch": 0.04, + "grad_norm": 0.369140625, + "learning_rate": 8.763625932300632e-05, + "loss": 0.8302, + "step": 3055 + }, + { + "epoch": 0.04, + "grad_norm": 0.484375, + "learning_rate": 8.777969018932875e-05, + "loss": 0.9625, + "step": 3060 + }, + { + "epoch": 0.04, + "grad_norm": 0.431640625, + "learning_rate": 8.792312105565119e-05, + "loss": 1.0129, + "step": 3065 + }, + { + "epoch": 0.04, + "grad_norm": 0.42578125, + "learning_rate": 8.806655192197361e-05, + "loss": 0.8039, + "step": 3070 + }, + { + "epoch": 0.04, + "grad_norm": 0.41015625, + "learning_rate": 8.820998278829604e-05, + "loss": 0.9156, + "step": 3075 + }, + { + "epoch": 0.04, + "grad_norm": 0.421875, + "learning_rate": 8.835341365461848e-05, + "loss": 1.033, + "step": 3080 + }, + { + "epoch": 0.04, + "grad_norm": 0.44140625, + "learning_rate": 8.849684452094092e-05, + "loss": 1.03, + "step": 3085 + }, + { + "epoch": 0.04, + "grad_norm": 0.5390625, + "learning_rate": 8.864027538726333e-05, + "loss": 1.0026, + "step": 3090 + }, + { + "epoch": 0.04, + "grad_norm": 0.458984375, + "learning_rate": 8.878370625358577e-05, + "loss": 1.0639, + "step": 3095 + }, + { + "epoch": 0.04, + "grad_norm": 0.4609375, + "learning_rate": 8.892713711990821e-05, + "loss": 0.9634, + "step": 3100 + }, + { + "epoch": 0.04, + "grad_norm": 0.490234375, + "learning_rate": 8.907056798623065e-05, + "loss": 0.9578, + "step": 3105 + }, + { + "epoch": 0.04, + "grad_norm": 0.48828125, + "learning_rate": 8.921399885255307e-05, + "loss": 0.9326, + "step": 3110 + }, + { + "epoch": 0.04, + "grad_norm": 0.421875, + "learning_rate": 8.93574297188755e-05, + "loss": 0.9253, + "step": 3115 + }, + { + "epoch": 0.04, + "grad_norm": 0.3984375, + "learning_rate": 8.950086058519795e-05, + "loss": 0.918, + "step": 3120 + }, + { + "epoch": 0.04, + "grad_norm": 0.546875, + "learning_rate": 8.964429145152037e-05, + "loss": 1.0224, + "step": 3125 + }, + { + "epoch": 0.04, + "grad_norm": 0.388671875, + "learning_rate": 8.97877223178428e-05, + "loss": 0.898, + "step": 3130 + }, + { + "epoch": 0.04, + "grad_norm": 0.404296875, + "learning_rate": 8.993115318416524e-05, + "loss": 1.1698, + "step": 3135 + }, + { + "epoch": 0.05, + "grad_norm": 0.404296875, + "learning_rate": 9.007458405048767e-05, + "loss": 0.9055, + "step": 3140 + }, + { + "epoch": 0.05, + "grad_norm": 0.4609375, + "learning_rate": 9.02180149168101e-05, + "loss": 1.0444, + "step": 3145 + }, + { + "epoch": 0.05, + "grad_norm": 0.439453125, + "learning_rate": 9.036144578313253e-05, + "loss": 1.0148, + "step": 3150 + }, + { + "epoch": 0.05, + "grad_norm": 0.423828125, + "learning_rate": 9.050487664945496e-05, + "loss": 0.8328, + "step": 3155 + }, + { + "epoch": 0.05, + "grad_norm": 0.39453125, + "learning_rate": 9.06483075157774e-05, + "loss": 0.9466, + "step": 3160 + }, + { + "epoch": 0.05, + "grad_norm": 0.40625, + "learning_rate": 9.079173838209984e-05, + "loss": 0.9638, + "step": 3165 + }, + { + "epoch": 0.05, + "grad_norm": 0.421875, + "learning_rate": 9.093516924842227e-05, + "loss": 0.9799, + "step": 3170 + }, + { + "epoch": 0.05, + "grad_norm": 0.423828125, + "learning_rate": 9.107860011474469e-05, + "loss": 1.0191, + "step": 3175 + }, + { + "epoch": 0.05, + "grad_norm": 0.45703125, + "learning_rate": 9.122203098106713e-05, + "loss": 1.0255, + "step": 3180 + }, + { + "epoch": 0.05, + "grad_norm": 0.404296875, + "learning_rate": 9.136546184738956e-05, + "loss": 0.9925, + "step": 3185 + }, + { + "epoch": 0.05, + "grad_norm": 0.41796875, + "learning_rate": 9.150889271371199e-05, + "loss": 0.8852, + "step": 3190 + }, + { + "epoch": 0.05, + "grad_norm": 0.45703125, + "learning_rate": 9.165232358003443e-05, + "loss": 1.1207, + "step": 3195 + }, + { + "epoch": 0.05, + "grad_norm": 0.41015625, + "learning_rate": 9.179575444635687e-05, + "loss": 1.052, + "step": 3200 + }, + { + "epoch": 0.05, + "grad_norm": 0.390625, + "learning_rate": 9.193918531267929e-05, + "loss": 0.918, + "step": 3205 + }, + { + "epoch": 0.05, + "grad_norm": 0.4375, + "learning_rate": 9.208261617900172e-05, + "loss": 0.9535, + "step": 3210 + }, + { + "epoch": 0.05, + "grad_norm": 0.447265625, + "learning_rate": 9.222604704532416e-05, + "loss": 1.1085, + "step": 3215 + }, + { + "epoch": 0.05, + "grad_norm": 0.388671875, + "learning_rate": 9.23694779116466e-05, + "loss": 1.0227, + "step": 3220 + }, + { + "epoch": 0.05, + "grad_norm": 0.41796875, + "learning_rate": 9.251290877796903e-05, + "loss": 0.9191, + "step": 3225 + }, + { + "epoch": 0.05, + "grad_norm": 0.490234375, + "learning_rate": 9.265633964429145e-05, + "loss": 1.0532, + "step": 3230 + }, + { + "epoch": 0.05, + "grad_norm": 0.416015625, + "learning_rate": 9.27997705106139e-05, + "loss": 0.9121, + "step": 3235 + }, + { + "epoch": 0.05, + "grad_norm": 0.478515625, + "learning_rate": 9.294320137693632e-05, + "loss": 1.0063, + "step": 3240 + }, + { + "epoch": 0.05, + "grad_norm": 0.408203125, + "learning_rate": 9.308663224325875e-05, + "loss": 1.1222, + "step": 3245 + }, + { + "epoch": 0.05, + "grad_norm": 0.390625, + "learning_rate": 9.323006310958119e-05, + "loss": 0.8751, + "step": 3250 + }, + { + "epoch": 0.05, + "grad_norm": 0.3984375, + "learning_rate": 9.337349397590361e-05, + "loss": 0.9919, + "step": 3255 + }, + { + "epoch": 0.05, + "grad_norm": 0.462890625, + "learning_rate": 9.351692484222605e-05, + "loss": 0.965, + "step": 3260 + }, + { + "epoch": 0.05, + "grad_norm": 0.421875, + "learning_rate": 9.366035570854848e-05, + "loss": 0.9367, + "step": 3265 + }, + { + "epoch": 0.05, + "grad_norm": 0.439453125, + "learning_rate": 9.380378657487092e-05, + "loss": 0.9339, + "step": 3270 + }, + { + "epoch": 0.05, + "grad_norm": 0.44921875, + "learning_rate": 9.394721744119335e-05, + "loss": 1.1373, + "step": 3275 + }, + { + "epoch": 0.05, + "grad_norm": 0.431640625, + "learning_rate": 9.409064830751579e-05, + "loss": 0.9003, + "step": 3280 + }, + { + "epoch": 0.05, + "grad_norm": 0.48046875, + "learning_rate": 9.423407917383821e-05, + "loss": 1.0328, + "step": 3285 + }, + { + "epoch": 0.05, + "grad_norm": 0.455078125, + "learning_rate": 9.437751004016064e-05, + "loss": 0.9807, + "step": 3290 + }, + { + "epoch": 0.05, + "grad_norm": 0.45703125, + "learning_rate": 9.452094090648308e-05, + "loss": 1.1077, + "step": 3295 + }, + { + "epoch": 0.05, + "grad_norm": 0.490234375, + "learning_rate": 9.466437177280552e-05, + "loss": 1.0933, + "step": 3300 + }, + { + "epoch": 0.05, + "grad_norm": 0.44921875, + "learning_rate": 9.480780263912795e-05, + "loss": 0.9658, + "step": 3305 + }, + { + "epoch": 0.05, + "grad_norm": 0.4296875, + "learning_rate": 9.495123350545037e-05, + "loss": 0.9943, + "step": 3310 + }, + { + "epoch": 0.05, + "grad_norm": 0.408203125, + "learning_rate": 9.509466437177281e-05, + "loss": 1.0197, + "step": 3315 + }, + { + "epoch": 0.05, + "grad_norm": 0.423828125, + "learning_rate": 9.523809523809524e-05, + "loss": 1.0082, + "step": 3320 + }, + { + "epoch": 0.05, + "grad_norm": 0.412109375, + "learning_rate": 9.538152610441767e-05, + "loss": 0.9112, + "step": 3325 + }, + { + "epoch": 0.05, + "grad_norm": 0.46484375, + "learning_rate": 9.552495697074011e-05, + "loss": 0.9961, + "step": 3330 + }, + { + "epoch": 0.05, + "grad_norm": 0.416015625, + "learning_rate": 9.566838783706255e-05, + "loss": 1.0198, + "step": 3335 + }, + { + "epoch": 0.05, + "grad_norm": 0.474609375, + "learning_rate": 9.581181870338497e-05, + "loss": 0.9685, + "step": 3340 + }, + { + "epoch": 0.05, + "grad_norm": 0.474609375, + "learning_rate": 9.59552495697074e-05, + "loss": 1.0192, + "step": 3345 + }, + { + "epoch": 0.05, + "grad_norm": 0.408203125, + "learning_rate": 9.609868043602984e-05, + "loss": 1.1994, + "step": 3350 + }, + { + "epoch": 0.05, + "grad_norm": 0.4375, + "learning_rate": 9.624211130235227e-05, + "loss": 0.954, + "step": 3355 + }, + { + "epoch": 0.05, + "grad_norm": 0.44921875, + "learning_rate": 9.638554216867471e-05, + "loss": 0.8963, + "step": 3360 + }, + { + "epoch": 0.05, + "grad_norm": 0.42578125, + "learning_rate": 9.652897303499713e-05, + "loss": 0.9157, + "step": 3365 + }, + { + "epoch": 0.05, + "grad_norm": 0.4296875, + "learning_rate": 9.667240390131956e-05, + "loss": 0.8979, + "step": 3370 + }, + { + "epoch": 0.05, + "grad_norm": 0.42578125, + "learning_rate": 9.6815834767642e-05, + "loss": 1.1164, + "step": 3375 + }, + { + "epoch": 0.05, + "grad_norm": 0.447265625, + "learning_rate": 9.695926563396444e-05, + "loss": 1.0539, + "step": 3380 + }, + { + "epoch": 0.05, + "grad_norm": 0.427734375, + "learning_rate": 9.710269650028687e-05, + "loss": 0.9551, + "step": 3385 + }, + { + "epoch": 0.05, + "grad_norm": 0.4140625, + "learning_rate": 9.72461273666093e-05, + "loss": 1.0195, + "step": 3390 + }, + { + "epoch": 0.05, + "grad_norm": 0.421875, + "learning_rate": 9.738955823293173e-05, + "loss": 0.9593, + "step": 3395 + }, + { + "epoch": 0.05, + "grad_norm": 0.431640625, + "learning_rate": 9.753298909925417e-05, + "loss": 0.8717, + "step": 3400 + }, + { + "epoch": 0.05, + "grad_norm": 0.4296875, + "learning_rate": 9.767641996557659e-05, + "loss": 0.8758, + "step": 3405 + }, + { + "epoch": 0.05, + "grad_norm": 0.427734375, + "learning_rate": 9.781985083189903e-05, + "loss": 0.9862, + "step": 3410 + }, + { + "epoch": 0.05, + "grad_norm": 0.388671875, + "learning_rate": 9.796328169822147e-05, + "loss": 0.957, + "step": 3415 + }, + { + "epoch": 0.05, + "grad_norm": 0.42578125, + "learning_rate": 9.81067125645439e-05, + "loss": 0.894, + "step": 3420 + }, + { + "epoch": 0.05, + "grad_norm": 0.412109375, + "learning_rate": 9.825014343086632e-05, + "loss": 0.9824, + "step": 3425 + }, + { + "epoch": 0.05, + "grad_norm": 0.388671875, + "learning_rate": 9.839357429718876e-05, + "loss": 1.0242, + "step": 3430 + }, + { + "epoch": 0.05, + "grad_norm": 0.4375, + "learning_rate": 9.853700516351119e-05, + "loss": 0.8554, + "step": 3435 + }, + { + "epoch": 0.05, + "grad_norm": 0.466796875, + "learning_rate": 9.868043602983363e-05, + "loss": 1.0288, + "step": 3440 + }, + { + "epoch": 0.05, + "grad_norm": 0.5, + "learning_rate": 9.882386689615606e-05, + "loss": 1.0798, + "step": 3445 + }, + { + "epoch": 0.05, + "grad_norm": 0.412109375, + "learning_rate": 9.89672977624785e-05, + "loss": 0.8537, + "step": 3450 + }, + { + "epoch": 0.05, + "grad_norm": 0.443359375, + "learning_rate": 9.911072862880092e-05, + "loss": 0.9823, + "step": 3455 + }, + { + "epoch": 0.05, + "grad_norm": 0.38671875, + "learning_rate": 9.925415949512336e-05, + "loss": 0.9169, + "step": 3460 + }, + { + "epoch": 0.05, + "grad_norm": 0.4296875, + "learning_rate": 9.939759036144579e-05, + "loss": 0.8519, + "step": 3465 + }, + { + "epoch": 0.05, + "grad_norm": 0.396484375, + "learning_rate": 9.954102122776822e-05, + "loss": 0.9385, + "step": 3470 + }, + { + "epoch": 0.05, + "grad_norm": 0.376953125, + "learning_rate": 9.968445209409066e-05, + "loss": 0.936, + "step": 3475 + }, + { + "epoch": 0.05, + "grad_norm": 0.380859375, + "learning_rate": 9.982788296041308e-05, + "loss": 0.982, + "step": 3480 + }, + { + "epoch": 0.05, + "grad_norm": 0.453125, + "learning_rate": 9.997131382673551e-05, + "loss": 0.8798, + "step": 3485 + }, + { + "epoch": 0.05, + "grad_norm": 0.421875, + "learning_rate": 0.00010011474469305795, + "loss": 1.1016, + "step": 3490 + }, + { + "epoch": 0.05, + "grad_norm": 0.439453125, + "learning_rate": 0.00010025817555938038, + "loss": 1.2804, + "step": 3495 + }, + { + "epoch": 0.05, + "grad_norm": 0.41796875, + "learning_rate": 0.00010040160642570282, + "loss": 1.0197, + "step": 3500 + }, + { + "epoch": 0.05, + "grad_norm": 0.419921875, + "learning_rate": 0.00010054503729202524, + "loss": 1.0637, + "step": 3505 + }, + { + "epoch": 0.05, + "grad_norm": 0.41015625, + "learning_rate": 0.00010068846815834767, + "loss": 0.9281, + "step": 3510 + }, + { + "epoch": 0.05, + "grad_norm": 0.412109375, + "learning_rate": 0.00010083189902467012, + "loss": 0.9208, + "step": 3515 + }, + { + "epoch": 0.05, + "grad_norm": 0.50390625, + "learning_rate": 0.00010097532989099255, + "loss": 0.9596, + "step": 3520 + }, + { + "epoch": 0.05, + "grad_norm": 0.47265625, + "learning_rate": 0.00010111876075731499, + "loss": 0.917, + "step": 3525 + }, + { + "epoch": 0.05, + "grad_norm": 0.37109375, + "learning_rate": 0.00010126219162363742, + "loss": 0.9712, + "step": 3530 + }, + { + "epoch": 0.05, + "grad_norm": 0.44921875, + "learning_rate": 0.00010140562248995984, + "loss": 1.081, + "step": 3535 + }, + { + "epoch": 0.05, + "grad_norm": 0.400390625, + "learning_rate": 0.00010154905335628228, + "loss": 1.0385, + "step": 3540 + }, + { + "epoch": 0.05, + "grad_norm": 0.41796875, + "learning_rate": 0.00010169248422260471, + "loss": 0.9869, + "step": 3545 + }, + { + "epoch": 0.05, + "grad_norm": 0.4140625, + "learning_rate": 0.00010183591508892714, + "loss": 0.8981, + "step": 3550 + }, + { + "epoch": 0.05, + "grad_norm": 0.47265625, + "learning_rate": 0.00010197934595524959, + "loss": 0.8882, + "step": 3555 + }, + { + "epoch": 0.05, + "grad_norm": 0.400390625, + "learning_rate": 0.000102122776821572, + "loss": 0.8624, + "step": 3560 + }, + { + "epoch": 0.05, + "grad_norm": 0.388671875, + "learning_rate": 0.00010226620768789443, + "loss": 0.9483, + "step": 3565 + }, + { + "epoch": 0.05, + "grad_norm": 0.51171875, + "learning_rate": 0.00010240963855421688, + "loss": 0.9354, + "step": 3570 + }, + { + "epoch": 0.05, + "grad_norm": 0.44921875, + "learning_rate": 0.00010255306942053931, + "loss": 0.9673, + "step": 3575 + }, + { + "epoch": 0.05, + "grad_norm": 0.421875, + "learning_rate": 0.00010269650028686174, + "loss": 0.9585, + "step": 3580 + }, + { + "epoch": 0.05, + "grad_norm": 0.423828125, + "learning_rate": 0.00010283993115318418, + "loss": 1.0179, + "step": 3585 + }, + { + "epoch": 0.05, + "grad_norm": 0.396484375, + "learning_rate": 0.0001029833620195066, + "loss": 0.9306, + "step": 3590 + }, + { + "epoch": 0.05, + "grad_norm": 0.546875, + "learning_rate": 0.00010312679288582903, + "loss": 1.0562, + "step": 3595 + }, + { + "epoch": 0.05, + "grad_norm": 0.466796875, + "learning_rate": 0.00010327022375215147, + "loss": 0.8476, + "step": 3600 + }, + { + "epoch": 0.05, + "grad_norm": 0.4140625, + "learning_rate": 0.0001034136546184739, + "loss": 1.0539, + "step": 3605 + }, + { + "epoch": 0.05, + "grad_norm": 0.392578125, + "learning_rate": 0.00010355708548479632, + "loss": 0.9696, + "step": 3610 + }, + { + "epoch": 0.05, + "grad_norm": 0.416015625, + "learning_rate": 0.00010370051635111878, + "loss": 1.0788, + "step": 3615 + }, + { + "epoch": 0.05, + "grad_norm": 0.4375, + "learning_rate": 0.00010384394721744119, + "loss": 1.1724, + "step": 3620 + }, + { + "epoch": 0.05, + "grad_norm": 0.4140625, + "learning_rate": 0.00010398737808376362, + "loss": 1.0326, + "step": 3625 + }, + { + "epoch": 0.05, + "grad_norm": 0.416015625, + "learning_rate": 0.00010413080895008607, + "loss": 0.9767, + "step": 3630 + }, + { + "epoch": 0.05, + "grad_norm": 0.49609375, + "learning_rate": 0.0001042742398164085, + "loss": 0.9976, + "step": 3635 + }, + { + "epoch": 0.05, + "grad_norm": 0.376953125, + "learning_rate": 0.00010441767068273094, + "loss": 0.898, + "step": 3640 + }, + { + "epoch": 0.05, + "grad_norm": 0.40625, + "learning_rate": 0.00010456110154905336, + "loss": 0.8513, + "step": 3645 + }, + { + "epoch": 0.05, + "grad_norm": 0.439453125, + "learning_rate": 0.00010470453241537579, + "loss": 1.0365, + "step": 3650 + }, + { + "epoch": 0.05, + "grad_norm": 0.46875, + "learning_rate": 0.00010484796328169823, + "loss": 1.0731, + "step": 3655 + }, + { + "epoch": 0.05, + "grad_norm": 0.478515625, + "learning_rate": 0.00010499139414802066, + "loss": 0.9298, + "step": 3660 + }, + { + "epoch": 0.05, + "grad_norm": 0.65625, + "learning_rate": 0.00010513482501434308, + "loss": 1.0778, + "step": 3665 + }, + { + "epoch": 0.05, + "grad_norm": 0.44921875, + "learning_rate": 0.00010527825588066554, + "loss": 0.9948, + "step": 3670 + }, + { + "epoch": 0.05, + "grad_norm": 0.53515625, + "learning_rate": 0.00010542168674698796, + "loss": 0.9008, + "step": 3675 + }, + { + "epoch": 0.05, + "grad_norm": 0.408203125, + "learning_rate": 0.00010556511761331038, + "loss": 0.9447, + "step": 3680 + }, + { + "epoch": 0.05, + "grad_norm": 0.51171875, + "learning_rate": 0.00010570854847963283, + "loss": 0.816, + "step": 3685 + }, + { + "epoch": 0.05, + "grad_norm": 0.4765625, + "learning_rate": 0.00010585197934595526, + "loss": 0.9181, + "step": 3690 + }, + { + "epoch": 0.05, + "grad_norm": 0.453125, + "learning_rate": 0.00010599541021227768, + "loss": 0.9687, + "step": 3695 + }, + { + "epoch": 0.05, + "grad_norm": 0.419921875, + "learning_rate": 0.00010613884107860012, + "loss": 0.8236, + "step": 3700 + }, + { + "epoch": 0.05, + "grad_norm": 0.447265625, + "learning_rate": 0.00010628227194492255, + "loss": 0.862, + "step": 3705 + }, + { + "epoch": 0.05, + "grad_norm": 0.443359375, + "learning_rate": 0.00010642570281124498, + "loss": 1.0777, + "step": 3710 + }, + { + "epoch": 0.05, + "grad_norm": 0.44140625, + "learning_rate": 0.00010656913367756742, + "loss": 0.8843, + "step": 3715 + }, + { + "epoch": 0.05, + "grad_norm": 0.44921875, + "learning_rate": 0.00010671256454388984, + "loss": 0.8764, + "step": 3720 + }, + { + "epoch": 0.05, + "grad_norm": 0.443359375, + "learning_rate": 0.00010685599541021227, + "loss": 0.9723, + "step": 3725 + }, + { + "epoch": 0.05, + "grad_norm": 0.431640625, + "learning_rate": 0.00010699942627653472, + "loss": 0.9971, + "step": 3730 + }, + { + "epoch": 0.05, + "grad_norm": 0.435546875, + "learning_rate": 0.00010714285714285715, + "loss": 1.03, + "step": 3735 + }, + { + "epoch": 0.05, + "grad_norm": 0.44921875, + "learning_rate": 0.00010728628800917956, + "loss": 1.0012, + "step": 3740 + }, + { + "epoch": 0.05, + "grad_norm": 0.435546875, + "learning_rate": 0.00010742971887550202, + "loss": 0.9739, + "step": 3745 + }, + { + "epoch": 0.05, + "grad_norm": 0.41796875, + "learning_rate": 0.00010757314974182444, + "loss": 1.0166, + "step": 3750 + }, + { + "epoch": 0.05, + "grad_norm": 0.44921875, + "learning_rate": 0.00010771658060814688, + "loss": 1.0096, + "step": 3755 + }, + { + "epoch": 0.05, + "grad_norm": 0.44921875, + "learning_rate": 0.00010786001147446931, + "loss": 0.9461, + "step": 3760 + }, + { + "epoch": 0.05, + "grad_norm": 0.478515625, + "learning_rate": 0.00010800344234079174, + "loss": 0.802, + "step": 3765 + }, + { + "epoch": 0.05, + "grad_norm": 0.515625, + "learning_rate": 0.00010814687320711419, + "loss": 1.0664, + "step": 3770 + }, + { + "epoch": 0.05, + "grad_norm": 0.408203125, + "learning_rate": 0.0001082903040734366, + "loss": 0.9477, + "step": 3775 + }, + { + "epoch": 0.05, + "grad_norm": 0.439453125, + "learning_rate": 0.00010843373493975903, + "loss": 1.0207, + "step": 3780 + }, + { + "epoch": 0.05, + "grad_norm": 0.43359375, + "learning_rate": 0.00010857716580608148, + "loss": 0.979, + "step": 3785 + }, + { + "epoch": 0.05, + "grad_norm": 0.392578125, + "learning_rate": 0.00010872059667240391, + "loss": 0.896, + "step": 3790 + }, + { + "epoch": 0.05, + "grad_norm": 0.423828125, + "learning_rate": 0.00010886402753872634, + "loss": 1.0602, + "step": 3795 + }, + { + "epoch": 0.05, + "grad_norm": 0.439453125, + "learning_rate": 0.00010900745840504878, + "loss": 0.8848, + "step": 3800 + }, + { + "epoch": 0.05, + "grad_norm": 0.43359375, + "learning_rate": 0.0001091508892713712, + "loss": 0.9814, + "step": 3805 + }, + { + "epoch": 0.05, + "grad_norm": 0.4140625, + "learning_rate": 0.00010929432013769363, + "loss": 0.8746, + "step": 3810 + }, + { + "epoch": 0.05, + "grad_norm": 0.482421875, + "learning_rate": 0.00010943775100401607, + "loss": 0.9627, + "step": 3815 + }, + { + "epoch": 0.05, + "grad_norm": 0.412109375, + "learning_rate": 0.0001095811818703385, + "loss": 0.943, + "step": 3820 + }, + { + "epoch": 0.05, + "grad_norm": 0.423828125, + "learning_rate": 0.00010972461273666092, + "loss": 0.9082, + "step": 3825 + }, + { + "epoch": 0.05, + "grad_norm": 0.41796875, + "learning_rate": 0.00010986804360298338, + "loss": 1.1245, + "step": 3830 + }, + { + "epoch": 0.06, + "grad_norm": 0.4453125, + "learning_rate": 0.00011001147446930579, + "loss": 1.0683, + "step": 3835 + }, + { + "epoch": 0.06, + "grad_norm": 0.427734375, + "learning_rate": 0.00011015490533562822, + "loss": 0.9948, + "step": 3840 + }, + { + "epoch": 0.06, + "grad_norm": 0.421875, + "learning_rate": 0.00011029833620195067, + "loss": 0.933, + "step": 3845 + }, + { + "epoch": 0.06, + "grad_norm": 0.337890625, + "learning_rate": 0.0001104417670682731, + "loss": 1.0216, + "step": 3850 + }, + { + "epoch": 0.06, + "grad_norm": 0.41015625, + "learning_rate": 0.00011058519793459554, + "loss": 0.999, + "step": 3855 + }, + { + "epoch": 0.06, + "grad_norm": 0.443359375, + "learning_rate": 0.00011072862880091797, + "loss": 1.0413, + "step": 3860 + }, + { + "epoch": 0.06, + "grad_norm": 0.4765625, + "learning_rate": 0.00011087205966724039, + "loss": 1.0061, + "step": 3865 + }, + { + "epoch": 0.06, + "grad_norm": 0.408203125, + "learning_rate": 0.00011101549053356283, + "loss": 0.8976, + "step": 3870 + }, + { + "epoch": 0.06, + "grad_norm": 0.423828125, + "learning_rate": 0.00011115892139988526, + "loss": 0.9141, + "step": 3875 + }, + { + "epoch": 0.06, + "grad_norm": 0.447265625, + "learning_rate": 0.00011130235226620769, + "loss": 1.0766, + "step": 3880 + }, + { + "epoch": 0.06, + "grad_norm": 0.375, + "learning_rate": 0.00011144578313253014, + "loss": 0.9397, + "step": 3885 + }, + { + "epoch": 0.06, + "grad_norm": 0.55078125, + "learning_rate": 0.00011158921399885257, + "loss": 1.0154, + "step": 3890 + }, + { + "epoch": 0.06, + "grad_norm": 0.408203125, + "learning_rate": 0.00011173264486517498, + "loss": 0.9654, + "step": 3895 + }, + { + "epoch": 0.06, + "grad_norm": 0.4453125, + "learning_rate": 0.00011187607573149743, + "loss": 1.0002, + "step": 3900 + }, + { + "epoch": 0.06, + "grad_norm": 0.3828125, + "learning_rate": 0.00011201950659781986, + "loss": 0.9276, + "step": 3905 + }, + { + "epoch": 0.06, + "grad_norm": 0.41015625, + "learning_rate": 0.00011216293746414229, + "loss": 0.9197, + "step": 3910 + }, + { + "epoch": 0.06, + "grad_norm": 0.41015625, + "learning_rate": 0.00011230636833046473, + "loss": 1.0522, + "step": 3915 + }, + { + "epoch": 0.06, + "grad_norm": 0.451171875, + "learning_rate": 0.00011244979919678715, + "loss": 0.9447, + "step": 3920 + }, + { + "epoch": 0.06, + "grad_norm": 0.4765625, + "learning_rate": 0.00011259323006310958, + "loss": 0.9358, + "step": 3925 + }, + { + "epoch": 0.06, + "grad_norm": 0.46484375, + "learning_rate": 0.00011273666092943202, + "loss": 1.0351, + "step": 3930 + }, + { + "epoch": 0.06, + "grad_norm": 0.37109375, + "learning_rate": 0.00011288009179575445, + "loss": 0.9158, + "step": 3935 + }, + { + "epoch": 0.06, + "grad_norm": 0.59375, + "learning_rate": 0.00011302352266207687, + "loss": 0.9975, + "step": 3940 + }, + { + "epoch": 0.06, + "grad_norm": 0.50390625, + "learning_rate": 0.00011316695352839933, + "loss": 0.9493, + "step": 3945 + }, + { + "epoch": 0.06, + "grad_norm": 0.42578125, + "learning_rate": 0.00011331038439472175, + "loss": 1.0005, + "step": 3950 + }, + { + "epoch": 0.06, + "grad_norm": 0.4609375, + "learning_rate": 0.00011345381526104417, + "loss": 1.1862, + "step": 3955 + }, + { + "epoch": 0.06, + "grad_norm": 0.408203125, + "learning_rate": 0.00011359724612736662, + "loss": 1.122, + "step": 3960 + }, + { + "epoch": 0.06, + "grad_norm": 0.447265625, + "learning_rate": 0.00011374067699368905, + "loss": 1.0702, + "step": 3965 + }, + { + "epoch": 0.06, + "grad_norm": 0.392578125, + "learning_rate": 0.00011388410786001149, + "loss": 0.8827, + "step": 3970 + }, + { + "epoch": 0.06, + "grad_norm": 0.408203125, + "learning_rate": 0.00011402753872633391, + "loss": 0.8097, + "step": 3975 + }, + { + "epoch": 0.06, + "grad_norm": 0.412109375, + "learning_rate": 0.00011417096959265634, + "loss": 0.935, + "step": 3980 + }, + { + "epoch": 0.06, + "grad_norm": 0.46875, + "learning_rate": 0.00011431440045897879, + "loss": 0.9727, + "step": 3985 + }, + { + "epoch": 0.06, + "grad_norm": 0.46484375, + "learning_rate": 0.0001144578313253012, + "loss": 0.9867, + "step": 3990 + }, + { + "epoch": 0.06, + "grad_norm": 0.43359375, + "learning_rate": 0.00011460126219162363, + "loss": 1.0205, + "step": 3995 + }, + { + "epoch": 0.06, + "grad_norm": 0.4375, + "learning_rate": 0.00011474469305794609, + "loss": 0.9801, + "step": 4000 + }, + { + "epoch": 0.06, + "grad_norm": 0.427734375, + "learning_rate": 0.00011488812392426851, + "loss": 0.9266, + "step": 4005 + }, + { + "epoch": 0.06, + "grad_norm": 0.50390625, + "learning_rate": 0.00011503155479059094, + "loss": 0.9448, + "step": 4010 + }, + { + "epoch": 0.06, + "grad_norm": 0.455078125, + "learning_rate": 0.00011517498565691338, + "loss": 0.9925, + "step": 4015 + }, + { + "epoch": 0.06, + "grad_norm": 0.396484375, + "learning_rate": 0.0001153184165232358, + "loss": 1.0278, + "step": 4020 + }, + { + "epoch": 0.06, + "grad_norm": 0.4296875, + "learning_rate": 0.00011546184738955823, + "loss": 0.942, + "step": 4025 + }, + { + "epoch": 0.06, + "grad_norm": 0.412109375, + "learning_rate": 0.00011560527825588067, + "loss": 0.9767, + "step": 4030 + }, + { + "epoch": 0.06, + "grad_norm": 0.416015625, + "learning_rate": 0.0001157487091222031, + "loss": 0.9365, + "step": 4035 + }, + { + "epoch": 0.06, + "grad_norm": 0.423828125, + "learning_rate": 0.00011589213998852553, + "loss": 0.9618, + "step": 4040 + }, + { + "epoch": 0.06, + "grad_norm": 0.474609375, + "learning_rate": 0.00011603557085484798, + "loss": 0.8656, + "step": 4045 + }, + { + "epoch": 0.06, + "grad_norm": 0.44140625, + "learning_rate": 0.0001161790017211704, + "loss": 0.8396, + "step": 4050 + }, + { + "epoch": 0.06, + "grad_norm": 0.416015625, + "learning_rate": 0.00011632243258749282, + "loss": 0.9391, + "step": 4055 + }, + { + "epoch": 0.06, + "grad_norm": 0.4609375, + "learning_rate": 0.00011646586345381527, + "loss": 0.9302, + "step": 4060 + }, + { + "epoch": 0.06, + "grad_norm": 0.455078125, + "learning_rate": 0.0001166092943201377, + "loss": 1.0018, + "step": 4065 + }, + { + "epoch": 0.06, + "grad_norm": 0.37109375, + "learning_rate": 0.00011675272518646013, + "loss": 0.9767, + "step": 4070 + }, + { + "epoch": 0.06, + "grad_norm": 0.45703125, + "learning_rate": 0.00011689615605278257, + "loss": 0.9765, + "step": 4075 + }, + { + "epoch": 0.06, + "grad_norm": 0.42578125, + "learning_rate": 0.000117039586919105, + "loss": 1.0398, + "step": 4080 + }, + { + "epoch": 0.06, + "grad_norm": 0.41015625, + "learning_rate": 0.00011718301778542743, + "loss": 0.8486, + "step": 4085 + }, + { + "epoch": 0.06, + "grad_norm": 0.439453125, + "learning_rate": 0.00011732644865174986, + "loss": 0.8443, + "step": 4090 + }, + { + "epoch": 0.06, + "grad_norm": 0.443359375, + "learning_rate": 0.00011746987951807229, + "loss": 0.9544, + "step": 4095 + }, + { + "epoch": 0.06, + "grad_norm": 0.4609375, + "learning_rate": 0.00011761331038439474, + "loss": 1.1874, + "step": 4100 + }, + { + "epoch": 0.06, + "grad_norm": 0.41796875, + "learning_rate": 0.00011775674125071717, + "loss": 1.1332, + "step": 4105 + }, + { + "epoch": 0.06, + "grad_norm": 0.431640625, + "learning_rate": 0.00011790017211703958, + "loss": 0.9719, + "step": 4110 + }, + { + "epoch": 0.06, + "grad_norm": 0.44140625, + "learning_rate": 0.00011804360298336203, + "loss": 0.909, + "step": 4115 + }, + { + "epoch": 0.06, + "grad_norm": 0.40234375, + "learning_rate": 0.00011818703384968446, + "loss": 1.0488, + "step": 4120 + }, + { + "epoch": 0.06, + "grad_norm": 0.462890625, + "learning_rate": 0.00011833046471600689, + "loss": 1.1675, + "step": 4125 + }, + { + "epoch": 0.06, + "grad_norm": 0.396484375, + "learning_rate": 0.00011847389558232933, + "loss": 0.9763, + "step": 4130 + }, + { + "epoch": 0.06, + "grad_norm": 0.451171875, + "learning_rate": 0.00011861732644865175, + "loss": 0.9416, + "step": 4135 + }, + { + "epoch": 0.06, + "grad_norm": 0.47265625, + "learning_rate": 0.00011876075731497418, + "loss": 0.9931, + "step": 4140 + }, + { + "epoch": 0.06, + "grad_norm": 0.439453125, + "learning_rate": 0.00011890418818129662, + "loss": 0.8678, + "step": 4145 + }, + { + "epoch": 0.06, + "grad_norm": 0.482421875, + "learning_rate": 0.00011904761904761905, + "loss": 1.0077, + "step": 4150 + }, + { + "epoch": 0.06, + "grad_norm": 0.4453125, + "learning_rate": 0.00011919104991394147, + "loss": 0.8943, + "step": 4155 + }, + { + "epoch": 0.06, + "grad_norm": 0.44921875, + "learning_rate": 0.00011933448078026393, + "loss": 1.0197, + "step": 4160 + }, + { + "epoch": 0.06, + "grad_norm": 0.43359375, + "learning_rate": 0.00011947791164658635, + "loss": 0.9257, + "step": 4165 + }, + { + "epoch": 0.06, + "grad_norm": 0.42578125, + "learning_rate": 0.00011962134251290877, + "loss": 0.9073, + "step": 4170 + }, + { + "epoch": 0.06, + "grad_norm": 0.412109375, + "learning_rate": 0.00011976477337923122, + "loss": 0.9657, + "step": 4175 + }, + { + "epoch": 0.06, + "grad_norm": 0.427734375, + "learning_rate": 0.00011990820424555365, + "loss": 0.9148, + "step": 4180 + }, + { + "epoch": 0.06, + "grad_norm": 0.4609375, + "learning_rate": 0.00012005163511187607, + "loss": 0.9801, + "step": 4185 + }, + { + "epoch": 0.06, + "grad_norm": 0.578125, + "learning_rate": 0.00012019506597819851, + "loss": 1.0169, + "step": 4190 + }, + { + "epoch": 0.06, + "grad_norm": 0.453125, + "learning_rate": 0.00012033849684452094, + "loss": 0.9711, + "step": 4195 + }, + { + "epoch": 0.06, + "grad_norm": 0.431640625, + "learning_rate": 0.0001204819277108434, + "loss": 1.0298, + "step": 4200 + }, + { + "epoch": 0.06, + "grad_norm": 0.427734375, + "learning_rate": 0.00012062535857716581, + "loss": 0.9817, + "step": 4205 + }, + { + "epoch": 0.06, + "grad_norm": 0.361328125, + "learning_rate": 0.00012076878944348823, + "loss": 0.8817, + "step": 4210 + }, + { + "epoch": 0.06, + "grad_norm": 0.427734375, + "learning_rate": 0.00012091222030981069, + "loss": 0.8855, + "step": 4215 + }, + { + "epoch": 0.06, + "grad_norm": 0.416015625, + "learning_rate": 0.00012105565117613311, + "loss": 1.0517, + "step": 4220 + }, + { + "epoch": 0.06, + "grad_norm": 0.37890625, + "learning_rate": 0.00012119908204245554, + "loss": 0.9723, + "step": 4225 + }, + { + "epoch": 0.06, + "grad_norm": 0.4140625, + "learning_rate": 0.00012134251290877798, + "loss": 0.8449, + "step": 4230 + }, + { + "epoch": 0.06, + "grad_norm": 0.384765625, + "learning_rate": 0.00012148594377510041, + "loss": 0.9528, + "step": 4235 + }, + { + "epoch": 0.06, + "grad_norm": 0.451171875, + "learning_rate": 0.00012162937464142283, + "loss": 1.0405, + "step": 4240 + }, + { + "epoch": 0.06, + "grad_norm": 0.419921875, + "learning_rate": 0.00012177280550774528, + "loss": 0.9225, + "step": 4245 + }, + { + "epoch": 0.06, + "grad_norm": 0.427734375, + "learning_rate": 0.0001219162363740677, + "loss": 0.8293, + "step": 4250 + }, + { + "epoch": 0.06, + "grad_norm": 0.41796875, + "learning_rate": 0.00012205966724039013, + "loss": 0.8751, + "step": 4255 + }, + { + "epoch": 0.06, + "grad_norm": 0.494140625, + "learning_rate": 0.00012220309810671257, + "loss": 1.0213, + "step": 4260 + }, + { + "epoch": 0.06, + "grad_norm": 0.423828125, + "learning_rate": 0.000122346528973035, + "loss": 0.9466, + "step": 4265 + }, + { + "epoch": 0.06, + "grad_norm": 0.404296875, + "learning_rate": 0.00012248995983935742, + "loss": 0.8652, + "step": 4270 + }, + { + "epoch": 0.06, + "grad_norm": 0.423828125, + "learning_rate": 0.00012263339070567986, + "loss": 0.9067, + "step": 4275 + }, + { + "epoch": 0.06, + "grad_norm": 0.375, + "learning_rate": 0.0001227768215720023, + "loss": 0.8576, + "step": 4280 + }, + { + "epoch": 0.06, + "grad_norm": 0.53125, + "learning_rate": 0.00012292025243832472, + "loss": 1.1626, + "step": 4285 + }, + { + "epoch": 0.06, + "grad_norm": 0.427734375, + "learning_rate": 0.00012306368330464718, + "loss": 0.9502, + "step": 4290 + }, + { + "epoch": 0.06, + "grad_norm": 0.435546875, + "learning_rate": 0.0001232071141709696, + "loss": 1.006, + "step": 4295 + }, + { + "epoch": 0.06, + "grad_norm": 0.375, + "learning_rate": 0.000123350545037292, + "loss": 0.9994, + "step": 4300 + }, + { + "epoch": 0.06, + "grad_norm": 0.43359375, + "learning_rate": 0.00012349397590361448, + "loss": 1.0563, + "step": 4305 + }, + { + "epoch": 0.06, + "grad_norm": 0.4375, + "learning_rate": 0.0001236374067699369, + "loss": 0.9123, + "step": 4310 + }, + { + "epoch": 0.06, + "grad_norm": 0.43359375, + "learning_rate": 0.00012378083763625933, + "loss": 0.9732, + "step": 4315 + }, + { + "epoch": 0.06, + "grad_norm": 0.43359375, + "learning_rate": 0.00012392426850258177, + "loss": 0.9571, + "step": 4320 + }, + { + "epoch": 0.06, + "grad_norm": 0.412109375, + "learning_rate": 0.00012406769936890418, + "loss": 0.9954, + "step": 4325 + }, + { + "epoch": 0.06, + "grad_norm": 0.4140625, + "learning_rate": 0.00012421113023522665, + "loss": 1.0072, + "step": 4330 + }, + { + "epoch": 0.06, + "grad_norm": 0.453125, + "learning_rate": 0.00012435456110154906, + "loss": 0.9763, + "step": 4335 + }, + { + "epoch": 0.06, + "grad_norm": 0.42578125, + "learning_rate": 0.00012449799196787148, + "loss": 1.0008, + "step": 4340 + }, + { + "epoch": 0.06, + "grad_norm": 0.482421875, + "learning_rate": 0.00012464142283419394, + "loss": 1.0088, + "step": 4345 + }, + { + "epoch": 0.06, + "grad_norm": 0.388671875, + "learning_rate": 0.00012478485370051636, + "loss": 1.1205, + "step": 4350 + }, + { + "epoch": 0.06, + "grad_norm": 0.421875, + "learning_rate": 0.00012492828456683877, + "loss": 0.991, + "step": 4355 + }, + { + "epoch": 0.06, + "grad_norm": 0.48828125, + "learning_rate": 0.00012507171543316124, + "loss": 0.8901, + "step": 4360 + }, + { + "epoch": 0.06, + "grad_norm": 0.46484375, + "learning_rate": 0.00012521514629948365, + "loss": 1.1043, + "step": 4365 + }, + { + "epoch": 0.06, + "grad_norm": 0.42578125, + "learning_rate": 0.0001253585771658061, + "loss": 1.0119, + "step": 4370 + }, + { + "epoch": 0.06, + "grad_norm": 0.4609375, + "learning_rate": 0.00012550200803212853, + "loss": 1.0429, + "step": 4375 + }, + { + "epoch": 0.06, + "grad_norm": 1.015625, + "learning_rate": 0.00012564543889845094, + "loss": 1.0057, + "step": 4380 + }, + { + "epoch": 0.06, + "grad_norm": 0.43359375, + "learning_rate": 0.00012578886976477338, + "loss": 0.9982, + "step": 4385 + }, + { + "epoch": 0.06, + "grad_norm": 0.482421875, + "learning_rate": 0.00012593230063109582, + "loss": 0.9193, + "step": 4390 + }, + { + "epoch": 0.06, + "grad_norm": 0.421875, + "learning_rate": 0.00012607573149741824, + "loss": 0.9164, + "step": 4395 + }, + { + "epoch": 0.06, + "grad_norm": 0.44921875, + "learning_rate": 0.00012621916236374068, + "loss": 1.0736, + "step": 4400 + }, + { + "epoch": 0.06, + "grad_norm": 0.482421875, + "learning_rate": 0.00012636259323006312, + "loss": 1.0426, + "step": 4405 + }, + { + "epoch": 0.06, + "grad_norm": 0.419921875, + "learning_rate": 0.00012650602409638556, + "loss": 0.9148, + "step": 4410 + }, + { + "epoch": 0.06, + "grad_norm": 0.453125, + "learning_rate": 0.000126649454962708, + "loss": 1.1345, + "step": 4415 + }, + { + "epoch": 0.06, + "grad_norm": 0.4453125, + "learning_rate": 0.0001267928858290304, + "loss": 0.9112, + "step": 4420 + }, + { + "epoch": 0.06, + "grad_norm": 0.482421875, + "learning_rate": 0.00012693631669535285, + "loss": 0.9311, + "step": 4425 + }, + { + "epoch": 0.06, + "grad_norm": 0.455078125, + "learning_rate": 0.0001270797475616753, + "loss": 1.0584, + "step": 4430 + }, + { + "epoch": 0.06, + "grad_norm": 0.421875, + "learning_rate": 0.0001272231784279977, + "loss": 1.0231, + "step": 4435 + }, + { + "epoch": 0.06, + "grad_norm": 0.48046875, + "learning_rate": 0.00012736660929432014, + "loss": 1.1119, + "step": 4440 + }, + { + "epoch": 0.06, + "grad_norm": 0.431640625, + "learning_rate": 0.00012751004016064258, + "loss": 0.9418, + "step": 4445 + }, + { + "epoch": 0.06, + "grad_norm": 0.57421875, + "learning_rate": 0.00012765347102696502, + "loss": 0.9222, + "step": 4450 + }, + { + "epoch": 0.06, + "grad_norm": 0.4765625, + "learning_rate": 0.00012779690189328744, + "loss": 1.0592, + "step": 4455 + }, + { + "epoch": 0.06, + "grad_norm": 0.45703125, + "learning_rate": 0.00012794033275960988, + "loss": 1.0891, + "step": 4460 + }, + { + "epoch": 0.06, + "grad_norm": 0.431640625, + "learning_rate": 0.00012808376362593232, + "loss": 1.0498, + "step": 4465 + }, + { + "epoch": 0.06, + "grad_norm": 0.59765625, + "learning_rate": 0.00012822719449225473, + "loss": 1.0035, + "step": 4470 + }, + { + "epoch": 0.06, + "grad_norm": 0.439453125, + "learning_rate": 0.00012837062535857717, + "loss": 0.8811, + "step": 4475 + }, + { + "epoch": 0.06, + "grad_norm": 0.44921875, + "learning_rate": 0.0001285140562248996, + "loss": 1.0189, + "step": 4480 + }, + { + "epoch": 0.06, + "grad_norm": 0.439453125, + "learning_rate": 0.00012865748709122202, + "loss": 1.0349, + "step": 4485 + }, + { + "epoch": 0.06, + "grad_norm": 0.423828125, + "learning_rate": 0.00012880091795754446, + "loss": 0.954, + "step": 4490 + }, + { + "epoch": 0.06, + "grad_norm": 0.5, + "learning_rate": 0.0001289443488238669, + "loss": 0.8879, + "step": 4495 + }, + { + "epoch": 0.06, + "grad_norm": 0.453125, + "learning_rate": 0.00012908777969018932, + "loss": 0.9027, + "step": 4500 + }, + { + "epoch": 0.06, + "grad_norm": 0.4296875, + "learning_rate": 0.00012923121055651178, + "loss": 0.8992, + "step": 4505 + }, + { + "epoch": 0.06, + "grad_norm": 0.48828125, + "learning_rate": 0.0001293746414228342, + "loss": 1.3137, + "step": 4510 + }, + { + "epoch": 0.06, + "grad_norm": 0.42578125, + "learning_rate": 0.0001295180722891566, + "loss": 1.0226, + "step": 4515 + }, + { + "epoch": 0.06, + "grad_norm": 0.4765625, + "learning_rate": 0.00012966150315547908, + "loss": 0.9629, + "step": 4520 + }, + { + "epoch": 0.06, + "grad_norm": 0.443359375, + "learning_rate": 0.0001298049340218015, + "loss": 0.9313, + "step": 4525 + }, + { + "epoch": 0.06, + "grad_norm": 0.44921875, + "learning_rate": 0.00012994836488812393, + "loss": 0.9058, + "step": 4530 + }, + { + "epoch": 0.07, + "grad_norm": 0.427734375, + "learning_rate": 0.00013009179575444637, + "loss": 1.0217, + "step": 4535 + }, + { + "epoch": 0.07, + "grad_norm": 0.4609375, + "learning_rate": 0.00013023522662076878, + "loss": 0.9383, + "step": 4540 + }, + { + "epoch": 0.07, + "grad_norm": 0.41015625, + "learning_rate": 0.00013037865748709125, + "loss": 1.0084, + "step": 4545 + }, + { + "epoch": 0.07, + "grad_norm": 0.478515625, + "learning_rate": 0.00013052208835341366, + "loss": 1.0465, + "step": 4550 + }, + { + "epoch": 0.07, + "grad_norm": 0.333984375, + "learning_rate": 0.00013066551921973608, + "loss": 0.8307, + "step": 4555 + }, + { + "epoch": 0.07, + "grad_norm": 0.4453125, + "learning_rate": 0.00013080895008605854, + "loss": 0.9803, + "step": 4560 + }, + { + "epoch": 0.07, + "grad_norm": 0.4921875, + "learning_rate": 0.00013095238095238096, + "loss": 1.0079, + "step": 4565 + }, + { + "epoch": 0.07, + "grad_norm": 0.4453125, + "learning_rate": 0.0001310958118187034, + "loss": 1.0062, + "step": 4570 + }, + { + "epoch": 0.07, + "grad_norm": 0.423828125, + "learning_rate": 0.00013123924268502584, + "loss": 1.0667, + "step": 4575 + }, + { + "epoch": 0.07, + "grad_norm": 0.4375, + "learning_rate": 0.00013138267355134825, + "loss": 0.9888, + "step": 4580 + }, + { + "epoch": 0.07, + "grad_norm": 0.49609375, + "learning_rate": 0.0001315261044176707, + "loss": 1.1235, + "step": 4585 + }, + { + "epoch": 0.07, + "grad_norm": 0.44921875, + "learning_rate": 0.00013166953528399313, + "loss": 0.998, + "step": 4590 + }, + { + "epoch": 0.07, + "grad_norm": 0.447265625, + "learning_rate": 0.00013181296615031554, + "loss": 1.0042, + "step": 4595 + }, + { + "epoch": 0.07, + "grad_norm": 0.46484375, + "learning_rate": 0.00013195639701663798, + "loss": 0.9648, + "step": 4600 + }, + { + "epoch": 0.07, + "grad_norm": 0.4296875, + "learning_rate": 0.00013209982788296042, + "loss": 0.8943, + "step": 4605 + }, + { + "epoch": 0.07, + "grad_norm": 0.40234375, + "learning_rate": 0.00013224325874928284, + "loss": 0.9704, + "step": 4610 + }, + { + "epoch": 0.07, + "grad_norm": 0.423828125, + "learning_rate": 0.00013238668961560528, + "loss": 1.0426, + "step": 4615 + }, + { + "epoch": 0.07, + "grad_norm": 0.41015625, + "learning_rate": 0.00013253012048192772, + "loss": 0.9591, + "step": 4620 + }, + { + "epoch": 0.07, + "grad_norm": 0.48828125, + "learning_rate": 0.00013267355134825016, + "loss": 0.991, + "step": 4625 + }, + { + "epoch": 0.07, + "grad_norm": 0.455078125, + "learning_rate": 0.00013281698221457257, + "loss": 0.971, + "step": 4630 + }, + { + "epoch": 0.07, + "grad_norm": 0.490234375, + "learning_rate": 0.000132960413080895, + "loss": 1.1203, + "step": 4635 + }, + { + "epoch": 0.07, + "grad_norm": 0.52734375, + "learning_rate": 0.00013310384394721745, + "loss": 1.0527, + "step": 4640 + }, + { + "epoch": 0.07, + "grad_norm": 0.484375, + "learning_rate": 0.0001332472748135399, + "loss": 1.057, + "step": 4645 + }, + { + "epoch": 0.07, + "grad_norm": 0.4296875, + "learning_rate": 0.0001333907056798623, + "loss": 0.889, + "step": 4650 + }, + { + "epoch": 0.07, + "grad_norm": 0.46484375, + "learning_rate": 0.00013353413654618475, + "loss": 0.952, + "step": 4655 + }, + { + "epoch": 0.07, + "grad_norm": 0.41796875, + "learning_rate": 0.00013367756741250719, + "loss": 0.9844, + "step": 4660 + }, + { + "epoch": 0.07, + "grad_norm": 0.5234375, + "learning_rate": 0.00013382099827882963, + "loss": 0.899, + "step": 4665 + }, + { + "epoch": 0.07, + "grad_norm": 0.427734375, + "learning_rate": 0.00013396442914515204, + "loss": 1.0083, + "step": 4670 + }, + { + "epoch": 0.07, + "grad_norm": 0.494140625, + "learning_rate": 0.00013410786001147448, + "loss": 1.1103, + "step": 4675 + }, + { + "epoch": 0.07, + "grad_norm": 0.439453125, + "learning_rate": 0.00013425129087779692, + "loss": 1.0983, + "step": 4680 + }, + { + "epoch": 0.07, + "grad_norm": 0.42578125, + "learning_rate": 0.00013439472174411933, + "loss": 0.9257, + "step": 4685 + }, + { + "epoch": 0.07, + "grad_norm": 0.439453125, + "learning_rate": 0.00013453815261044177, + "loss": 0.9193, + "step": 4690 + }, + { + "epoch": 0.07, + "grad_norm": 0.453125, + "learning_rate": 0.0001346815834767642, + "loss": 0.9014, + "step": 4695 + }, + { + "epoch": 0.07, + "grad_norm": 0.4296875, + "learning_rate": 0.00013482501434308663, + "loss": 0.9115, + "step": 4700 + }, + { + "epoch": 0.07, + "grad_norm": 0.44140625, + "learning_rate": 0.00013496844520940907, + "loss": 0.9923, + "step": 4705 + }, + { + "epoch": 0.07, + "grad_norm": 0.423828125, + "learning_rate": 0.0001351118760757315, + "loss": 0.9589, + "step": 4710 + }, + { + "epoch": 0.07, + "grad_norm": 0.423828125, + "learning_rate": 0.00013525530694205392, + "loss": 0.8925, + "step": 4715 + }, + { + "epoch": 0.07, + "grad_norm": 0.46875, + "learning_rate": 0.00013539873780837639, + "loss": 1.1727, + "step": 4720 + }, + { + "epoch": 0.07, + "grad_norm": 0.4296875, + "learning_rate": 0.0001355421686746988, + "loss": 1.0245, + "step": 4725 + }, + { + "epoch": 0.07, + "grad_norm": 0.396484375, + "learning_rate": 0.0001356855995410212, + "loss": 0.9471, + "step": 4730 + }, + { + "epoch": 0.07, + "grad_norm": 0.388671875, + "learning_rate": 0.00013582903040734368, + "loss": 0.8213, + "step": 4735 + }, + { + "epoch": 0.07, + "grad_norm": 0.43359375, + "learning_rate": 0.0001359724612736661, + "loss": 0.9894, + "step": 4740 + }, + { + "epoch": 0.07, + "grad_norm": 0.451171875, + "learning_rate": 0.00013611589213998853, + "loss": 0.9985, + "step": 4745 + }, + { + "epoch": 0.07, + "grad_norm": 0.443359375, + "learning_rate": 0.00013625932300631097, + "loss": 0.9631, + "step": 4750 + }, + { + "epoch": 0.07, + "grad_norm": 0.408203125, + "learning_rate": 0.00013640275387263339, + "loss": 0.9722, + "step": 4755 + }, + { + "epoch": 0.07, + "grad_norm": 0.392578125, + "learning_rate": 0.00013654618473895585, + "loss": 0.9535, + "step": 4760 + }, + { + "epoch": 0.07, + "grad_norm": 0.41015625, + "learning_rate": 0.00013668961560527827, + "loss": 0.8753, + "step": 4765 + }, + { + "epoch": 0.07, + "grad_norm": 0.443359375, + "learning_rate": 0.00013683304647160068, + "loss": 0.9648, + "step": 4770 + }, + { + "epoch": 0.07, + "grad_norm": 0.451171875, + "learning_rate": 0.00013697647733792315, + "loss": 0.8829, + "step": 4775 + }, + { + "epoch": 0.07, + "grad_norm": 0.48046875, + "learning_rate": 0.00013711990820424556, + "loss": 1.0206, + "step": 4780 + }, + { + "epoch": 0.07, + "grad_norm": 0.408203125, + "learning_rate": 0.000137263339070568, + "loss": 0.9263, + "step": 4785 + }, + { + "epoch": 0.07, + "grad_norm": 0.453125, + "learning_rate": 0.00013740676993689044, + "loss": 0.9042, + "step": 4790 + }, + { + "epoch": 0.07, + "grad_norm": 0.44921875, + "learning_rate": 0.00013755020080321285, + "loss": 0.9451, + "step": 4795 + }, + { + "epoch": 0.07, + "grad_norm": 0.51171875, + "learning_rate": 0.0001376936316695353, + "loss": 1.0014, + "step": 4800 + }, + { + "epoch": 0.07, + "grad_norm": 0.45703125, + "learning_rate": 0.00013783706253585773, + "loss": 0.9152, + "step": 4805 + }, + { + "epoch": 0.07, + "grad_norm": 0.380859375, + "learning_rate": 0.00013798049340218015, + "loss": 1.0523, + "step": 4810 + }, + { + "epoch": 0.07, + "grad_norm": 0.51171875, + "learning_rate": 0.00013812392426850259, + "loss": 1.0063, + "step": 4815 + }, + { + "epoch": 0.07, + "grad_norm": 0.439453125, + "learning_rate": 0.00013826735513482503, + "loss": 0.9768, + "step": 4820 + }, + { + "epoch": 0.07, + "grad_norm": 0.419921875, + "learning_rate": 0.00013841078600114744, + "loss": 1.2043, + "step": 4825 + }, + { + "epoch": 0.07, + "grad_norm": 0.458984375, + "learning_rate": 0.00013855421686746988, + "loss": 0.9456, + "step": 4830 + }, + { + "epoch": 0.07, + "grad_norm": 0.4375, + "learning_rate": 0.00013869764773379232, + "loss": 0.8694, + "step": 4835 + }, + { + "epoch": 0.07, + "grad_norm": 0.447265625, + "learning_rate": 0.00013884107860011476, + "loss": 0.89, + "step": 4840 + }, + { + "epoch": 0.07, + "grad_norm": 0.4140625, + "learning_rate": 0.00013898450946643717, + "loss": 0.9385, + "step": 4845 + }, + { + "epoch": 0.07, + "grad_norm": 0.423828125, + "learning_rate": 0.0001391279403327596, + "loss": 0.8744, + "step": 4850 + }, + { + "epoch": 0.07, + "grad_norm": 0.470703125, + "learning_rate": 0.00013927137119908205, + "loss": 0.9686, + "step": 4855 + }, + { + "epoch": 0.07, + "grad_norm": 0.431640625, + "learning_rate": 0.00013941480206540447, + "loss": 0.9721, + "step": 4860 + }, + { + "epoch": 0.07, + "grad_norm": 0.44140625, + "learning_rate": 0.0001395582329317269, + "loss": 0.9944, + "step": 4865 + }, + { + "epoch": 0.07, + "grad_norm": 0.47265625, + "learning_rate": 0.00013970166379804935, + "loss": 1.0323, + "step": 4870 + }, + { + "epoch": 0.07, + "grad_norm": 0.4765625, + "learning_rate": 0.0001398450946643718, + "loss": 1.0255, + "step": 4875 + }, + { + "epoch": 0.07, + "grad_norm": 0.4453125, + "learning_rate": 0.00013998852553069423, + "loss": 0.9475, + "step": 4880 + }, + { + "epoch": 0.07, + "grad_norm": 0.443359375, + "learning_rate": 0.00014013195639701664, + "loss": 1.0105, + "step": 4885 + }, + { + "epoch": 0.07, + "grad_norm": 0.431640625, + "learning_rate": 0.00014027538726333908, + "loss": 0.9109, + "step": 4890 + }, + { + "epoch": 0.07, + "grad_norm": 0.439453125, + "learning_rate": 0.00014041881812966152, + "loss": 0.9437, + "step": 4895 + }, + { + "epoch": 0.07, + "grad_norm": 0.4453125, + "learning_rate": 0.00014056224899598393, + "loss": 1.0219, + "step": 4900 + }, + { + "epoch": 0.07, + "grad_norm": 0.447265625, + "learning_rate": 0.00014070567986230637, + "loss": 0.9312, + "step": 4905 + }, + { + "epoch": 0.07, + "grad_norm": 0.4375, + "learning_rate": 0.00014084911072862881, + "loss": 1.0337, + "step": 4910 + }, + { + "epoch": 0.07, + "grad_norm": 0.38671875, + "learning_rate": 0.00014099254159495123, + "loss": 1.1155, + "step": 4915 + }, + { + "epoch": 0.07, + "grad_norm": 0.51171875, + "learning_rate": 0.00014113597246127367, + "loss": 1.0321, + "step": 4920 + }, + { + "epoch": 0.07, + "grad_norm": 0.4375, + "learning_rate": 0.0001412794033275961, + "loss": 0.9464, + "step": 4925 + }, + { + "epoch": 0.07, + "grad_norm": 0.55078125, + "learning_rate": 0.00014142283419391852, + "loss": 1.0167, + "step": 4930 + }, + { + "epoch": 0.07, + "grad_norm": 0.5546875, + "learning_rate": 0.000141566265060241, + "loss": 1.1417, + "step": 4935 + }, + { + "epoch": 0.07, + "grad_norm": 0.44140625, + "learning_rate": 0.0001417096959265634, + "loss": 1.0258, + "step": 4940 + }, + { + "epoch": 0.07, + "grad_norm": 0.443359375, + "learning_rate": 0.00014185312679288581, + "loss": 0.9996, + "step": 4945 + }, + { + "epoch": 0.07, + "grad_norm": 0.44921875, + "learning_rate": 0.00014199655765920828, + "loss": 1.102, + "step": 4950 + }, + { + "epoch": 0.07, + "grad_norm": 0.43359375, + "learning_rate": 0.0001421399885255307, + "loss": 0.9078, + "step": 4955 + }, + { + "epoch": 0.07, + "grad_norm": 0.4921875, + "learning_rate": 0.00014228341939185313, + "loss": 0.9108, + "step": 4960 + }, + { + "epoch": 0.07, + "grad_norm": 0.427734375, + "learning_rate": 0.00014242685025817557, + "loss": 0.9866, + "step": 4965 + }, + { + "epoch": 0.07, + "grad_norm": 0.54296875, + "learning_rate": 0.000142570281124498, + "loss": 1.0493, + "step": 4970 + }, + { + "epoch": 0.07, + "grad_norm": 0.4375, + "learning_rate": 0.00014271371199082045, + "loss": 0.8932, + "step": 4975 + }, + { + "epoch": 0.07, + "grad_norm": 0.439453125, + "learning_rate": 0.00014285714285714287, + "loss": 1.0283, + "step": 4980 + }, + { + "epoch": 0.07, + "grad_norm": 0.384765625, + "learning_rate": 0.00014300057372346528, + "loss": 0.9218, + "step": 4985 + }, + { + "epoch": 0.07, + "grad_norm": 0.451171875, + "learning_rate": 0.00014314400458978775, + "loss": 0.8918, + "step": 4990 + }, + { + "epoch": 0.07, + "grad_norm": 0.443359375, + "learning_rate": 0.00014328743545611016, + "loss": 0.9241, + "step": 4995 + }, + { + "epoch": 0.07, + "grad_norm": 0.5390625, + "learning_rate": 0.0001434308663224326, + "loss": 1.0046, + "step": 5000 + }, + { + "epoch": 0.07, + "grad_norm": 0.423828125, + "learning_rate": 0.00014357429718875504, + "loss": 0.9486, + "step": 5005 + }, + { + "epoch": 0.07, + "grad_norm": 0.4453125, + "learning_rate": 0.00014371772805507745, + "loss": 0.8616, + "step": 5010 + }, + { + "epoch": 0.07, + "grad_norm": 0.451171875, + "learning_rate": 0.0001438611589213999, + "loss": 0.952, + "step": 5015 + }, + { + "epoch": 0.07, + "grad_norm": 0.455078125, + "learning_rate": 0.00014400458978772233, + "loss": 0.867, + "step": 5020 + }, + { + "epoch": 0.07, + "grad_norm": 0.462890625, + "learning_rate": 0.00014414802065404475, + "loss": 1.0847, + "step": 5025 + }, + { + "epoch": 0.07, + "grad_norm": 0.435546875, + "learning_rate": 0.0001442914515203672, + "loss": 0.8939, + "step": 5030 + }, + { + "epoch": 0.07, + "grad_norm": 0.453125, + "learning_rate": 0.00014443488238668963, + "loss": 0.9756, + "step": 5035 + }, + { + "epoch": 0.07, + "grad_norm": 0.47265625, + "learning_rate": 0.00014457831325301204, + "loss": 1.0356, + "step": 5040 + }, + { + "epoch": 0.07, + "grad_norm": 0.458984375, + "learning_rate": 0.00014472174411933448, + "loss": 0.8602, + "step": 5045 + }, + { + "epoch": 0.07, + "grad_norm": 0.333984375, + "learning_rate": 0.00014486517498565692, + "loss": 0.8666, + "step": 5050 + }, + { + "epoch": 0.07, + "grad_norm": 0.447265625, + "learning_rate": 0.00014500860585197936, + "loss": 0.9651, + "step": 5055 + }, + { + "epoch": 0.07, + "grad_norm": 0.447265625, + "learning_rate": 0.00014515203671830177, + "loss": 1.02, + "step": 5060 + }, + { + "epoch": 0.07, + "grad_norm": 0.49609375, + "learning_rate": 0.00014529546758462422, + "loss": 1.0457, + "step": 5065 + }, + { + "epoch": 0.07, + "grad_norm": 0.4296875, + "learning_rate": 0.00014543889845094666, + "loss": 1.1339, + "step": 5070 + }, + { + "epoch": 0.07, + "grad_norm": 0.376953125, + "learning_rate": 0.00014558232931726907, + "loss": 0.8031, + "step": 5075 + }, + { + "epoch": 0.07, + "grad_norm": 0.474609375, + "learning_rate": 0.0001457257601835915, + "loss": 0.9769, + "step": 5080 + }, + { + "epoch": 0.07, + "grad_norm": 0.43359375, + "learning_rate": 0.00014586919104991395, + "loss": 0.8861, + "step": 5085 + }, + { + "epoch": 0.07, + "grad_norm": 0.5703125, + "learning_rate": 0.0001460126219162364, + "loss": 0.9878, + "step": 5090 + }, + { + "epoch": 0.07, + "grad_norm": 0.44921875, + "learning_rate": 0.00014615605278255883, + "loss": 0.904, + "step": 5095 + }, + { + "epoch": 0.07, + "grad_norm": 0.384765625, + "learning_rate": 0.00014629948364888124, + "loss": 0.8937, + "step": 5100 + }, + { + "epoch": 0.07, + "grad_norm": 0.54296875, + "learning_rate": 0.00014644291451520368, + "loss": 1.0381, + "step": 5105 + }, + { + "epoch": 0.07, + "grad_norm": 0.5390625, + "learning_rate": 0.00014658634538152612, + "loss": 1.0496, + "step": 5110 + }, + { + "epoch": 0.07, + "grad_norm": 0.46875, + "learning_rate": 0.00014672977624784854, + "loss": 1.0499, + "step": 5115 + }, + { + "epoch": 0.07, + "grad_norm": 0.4453125, + "learning_rate": 0.00014687320711417098, + "loss": 1.0203, + "step": 5120 + }, + { + "epoch": 0.07, + "grad_norm": 0.48046875, + "learning_rate": 0.00014701663798049342, + "loss": 1.0739, + "step": 5125 + }, + { + "epoch": 0.07, + "grad_norm": 0.46484375, + "learning_rate": 0.00014716006884681583, + "loss": 0.9814, + "step": 5130 + }, + { + "epoch": 0.07, + "grad_norm": 0.462890625, + "learning_rate": 0.00014730349971313827, + "loss": 1.0477, + "step": 5135 + }, + { + "epoch": 0.07, + "grad_norm": 0.50390625, + "learning_rate": 0.0001474469305794607, + "loss": 0.9457, + "step": 5140 + }, + { + "epoch": 0.07, + "grad_norm": 0.470703125, + "learning_rate": 0.00014759036144578312, + "loss": 0.869, + "step": 5145 + }, + { + "epoch": 0.07, + "grad_norm": 0.44921875, + "learning_rate": 0.0001477337923121056, + "loss": 0.9091, + "step": 5150 + }, + { + "epoch": 0.07, + "grad_norm": 0.490234375, + "learning_rate": 0.000147877223178428, + "loss": 0.8955, + "step": 5155 + }, + { + "epoch": 0.07, + "grad_norm": 0.40625, + "learning_rate": 0.00014802065404475042, + "loss": 0.9003, + "step": 5160 + }, + { + "epoch": 0.07, + "grad_norm": 0.453125, + "learning_rate": 0.00014816408491107288, + "loss": 0.8217, + "step": 5165 + }, + { + "epoch": 0.07, + "grad_norm": 0.388671875, + "learning_rate": 0.0001483075157773953, + "loss": 0.9767, + "step": 5170 + }, + { + "epoch": 0.07, + "grad_norm": 0.46484375, + "learning_rate": 0.00014845094664371774, + "loss": 1.0299, + "step": 5175 + }, + { + "epoch": 0.07, + "grad_norm": 0.474609375, + "learning_rate": 0.00014859437751004018, + "loss": 1.0436, + "step": 5180 + }, + { + "epoch": 0.07, + "grad_norm": 0.45703125, + "learning_rate": 0.0001487378083763626, + "loss": 0.9742, + "step": 5185 + }, + { + "epoch": 0.07, + "grad_norm": 0.470703125, + "learning_rate": 0.00014888123924268503, + "loss": 0.9181, + "step": 5190 + }, + { + "epoch": 0.07, + "grad_norm": 0.54296875, + "learning_rate": 0.00014902467010900747, + "loss": 0.9368, + "step": 5195 + }, + { + "epoch": 0.07, + "grad_norm": 0.453125, + "learning_rate": 0.00014916810097532988, + "loss": 0.9426, + "step": 5200 + }, + { + "epoch": 0.07, + "grad_norm": 0.50390625, + "learning_rate": 0.00014931153184165235, + "loss": 1.0936, + "step": 5205 + }, + { + "epoch": 0.07, + "grad_norm": 0.6484375, + "learning_rate": 0.00014945496270797476, + "loss": 1.0672, + "step": 5210 + }, + { + "epoch": 0.07, + "grad_norm": 0.412109375, + "learning_rate": 0.0001495983935742972, + "loss": 0.9419, + "step": 5215 + }, + { + "epoch": 0.07, + "grad_norm": 0.5859375, + "learning_rate": 0.00014974182444061964, + "loss": 0.9147, + "step": 5220 + }, + { + "epoch": 0.07, + "grad_norm": 0.59375, + "learning_rate": 0.00014988525530694206, + "loss": 1.168, + "step": 5225 + }, + { + "epoch": 0.08, + "grad_norm": 0.46875, + "learning_rate": 0.0001500286861732645, + "loss": 1.0776, + "step": 5230 + }, + { + "epoch": 0.08, + "grad_norm": 0.451171875, + "learning_rate": 0.00015017211703958694, + "loss": 0.8433, + "step": 5235 + }, + { + "epoch": 0.08, + "grad_norm": 0.478515625, + "learning_rate": 0.00015031554790590935, + "loss": 0.959, + "step": 5240 + }, + { + "epoch": 0.08, + "grad_norm": 0.51171875, + "learning_rate": 0.0001504589787722318, + "loss": 0.9664, + "step": 5245 + }, + { + "epoch": 0.08, + "grad_norm": 0.474609375, + "learning_rate": 0.00015060240963855423, + "loss": 0.9351, + "step": 5250 + }, + { + "epoch": 0.08, + "grad_norm": 0.435546875, + "learning_rate": 0.00015074584050487664, + "loss": 1.0872, + "step": 5255 + }, + { + "epoch": 0.08, + "grad_norm": 0.478515625, + "learning_rate": 0.00015088927137119908, + "loss": 1.0728, + "step": 5260 + }, + { + "epoch": 0.08, + "grad_norm": 0.431640625, + "learning_rate": 0.00015103270223752152, + "loss": 1.07, + "step": 5265 + }, + { + "epoch": 0.08, + "grad_norm": 0.5078125, + "learning_rate": 0.00015117613310384396, + "loss": 1.003, + "step": 5270 + }, + { + "epoch": 0.08, + "grad_norm": 0.421875, + "learning_rate": 0.00015131956397016638, + "loss": 0.9776, + "step": 5275 + }, + { + "epoch": 0.08, + "grad_norm": 0.4765625, + "learning_rate": 0.00015146299483648882, + "loss": 0.908, + "step": 5280 + }, + { + "epoch": 0.08, + "grad_norm": 0.396484375, + "learning_rate": 0.00015160642570281126, + "loss": 0.8376, + "step": 5285 + }, + { + "epoch": 0.08, + "grad_norm": 0.4453125, + "learning_rate": 0.00015174985656913367, + "loss": 0.9636, + "step": 5290 + }, + { + "epoch": 0.08, + "grad_norm": 0.5078125, + "learning_rate": 0.0001518932874354561, + "loss": 0.9067, + "step": 5295 + }, + { + "epoch": 0.08, + "grad_norm": 0.4765625, + "learning_rate": 0.00015203671830177855, + "loss": 1.0184, + "step": 5300 + }, + { + "epoch": 0.08, + "grad_norm": 0.47265625, + "learning_rate": 0.00015218014916810096, + "loss": 0.9313, + "step": 5305 + }, + { + "epoch": 0.08, + "grad_norm": 0.546875, + "learning_rate": 0.00015232358003442343, + "loss": 1.1231, + "step": 5310 + }, + { + "epoch": 0.08, + "grad_norm": 0.390625, + "learning_rate": 0.00015246701090074584, + "loss": 1.0447, + "step": 5315 + }, + { + "epoch": 0.08, + "grad_norm": 0.4296875, + "learning_rate": 0.00015261044176706828, + "loss": 1.0549, + "step": 5320 + }, + { + "epoch": 0.08, + "grad_norm": 0.48046875, + "learning_rate": 0.00015275387263339072, + "loss": 0.9707, + "step": 5325 + }, + { + "epoch": 0.08, + "grad_norm": 0.451171875, + "learning_rate": 0.00015289730349971314, + "loss": 0.9855, + "step": 5330 + }, + { + "epoch": 0.08, + "grad_norm": 0.462890625, + "learning_rate": 0.00015304073436603558, + "loss": 1.1058, + "step": 5335 + }, + { + "epoch": 0.08, + "grad_norm": 0.462890625, + "learning_rate": 0.00015318416523235802, + "loss": 0.8579, + "step": 5340 + }, + { + "epoch": 0.08, + "grad_norm": 0.46484375, + "learning_rate": 0.00015332759609868043, + "loss": 1.1284, + "step": 5345 + }, + { + "epoch": 0.08, + "grad_norm": 0.5546875, + "learning_rate": 0.00015347102696500287, + "loss": 1.0177, + "step": 5350 + }, + { + "epoch": 0.08, + "grad_norm": 0.52734375, + "learning_rate": 0.0001536144578313253, + "loss": 0.9577, + "step": 5355 + }, + { + "epoch": 0.08, + "grad_norm": 0.5, + "learning_rate": 0.00015375788869764772, + "loss": 1.0579, + "step": 5360 + }, + { + "epoch": 0.08, + "grad_norm": 0.4296875, + "learning_rate": 0.0001539013195639702, + "loss": 0.9464, + "step": 5365 + }, + { + "epoch": 0.08, + "grad_norm": 0.447265625, + "learning_rate": 0.0001540447504302926, + "loss": 0.9591, + "step": 5370 + }, + { + "epoch": 0.08, + "grad_norm": 0.490234375, + "learning_rate": 0.00015418818129661502, + "loss": 0.9479, + "step": 5375 + }, + { + "epoch": 0.08, + "grad_norm": 0.46484375, + "learning_rate": 0.00015433161216293748, + "loss": 0.9686, + "step": 5380 + }, + { + "epoch": 0.08, + "grad_norm": 0.68359375, + "learning_rate": 0.0001544750430292599, + "loss": 0.9715, + "step": 5385 + }, + { + "epoch": 0.08, + "grad_norm": 0.3984375, + "learning_rate": 0.00015461847389558234, + "loss": 0.9483, + "step": 5390 + }, + { + "epoch": 0.08, + "grad_norm": 0.46484375, + "learning_rate": 0.00015476190476190478, + "loss": 0.8867, + "step": 5395 + }, + { + "epoch": 0.08, + "grad_norm": 0.49609375, + "learning_rate": 0.0001549053356282272, + "loss": 0.9678, + "step": 5400 + }, + { + "epoch": 0.08, + "grad_norm": 0.47265625, + "learning_rate": 0.00015504876649454963, + "loss": 1.0233, + "step": 5405 + }, + { + "epoch": 0.08, + "grad_norm": 0.51171875, + "learning_rate": 0.00015519219736087207, + "loss": 1.0728, + "step": 5410 + }, + { + "epoch": 0.08, + "grad_norm": 0.439453125, + "learning_rate": 0.00015533562822719448, + "loss": 0.9649, + "step": 5415 + }, + { + "epoch": 0.08, + "grad_norm": 0.44921875, + "learning_rate": 0.00015547905909351695, + "loss": 1.0888, + "step": 5420 + }, + { + "epoch": 0.08, + "grad_norm": 0.47265625, + "learning_rate": 0.00015562248995983936, + "loss": 0.9936, + "step": 5425 + }, + { + "epoch": 0.08, + "grad_norm": 0.486328125, + "learning_rate": 0.0001557659208261618, + "loss": 0.9588, + "step": 5430 + }, + { + "epoch": 0.08, + "grad_norm": 0.439453125, + "learning_rate": 0.00015590935169248425, + "loss": 0.9805, + "step": 5435 + }, + { + "epoch": 0.08, + "grad_norm": 0.458984375, + "learning_rate": 0.00015605278255880666, + "loss": 1.0008, + "step": 5440 + }, + { + "epoch": 0.08, + "grad_norm": 0.439453125, + "learning_rate": 0.0001561962134251291, + "loss": 1.0179, + "step": 5445 + }, + { + "epoch": 0.08, + "grad_norm": 0.466796875, + "learning_rate": 0.00015633964429145154, + "loss": 0.8855, + "step": 5450 + }, + { + "epoch": 0.08, + "grad_norm": 0.474609375, + "learning_rate": 0.00015648307515777395, + "loss": 0.958, + "step": 5455 + }, + { + "epoch": 0.08, + "grad_norm": 0.451171875, + "learning_rate": 0.0001566265060240964, + "loss": 0.9418, + "step": 5460 + }, + { + "epoch": 0.08, + "grad_norm": 0.5, + "learning_rate": 0.00015676993689041883, + "loss": 0.9573, + "step": 5465 + }, + { + "epoch": 0.08, + "grad_norm": 0.431640625, + "learning_rate": 0.00015691336775674127, + "loss": 1.1149, + "step": 5470 + }, + { + "epoch": 0.08, + "grad_norm": 0.42578125, + "learning_rate": 0.00015705679862306369, + "loss": 0.995, + "step": 5475 + }, + { + "epoch": 0.08, + "grad_norm": 0.5234375, + "learning_rate": 0.00015720022948938613, + "loss": 0.9856, + "step": 5480 + }, + { + "epoch": 0.08, + "grad_norm": 0.474609375, + "learning_rate": 0.00015734366035570857, + "loss": 0.9135, + "step": 5485 + }, + { + "epoch": 0.08, + "grad_norm": 0.36328125, + "learning_rate": 0.00015748709122203098, + "loss": 0.8655, + "step": 5490 + }, + { + "epoch": 0.08, + "grad_norm": 0.408203125, + "learning_rate": 0.00015763052208835342, + "loss": 1.0002, + "step": 5495 + }, + { + "epoch": 0.08, + "grad_norm": 0.404296875, + "learning_rate": 0.00015777395295467586, + "loss": 0.9998, + "step": 5500 + }, + { + "epoch": 0.08, + "grad_norm": 0.51171875, + "learning_rate": 0.00015791738382099827, + "loss": 0.9179, + "step": 5505 + }, + { + "epoch": 0.08, + "grad_norm": 0.41015625, + "learning_rate": 0.0001580608146873207, + "loss": 0.7862, + "step": 5510 + }, + { + "epoch": 0.08, + "grad_norm": 0.4453125, + "learning_rate": 0.00015820424555364315, + "loss": 1.0359, + "step": 5515 + }, + { + "epoch": 0.08, + "grad_norm": 0.466796875, + "learning_rate": 0.00015834767641996557, + "loss": 0.9956, + "step": 5520 + }, + { + "epoch": 0.08, + "grad_norm": 0.54296875, + "learning_rate": 0.00015849110728628803, + "loss": 0.8596, + "step": 5525 + }, + { + "epoch": 0.08, + "grad_norm": 0.48046875, + "learning_rate": 0.00015863453815261045, + "loss": 1.1384, + "step": 5530 + }, + { + "epoch": 0.08, + "grad_norm": 0.412109375, + "learning_rate": 0.00015877796901893289, + "loss": 1.0153, + "step": 5535 + }, + { + "epoch": 0.08, + "grad_norm": 0.45703125, + "learning_rate": 0.00015892139988525533, + "loss": 1.0316, + "step": 5540 + }, + { + "epoch": 0.08, + "grad_norm": 0.443359375, + "learning_rate": 0.00015906483075157774, + "loss": 1.0093, + "step": 5545 + }, + { + "epoch": 0.08, + "grad_norm": 0.4609375, + "learning_rate": 0.00015920826161790018, + "loss": 1.047, + "step": 5550 + }, + { + "epoch": 0.08, + "grad_norm": 0.515625, + "learning_rate": 0.00015935169248422262, + "loss": 0.9798, + "step": 5555 + }, + { + "epoch": 0.08, + "grad_norm": 0.453125, + "learning_rate": 0.00015949512335054503, + "loss": 0.9495, + "step": 5560 + }, + { + "epoch": 0.08, + "grad_norm": 0.44140625, + "learning_rate": 0.0001596385542168675, + "loss": 0.994, + "step": 5565 + }, + { + "epoch": 0.08, + "grad_norm": 0.474609375, + "learning_rate": 0.0001597819850831899, + "loss": 0.97, + "step": 5570 + }, + { + "epoch": 0.08, + "grad_norm": 0.478515625, + "learning_rate": 0.00015992541594951233, + "loss": 0.9392, + "step": 5575 + }, + { + "epoch": 0.08, + "grad_norm": 0.43359375, + "learning_rate": 0.0001600688468158348, + "loss": 0.9793, + "step": 5580 + }, + { + "epoch": 0.08, + "grad_norm": 0.453125, + "learning_rate": 0.0001602122776821572, + "loss": 1.0855, + "step": 5585 + }, + { + "epoch": 0.08, + "grad_norm": 0.46875, + "learning_rate": 0.00016035570854847965, + "loss": 1.0543, + "step": 5590 + }, + { + "epoch": 0.08, + "grad_norm": 0.458984375, + "learning_rate": 0.00016049913941480209, + "loss": 1.0799, + "step": 5595 + }, + { + "epoch": 0.08, + "grad_norm": 0.47265625, + "learning_rate": 0.0001606425702811245, + "loss": 0.9207, + "step": 5600 + }, + { + "epoch": 0.08, + "grad_norm": 0.423828125, + "learning_rate": 0.00016078600114744694, + "loss": 0.9722, + "step": 5605 + }, + { + "epoch": 0.08, + "grad_norm": 0.4765625, + "learning_rate": 0.00016092943201376938, + "loss": 1.0413, + "step": 5610 + }, + { + "epoch": 0.08, + "grad_norm": 0.50390625, + "learning_rate": 0.0001610728628800918, + "loss": 0.9253, + "step": 5615 + }, + { + "epoch": 0.08, + "grad_norm": 0.6796875, + "learning_rate": 0.00016121629374641423, + "loss": 1.0691, + "step": 5620 + }, + { + "epoch": 0.08, + "grad_norm": 0.46484375, + "learning_rate": 0.00016135972461273667, + "loss": 0.9382, + "step": 5625 + }, + { + "epoch": 0.08, + "grad_norm": 0.494140625, + "learning_rate": 0.00016150315547905909, + "loss": 0.9178, + "step": 5630 + }, + { + "epoch": 0.08, + "grad_norm": 0.515625, + "learning_rate": 0.00016164658634538153, + "loss": 1.0859, + "step": 5635 + }, + { + "epoch": 0.08, + "grad_norm": 0.53125, + "learning_rate": 0.00016179001721170397, + "loss": 0.9187, + "step": 5640 + }, + { + "epoch": 0.08, + "grad_norm": 0.470703125, + "learning_rate": 0.0001619334480780264, + "loss": 0.9841, + "step": 5645 + }, + { + "epoch": 0.08, + "grad_norm": 0.419921875, + "learning_rate": 0.00016207687894434885, + "loss": 0.9436, + "step": 5650 + }, + { + "epoch": 0.08, + "grad_norm": 0.40234375, + "learning_rate": 0.00016222030981067126, + "loss": 0.9063, + "step": 5655 + }, + { + "epoch": 0.08, + "grad_norm": 0.546875, + "learning_rate": 0.0001623637406769937, + "loss": 1.0031, + "step": 5660 + }, + { + "epoch": 0.08, + "grad_norm": 0.5390625, + "learning_rate": 0.00016250717154331614, + "loss": 0.968, + "step": 5665 + }, + { + "epoch": 0.08, + "grad_norm": 0.482421875, + "learning_rate": 0.00016265060240963855, + "loss": 1.1923, + "step": 5670 + }, + { + "epoch": 0.08, + "grad_norm": 0.447265625, + "learning_rate": 0.000162794033275961, + "loss": 0.85, + "step": 5675 + }, + { + "epoch": 0.08, + "grad_norm": 0.4609375, + "learning_rate": 0.00016293746414228343, + "loss": 1.0102, + "step": 5680 + }, + { + "epoch": 0.08, + "grad_norm": 0.41796875, + "learning_rate": 0.00016308089500860587, + "loss": 1.0054, + "step": 5685 + }, + { + "epoch": 0.08, + "grad_norm": 0.486328125, + "learning_rate": 0.0001632243258749283, + "loss": 0.8971, + "step": 5690 + }, + { + "epoch": 0.08, + "grad_norm": 0.48046875, + "learning_rate": 0.00016336775674125073, + "loss": 1.0443, + "step": 5695 + }, + { + "epoch": 0.08, + "grad_norm": 0.4375, + "learning_rate": 0.00016351118760757317, + "loss": 1.0344, + "step": 5700 + }, + { + "epoch": 0.08, + "grad_norm": 0.46875, + "learning_rate": 0.00016365461847389558, + "loss": 0.9438, + "step": 5705 + }, + { + "epoch": 0.08, + "grad_norm": 0.453125, + "learning_rate": 0.00016379804934021802, + "loss": 1.0228, + "step": 5710 + }, + { + "epoch": 0.08, + "grad_norm": 0.4296875, + "learning_rate": 0.00016394148020654046, + "loss": 0.9256, + "step": 5715 + }, + { + "epoch": 0.08, + "grad_norm": 0.4375, + "learning_rate": 0.00016408491107286287, + "loss": 0.9056, + "step": 5720 + }, + { + "epoch": 0.08, + "grad_norm": 0.474609375, + "learning_rate": 0.00016422834193918531, + "loss": 1.0632, + "step": 5725 + }, + { + "epoch": 0.08, + "grad_norm": 0.5078125, + "learning_rate": 0.00016437177280550775, + "loss": 0.8901, + "step": 5730 + }, + { + "epoch": 0.08, + "grad_norm": 0.46875, + "learning_rate": 0.00016451520367183017, + "loss": 0.9466, + "step": 5735 + }, + { + "epoch": 0.08, + "grad_norm": 0.48046875, + "learning_rate": 0.00016465863453815263, + "loss": 1.1376, + "step": 5740 + }, + { + "epoch": 0.08, + "grad_norm": 0.375, + "learning_rate": 0.00016480206540447505, + "loss": 0.8577, + "step": 5745 + }, + { + "epoch": 0.08, + "grad_norm": 0.39453125, + "learning_rate": 0.00016494549627079746, + "loss": 0.9745, + "step": 5750 + }, + { + "epoch": 0.08, + "grad_norm": 0.494140625, + "learning_rate": 0.00016508892713711993, + "loss": 1.0465, + "step": 5755 + }, + { + "epoch": 0.08, + "grad_norm": 0.5078125, + "learning_rate": 0.00016523235800344234, + "loss": 1.1213, + "step": 5760 + }, + { + "epoch": 0.08, + "grad_norm": 0.478515625, + "learning_rate": 0.00016537578886976478, + "loss": 0.9917, + "step": 5765 + }, + { + "epoch": 0.08, + "grad_norm": 0.4921875, + "learning_rate": 0.00016551921973608722, + "loss": 0.8194, + "step": 5770 + }, + { + "epoch": 0.08, + "grad_norm": 0.486328125, + "learning_rate": 0.00016566265060240963, + "loss": 0.9969, + "step": 5775 + }, + { + "epoch": 0.08, + "grad_norm": 0.5078125, + "learning_rate": 0.0001658060814687321, + "loss": 0.8893, + "step": 5780 + }, + { + "epoch": 0.08, + "grad_norm": 0.5234375, + "learning_rate": 0.00016594951233505451, + "loss": 0.9696, + "step": 5785 + }, + { + "epoch": 0.08, + "grad_norm": 0.474609375, + "learning_rate": 0.00016609294320137693, + "loss": 1.1031, + "step": 5790 + }, + { + "epoch": 0.08, + "grad_norm": 0.5078125, + "learning_rate": 0.0001662363740676994, + "loss": 1.0955, + "step": 5795 + }, + { + "epoch": 0.08, + "grad_norm": 0.451171875, + "learning_rate": 0.0001663798049340218, + "loss": 1.0878, + "step": 5800 + }, + { + "epoch": 0.08, + "grad_norm": 0.46484375, + "learning_rate": 0.00016652323580034425, + "loss": 0.9839, + "step": 5805 + }, + { + "epoch": 0.08, + "grad_norm": 0.474609375, + "learning_rate": 0.0001666666666666667, + "loss": 1.1202, + "step": 5810 + }, + { + "epoch": 0.08, + "grad_norm": 0.51953125, + "learning_rate": 0.0001668100975329891, + "loss": 0.9917, + "step": 5815 + }, + { + "epoch": 0.08, + "grad_norm": 0.421875, + "learning_rate": 0.00016695352839931154, + "loss": 0.9115, + "step": 5820 + }, + { + "epoch": 0.08, + "grad_norm": 0.451171875, + "learning_rate": 0.00016709695926563398, + "loss": 0.9164, + "step": 5825 + }, + { + "epoch": 0.08, + "grad_norm": 0.43359375, + "learning_rate": 0.0001672403901319564, + "loss": 0.995, + "step": 5830 + }, + { + "epoch": 0.08, + "grad_norm": 0.5390625, + "learning_rate": 0.00016738382099827883, + "loss": 1.091, + "step": 5835 + }, + { + "epoch": 0.08, + "grad_norm": 0.48046875, + "learning_rate": 0.00016752725186460127, + "loss": 0.9019, + "step": 5840 + }, + { + "epoch": 0.08, + "grad_norm": 0.515625, + "learning_rate": 0.0001676706827309237, + "loss": 0.8586, + "step": 5845 + }, + { + "epoch": 0.08, + "grad_norm": 0.474609375, + "learning_rate": 0.00016781411359724613, + "loss": 0.9595, + "step": 5850 + }, + { + "epoch": 0.08, + "grad_norm": 0.435546875, + "learning_rate": 0.00016795754446356857, + "loss": 1.1204, + "step": 5855 + }, + { + "epoch": 0.08, + "grad_norm": 0.458984375, + "learning_rate": 0.000168100975329891, + "loss": 0.8435, + "step": 5860 + }, + { + "epoch": 0.08, + "grad_norm": 0.4765625, + "learning_rate": 0.00016824440619621342, + "loss": 0.9602, + "step": 5865 + }, + { + "epoch": 0.08, + "grad_norm": 0.5, + "learning_rate": 0.00016838783706253586, + "loss": 0.9821, + "step": 5870 + }, + { + "epoch": 0.08, + "grad_norm": 0.478515625, + "learning_rate": 0.0001685312679288583, + "loss": 0.864, + "step": 5875 + }, + { + "epoch": 0.08, + "grad_norm": 0.45703125, + "learning_rate": 0.00016867469879518074, + "loss": 0.979, + "step": 5880 + }, + { + "epoch": 0.08, + "grad_norm": 0.4609375, + "learning_rate": 0.00016881812966150316, + "loss": 0.9652, + "step": 5885 + }, + { + "epoch": 0.08, + "grad_norm": 0.458984375, + "learning_rate": 0.0001689615605278256, + "loss": 0.9546, + "step": 5890 + }, + { + "epoch": 0.08, + "grad_norm": 0.474609375, + "learning_rate": 0.00016910499139414804, + "loss": 0.9793, + "step": 5895 + }, + { + "epoch": 0.08, + "grad_norm": 0.4609375, + "learning_rate": 0.00016924842226047048, + "loss": 0.9838, + "step": 5900 + }, + { + "epoch": 0.08, + "grad_norm": 0.421875, + "learning_rate": 0.0001693918531267929, + "loss": 0.9815, + "step": 5905 + }, + { + "epoch": 0.08, + "grad_norm": 0.4765625, + "learning_rate": 0.00016953528399311533, + "loss": 0.9909, + "step": 5910 + }, + { + "epoch": 0.08, + "grad_norm": 0.5546875, + "learning_rate": 0.00016967871485943777, + "loss": 1.0473, + "step": 5915 + }, + { + "epoch": 0.08, + "grad_norm": 0.486328125, + "learning_rate": 0.00016982214572576018, + "loss": 1.0577, + "step": 5920 + }, + { + "epoch": 0.08, + "grad_norm": 0.515625, + "learning_rate": 0.00016996557659208262, + "loss": 1.0259, + "step": 5925 + }, + { + "epoch": 0.09, + "grad_norm": 0.51953125, + "learning_rate": 0.00017010900745840506, + "loss": 0.9825, + "step": 5930 + }, + { + "epoch": 0.09, + "grad_norm": 0.5, + "learning_rate": 0.00017025243832472748, + "loss": 1.172, + "step": 5935 + }, + { + "epoch": 0.09, + "grad_norm": 0.46484375, + "learning_rate": 0.00017039586919104992, + "loss": 0.8524, + "step": 5940 + }, + { + "epoch": 0.09, + "grad_norm": 0.490234375, + "learning_rate": 0.00017053930005737236, + "loss": 0.8916, + "step": 5945 + }, + { + "epoch": 0.09, + "grad_norm": 0.51953125, + "learning_rate": 0.00017068273092369477, + "loss": 0.9652, + "step": 5950 + }, + { + "epoch": 0.09, + "grad_norm": 0.490234375, + "learning_rate": 0.00017082616179001724, + "loss": 0.9527, + "step": 5955 + }, + { + "epoch": 0.09, + "grad_norm": 0.50390625, + "learning_rate": 0.00017096959265633965, + "loss": 0.8899, + "step": 5960 + }, + { + "epoch": 0.09, + "grad_norm": 0.484375, + "learning_rate": 0.00017111302352266206, + "loss": 0.9364, + "step": 5965 + }, + { + "epoch": 0.09, + "grad_norm": 0.490234375, + "learning_rate": 0.00017125645438898453, + "loss": 0.9621, + "step": 5970 + }, + { + "epoch": 0.09, + "grad_norm": 0.45703125, + "learning_rate": 0.00017139988525530694, + "loss": 0.9841, + "step": 5975 + }, + { + "epoch": 0.09, + "grad_norm": 0.451171875, + "learning_rate": 0.00017154331612162938, + "loss": 1.0999, + "step": 5980 + }, + { + "epoch": 0.09, + "grad_norm": 0.447265625, + "learning_rate": 0.00017168674698795182, + "loss": 0.883, + "step": 5985 + }, + { + "epoch": 0.09, + "grad_norm": 0.515625, + "learning_rate": 0.00017183017785427424, + "loss": 1.0271, + "step": 5990 + }, + { + "epoch": 0.09, + "grad_norm": 0.48828125, + "learning_rate": 0.0001719736087205967, + "loss": 1.0657, + "step": 5995 + }, + { + "epoch": 0.09, + "grad_norm": 0.494140625, + "learning_rate": 0.00017211703958691912, + "loss": 1.0207, + "step": 6000 + }, + { + "epoch": 0.09, + "grad_norm": 0.47265625, + "learning_rate": 0.00017226047045324153, + "loss": 1.2019, + "step": 6005 + }, + { + "epoch": 0.09, + "grad_norm": 0.51171875, + "learning_rate": 0.000172403901319564, + "loss": 1.1323, + "step": 6010 + }, + { + "epoch": 0.09, + "grad_norm": 0.478515625, + "learning_rate": 0.0001725473321858864, + "loss": 0.8854, + "step": 6015 + }, + { + "epoch": 0.09, + "grad_norm": 0.4609375, + "learning_rate": 0.00017269076305220885, + "loss": 1.0004, + "step": 6020 + }, + { + "epoch": 0.09, + "grad_norm": 0.486328125, + "learning_rate": 0.0001728341939185313, + "loss": 0.9655, + "step": 6025 + }, + { + "epoch": 0.09, + "grad_norm": 0.515625, + "learning_rate": 0.0001729776247848537, + "loss": 0.9061, + "step": 6030 + }, + { + "epoch": 0.09, + "grad_norm": 0.453125, + "learning_rate": 0.00017312105565117614, + "loss": 0.9962, + "step": 6035 + }, + { + "epoch": 0.09, + "grad_norm": 0.53515625, + "learning_rate": 0.00017326448651749858, + "loss": 0.9991, + "step": 6040 + }, + { + "epoch": 0.09, + "grad_norm": 0.482421875, + "learning_rate": 0.000173407917383821, + "loss": 1.0361, + "step": 6045 + }, + { + "epoch": 0.09, + "grad_norm": 0.5, + "learning_rate": 0.00017355134825014344, + "loss": 0.9285, + "step": 6050 + }, + { + "epoch": 0.09, + "grad_norm": 0.4609375, + "learning_rate": 0.00017369477911646588, + "loss": 1.048, + "step": 6055 + }, + { + "epoch": 0.09, + "grad_norm": 0.5078125, + "learning_rate": 0.0001738382099827883, + "loss": 0.8781, + "step": 6060 + }, + { + "epoch": 0.09, + "grad_norm": 0.46875, + "learning_rate": 0.00017398164084911073, + "loss": 1.0609, + "step": 6065 + }, + { + "epoch": 0.09, + "grad_norm": 0.50390625, + "learning_rate": 0.00017412507171543317, + "loss": 0.8504, + "step": 6070 + }, + { + "epoch": 0.09, + "grad_norm": 0.53125, + "learning_rate": 0.0001742685025817556, + "loss": 1.1307, + "step": 6075 + }, + { + "epoch": 0.09, + "grad_norm": 0.53125, + "learning_rate": 0.00017441193344807802, + "loss": 1.0649, + "step": 6080 + }, + { + "epoch": 0.09, + "grad_norm": 0.494140625, + "learning_rate": 0.00017455536431440046, + "loss": 0.8316, + "step": 6085 + }, + { + "epoch": 0.09, + "grad_norm": 0.46875, + "learning_rate": 0.0001746987951807229, + "loss": 0.9775, + "step": 6090 + }, + { + "epoch": 0.09, + "grad_norm": 0.484375, + "learning_rate": 0.00017484222604704534, + "loss": 1.0049, + "step": 6095 + }, + { + "epoch": 0.09, + "grad_norm": 0.462890625, + "learning_rate": 0.00017498565691336776, + "loss": 0.965, + "step": 6100 + }, + { + "epoch": 0.09, + "grad_norm": 0.45703125, + "learning_rate": 0.0001751290877796902, + "loss": 1.0832, + "step": 6105 + }, + { + "epoch": 0.09, + "grad_norm": 0.3984375, + "learning_rate": 0.00017527251864601264, + "loss": 0.8216, + "step": 6110 + }, + { + "epoch": 0.09, + "grad_norm": 0.48046875, + "learning_rate": 0.00017541594951233508, + "loss": 0.9725, + "step": 6115 + }, + { + "epoch": 0.09, + "grad_norm": 0.390625, + "learning_rate": 0.0001755593803786575, + "loss": 0.8486, + "step": 6120 + }, + { + "epoch": 0.09, + "grad_norm": 0.470703125, + "learning_rate": 0.00017570281124497993, + "loss": 1.067, + "step": 6125 + }, + { + "epoch": 0.09, + "grad_norm": 0.51953125, + "learning_rate": 0.00017584624211130237, + "loss": 0.9053, + "step": 6130 + }, + { + "epoch": 0.09, + "grad_norm": 0.423828125, + "learning_rate": 0.00017598967297762478, + "loss": 0.8203, + "step": 6135 + }, + { + "epoch": 0.09, + "grad_norm": 0.48046875, + "learning_rate": 0.00017613310384394722, + "loss": 1.0192, + "step": 6140 + }, + { + "epoch": 0.09, + "grad_norm": 0.53125, + "learning_rate": 0.00017627653471026966, + "loss": 0.9056, + "step": 6145 + }, + { + "epoch": 0.09, + "grad_norm": 0.42578125, + "learning_rate": 0.00017641996557659208, + "loss": 1.0876, + "step": 6150 + }, + { + "epoch": 0.09, + "grad_norm": 0.390625, + "learning_rate": 0.00017656339644291452, + "loss": 1.0021, + "step": 6155 + }, + { + "epoch": 0.09, + "grad_norm": 0.4765625, + "learning_rate": 0.00017670682730923696, + "loss": 1.0436, + "step": 6160 + }, + { + "epoch": 0.09, + "grad_norm": 0.451171875, + "learning_rate": 0.00017685025817555937, + "loss": 0.874, + "step": 6165 + }, + { + "epoch": 0.09, + "grad_norm": 0.4765625, + "learning_rate": 0.00017699368904188184, + "loss": 0.9586, + "step": 6170 + }, + { + "epoch": 0.09, + "grad_norm": 0.466796875, + "learning_rate": 0.00017713711990820425, + "loss": 1.0107, + "step": 6175 + }, + { + "epoch": 0.09, + "grad_norm": 0.515625, + "learning_rate": 0.00017728055077452666, + "loss": 0.9436, + "step": 6180 + }, + { + "epoch": 0.09, + "grad_norm": 0.53125, + "learning_rate": 0.00017742398164084913, + "loss": 0.9741, + "step": 6185 + }, + { + "epoch": 0.09, + "grad_norm": 0.46484375, + "learning_rate": 0.00017756741250717154, + "loss": 0.9436, + "step": 6190 + }, + { + "epoch": 0.09, + "grad_norm": 0.5078125, + "learning_rate": 0.00017771084337349398, + "loss": 0.9143, + "step": 6195 + }, + { + "epoch": 0.09, + "grad_norm": 0.546875, + "learning_rate": 0.00017785427423981642, + "loss": 1.0719, + "step": 6200 + }, + { + "epoch": 0.09, + "grad_norm": 0.39453125, + "learning_rate": 0.00017799770510613884, + "loss": 0.795, + "step": 6205 + }, + { + "epoch": 0.09, + "grad_norm": 0.5078125, + "learning_rate": 0.0001781411359724613, + "loss": 0.8589, + "step": 6210 + }, + { + "epoch": 0.09, + "grad_norm": 0.54296875, + "learning_rate": 0.00017828456683878372, + "loss": 1.1208, + "step": 6215 + }, + { + "epoch": 0.09, + "grad_norm": 0.498046875, + "learning_rate": 0.00017842799770510613, + "loss": 1.0982, + "step": 6220 + }, + { + "epoch": 0.09, + "grad_norm": 0.443359375, + "learning_rate": 0.0001785714285714286, + "loss": 0.9124, + "step": 6225 + }, + { + "epoch": 0.09, + "grad_norm": 0.486328125, + "learning_rate": 0.000178714859437751, + "loss": 1.1237, + "step": 6230 + }, + { + "epoch": 0.09, + "grad_norm": 0.458984375, + "learning_rate": 0.00017885829030407345, + "loss": 0.8831, + "step": 6235 + }, + { + "epoch": 0.09, + "grad_norm": 0.5, + "learning_rate": 0.0001790017211703959, + "loss": 0.9529, + "step": 6240 + }, + { + "epoch": 0.09, + "grad_norm": 0.46875, + "learning_rate": 0.0001791451520367183, + "loss": 1.0183, + "step": 6245 + }, + { + "epoch": 0.09, + "grad_norm": 0.47265625, + "learning_rate": 0.00017928858290304074, + "loss": 1.0009, + "step": 6250 + }, + { + "epoch": 0.09, + "grad_norm": 0.451171875, + "learning_rate": 0.00017943201376936319, + "loss": 0.79, + "step": 6255 + }, + { + "epoch": 0.09, + "grad_norm": 0.515625, + "learning_rate": 0.0001795754446356856, + "loss": 0.9302, + "step": 6260 + }, + { + "epoch": 0.09, + "grad_norm": 0.48828125, + "learning_rate": 0.00017971887550200804, + "loss": 1.0073, + "step": 6265 + }, + { + "epoch": 0.09, + "grad_norm": 0.50390625, + "learning_rate": 0.00017986230636833048, + "loss": 0.9439, + "step": 6270 + }, + { + "epoch": 0.09, + "grad_norm": 0.484375, + "learning_rate": 0.0001800057372346529, + "loss": 0.9953, + "step": 6275 + }, + { + "epoch": 0.09, + "grad_norm": 0.484375, + "learning_rate": 0.00018014916810097533, + "loss": 0.9232, + "step": 6280 + }, + { + "epoch": 0.09, + "grad_norm": 0.5078125, + "learning_rate": 0.00018029259896729777, + "loss": 0.9308, + "step": 6285 + }, + { + "epoch": 0.09, + "grad_norm": 0.47265625, + "learning_rate": 0.0001804360298336202, + "loss": 0.9297, + "step": 6290 + }, + { + "epoch": 0.09, + "grad_norm": 0.470703125, + "learning_rate": 0.00018057946069994263, + "loss": 0.9201, + "step": 6295 + }, + { + "epoch": 0.09, + "grad_norm": 0.44921875, + "learning_rate": 0.00018072289156626507, + "loss": 1.0003, + "step": 6300 + }, + { + "epoch": 0.09, + "grad_norm": 0.515625, + "learning_rate": 0.0001808663224325875, + "loss": 1.0814, + "step": 6305 + }, + { + "epoch": 0.09, + "grad_norm": 0.44921875, + "learning_rate": 0.00018100975329890992, + "loss": 1.1829, + "step": 6310 + }, + { + "epoch": 0.09, + "grad_norm": 0.478515625, + "learning_rate": 0.00018115318416523236, + "loss": 0.7843, + "step": 6315 + }, + { + "epoch": 0.09, + "grad_norm": 0.48828125, + "learning_rate": 0.0001812966150315548, + "loss": 1.071, + "step": 6320 + }, + { + "epoch": 0.09, + "grad_norm": 0.4609375, + "learning_rate": 0.00018144004589787724, + "loss": 0.766, + "step": 6325 + }, + { + "epoch": 0.09, + "grad_norm": 0.458984375, + "learning_rate": 0.00018158347676419968, + "loss": 0.9826, + "step": 6330 + }, + { + "epoch": 0.09, + "grad_norm": 0.416015625, + "learning_rate": 0.0001817269076305221, + "loss": 0.9046, + "step": 6335 + }, + { + "epoch": 0.09, + "grad_norm": 0.51171875, + "learning_rate": 0.00018187033849684453, + "loss": 0.9938, + "step": 6340 + }, + { + "epoch": 0.09, + "grad_norm": 0.451171875, + "learning_rate": 0.00018201376936316697, + "loss": 0.9356, + "step": 6345 + }, + { + "epoch": 0.09, + "grad_norm": 0.490234375, + "learning_rate": 0.00018215720022948939, + "loss": 0.9787, + "step": 6350 + }, + { + "epoch": 0.09, + "grad_norm": 0.482421875, + "learning_rate": 0.00018230063109581183, + "loss": 0.9114, + "step": 6355 + }, + { + "epoch": 0.09, + "grad_norm": 0.46875, + "learning_rate": 0.00018244406196213427, + "loss": 1.0716, + "step": 6360 + }, + { + "epoch": 0.09, + "grad_norm": 0.50390625, + "learning_rate": 0.00018258749282845668, + "loss": 0.8913, + "step": 6365 + }, + { + "epoch": 0.09, + "grad_norm": 0.5234375, + "learning_rate": 0.00018273092369477912, + "loss": 1.1467, + "step": 6370 + }, + { + "epoch": 0.09, + "grad_norm": 0.5078125, + "learning_rate": 0.00018287435456110156, + "loss": 0.9404, + "step": 6375 + }, + { + "epoch": 0.09, + "grad_norm": 0.44140625, + "learning_rate": 0.00018301778542742397, + "loss": 0.9886, + "step": 6380 + }, + { + "epoch": 0.09, + "grad_norm": 0.54296875, + "learning_rate": 0.00018316121629374644, + "loss": 0.847, + "step": 6385 + }, + { + "epoch": 0.09, + "grad_norm": 0.54296875, + "learning_rate": 0.00018330464716006885, + "loss": 0.8849, + "step": 6390 + }, + { + "epoch": 0.09, + "grad_norm": 0.515625, + "learning_rate": 0.00018344807802639127, + "loss": 1.2015, + "step": 6395 + }, + { + "epoch": 0.09, + "grad_norm": 0.484375, + "learning_rate": 0.00018359150889271373, + "loss": 0.9451, + "step": 6400 + }, + { + "epoch": 0.09, + "grad_norm": 0.490234375, + "learning_rate": 0.00018373493975903615, + "loss": 0.7917, + "step": 6405 + }, + { + "epoch": 0.09, + "grad_norm": 0.4921875, + "learning_rate": 0.00018387837062535859, + "loss": 1.0318, + "step": 6410 + }, + { + "epoch": 0.09, + "grad_norm": 0.43359375, + "learning_rate": 0.00018402180149168103, + "loss": 0.9183, + "step": 6415 + }, + { + "epoch": 0.09, + "grad_norm": 0.478515625, + "learning_rate": 0.00018416523235800344, + "loss": 0.9658, + "step": 6420 + }, + { + "epoch": 0.09, + "grad_norm": 0.466796875, + "learning_rate": 0.0001843086632243259, + "loss": 0.9126, + "step": 6425 + }, + { + "epoch": 0.09, + "grad_norm": 0.470703125, + "learning_rate": 0.00018445209409064832, + "loss": 0.9077, + "step": 6430 + }, + { + "epoch": 0.09, + "grad_norm": 0.427734375, + "learning_rate": 0.00018459552495697073, + "loss": 0.9502, + "step": 6435 + }, + { + "epoch": 0.09, + "grad_norm": 0.5078125, + "learning_rate": 0.0001847389558232932, + "loss": 0.8701, + "step": 6440 + }, + { + "epoch": 0.09, + "grad_norm": 0.482421875, + "learning_rate": 0.0001848823866896156, + "loss": 0.9623, + "step": 6445 + }, + { + "epoch": 0.09, + "grad_norm": 0.5, + "learning_rate": 0.00018502581755593805, + "loss": 1.0634, + "step": 6450 + }, + { + "epoch": 0.09, + "grad_norm": 0.490234375, + "learning_rate": 0.0001851692484222605, + "loss": 0.8987, + "step": 6455 + }, + { + "epoch": 0.09, + "grad_norm": 0.484375, + "learning_rate": 0.0001853126792885829, + "loss": 1.0042, + "step": 6460 + }, + { + "epoch": 0.09, + "grad_norm": 0.5234375, + "learning_rate": 0.00018545611015490535, + "loss": 0.9661, + "step": 6465 + }, + { + "epoch": 0.09, + "grad_norm": 0.5234375, + "learning_rate": 0.0001855995410212278, + "loss": 0.9081, + "step": 6470 + }, + { + "epoch": 0.09, + "grad_norm": 0.453125, + "learning_rate": 0.0001857429718875502, + "loss": 1.1281, + "step": 6475 + }, + { + "epoch": 0.09, + "grad_norm": 0.482421875, + "learning_rate": 0.00018588640275387264, + "loss": 0.9521, + "step": 6480 + }, + { + "epoch": 0.09, + "grad_norm": 0.490234375, + "learning_rate": 0.00018602983362019508, + "loss": 0.8882, + "step": 6485 + }, + { + "epoch": 0.09, + "grad_norm": 0.482421875, + "learning_rate": 0.0001861732644865175, + "loss": 0.9865, + "step": 6490 + }, + { + "epoch": 0.09, + "grad_norm": 0.66015625, + "learning_rate": 0.00018631669535283993, + "loss": 0.8568, + "step": 6495 + }, + { + "epoch": 0.09, + "grad_norm": 0.5078125, + "learning_rate": 0.00018646012621916237, + "loss": 1.1288, + "step": 6500 + }, + { + "epoch": 0.09, + "grad_norm": 0.51171875, + "learning_rate": 0.00018660355708548481, + "loss": 0.9184, + "step": 6505 + }, + { + "epoch": 0.09, + "grad_norm": 0.51953125, + "learning_rate": 0.00018674698795180723, + "loss": 0.9487, + "step": 6510 + }, + { + "epoch": 0.09, + "grad_norm": 0.439453125, + "learning_rate": 0.00018689041881812967, + "loss": 0.8626, + "step": 6515 + }, + { + "epoch": 0.09, + "grad_norm": 0.484375, + "learning_rate": 0.0001870338496844521, + "loss": 0.926, + "step": 6520 + }, + { + "epoch": 0.09, + "grad_norm": 0.482421875, + "learning_rate": 0.00018717728055077452, + "loss": 0.9192, + "step": 6525 + }, + { + "epoch": 0.09, + "grad_norm": 0.49609375, + "learning_rate": 0.00018732071141709696, + "loss": 0.9383, + "step": 6530 + }, + { + "epoch": 0.09, + "grad_norm": 0.5078125, + "learning_rate": 0.0001874641422834194, + "loss": 0.8595, + "step": 6535 + }, + { + "epoch": 0.09, + "grad_norm": 0.486328125, + "learning_rate": 0.00018760757314974184, + "loss": 0.8997, + "step": 6540 + }, + { + "epoch": 0.09, + "grad_norm": 0.427734375, + "learning_rate": 0.00018775100401606428, + "loss": 0.9123, + "step": 6545 + }, + { + "epoch": 0.09, + "grad_norm": 0.4609375, + "learning_rate": 0.0001878944348823867, + "loss": 0.9546, + "step": 6550 + }, + { + "epoch": 0.09, + "grad_norm": 0.47265625, + "learning_rate": 0.00018803786574870913, + "loss": 0.9023, + "step": 6555 + }, + { + "epoch": 0.09, + "grad_norm": 0.5234375, + "learning_rate": 0.00018818129661503157, + "loss": 0.9866, + "step": 6560 + }, + { + "epoch": 0.09, + "grad_norm": 0.54296875, + "learning_rate": 0.000188324727481354, + "loss": 1.1346, + "step": 6565 + }, + { + "epoch": 0.09, + "grad_norm": 0.4921875, + "learning_rate": 0.00018846815834767643, + "loss": 0.946, + "step": 6570 + }, + { + "epoch": 0.09, + "grad_norm": 0.5078125, + "learning_rate": 0.00018861158921399887, + "loss": 0.9296, + "step": 6575 + }, + { + "epoch": 0.09, + "grad_norm": 0.443359375, + "learning_rate": 0.00018875502008032128, + "loss": 0.9713, + "step": 6580 + }, + { + "epoch": 0.09, + "grad_norm": 0.498046875, + "learning_rate": 0.00018889845094664375, + "loss": 1.0274, + "step": 6585 + }, + { + "epoch": 0.09, + "grad_norm": 0.53515625, + "learning_rate": 0.00018904188181296616, + "loss": 0.8749, + "step": 6590 + }, + { + "epoch": 0.09, + "grad_norm": 0.494140625, + "learning_rate": 0.00018918531267928857, + "loss": 0.989, + "step": 6595 + }, + { + "epoch": 0.09, + "grad_norm": 0.478515625, + "learning_rate": 0.00018932874354561104, + "loss": 0.9316, + "step": 6600 + }, + { + "epoch": 0.09, + "grad_norm": 0.5078125, + "learning_rate": 0.00018947217441193345, + "loss": 0.9591, + "step": 6605 + }, + { + "epoch": 0.09, + "grad_norm": 0.5546875, + "learning_rate": 0.0001896156052782559, + "loss": 1.045, + "step": 6610 + }, + { + "epoch": 0.09, + "grad_norm": 0.5078125, + "learning_rate": 0.00018975903614457833, + "loss": 1.0241, + "step": 6615 + }, + { + "epoch": 0.09, + "grad_norm": 0.51171875, + "learning_rate": 0.00018990246701090075, + "loss": 1.0077, + "step": 6620 + }, + { + "epoch": 0.1, + "grad_norm": 0.54296875, + "learning_rate": 0.0001900458978772232, + "loss": 1.047, + "step": 6625 + }, + { + "epoch": 0.1, + "grad_norm": 0.515625, + "learning_rate": 0.00019018932874354563, + "loss": 0.986, + "step": 6630 + }, + { + "epoch": 0.1, + "grad_norm": 0.46484375, + "learning_rate": 0.00019033275960986804, + "loss": 0.8676, + "step": 6635 + }, + { + "epoch": 0.1, + "grad_norm": 0.447265625, + "learning_rate": 0.00019047619047619048, + "loss": 0.9166, + "step": 6640 + }, + { + "epoch": 0.1, + "grad_norm": 0.46484375, + "learning_rate": 0.00019061962134251292, + "loss": 1.1594, + "step": 6645 + }, + { + "epoch": 0.1, + "grad_norm": 0.4921875, + "learning_rate": 0.00019076305220883533, + "loss": 0.9954, + "step": 6650 + }, + { + "epoch": 0.1, + "grad_norm": 0.51953125, + "learning_rate": 0.0001909064830751578, + "loss": 0.9052, + "step": 6655 + }, + { + "epoch": 0.1, + "grad_norm": 0.53125, + "learning_rate": 0.00019104991394148021, + "loss": 0.9444, + "step": 6660 + }, + { + "epoch": 0.1, + "grad_norm": 0.482421875, + "learning_rate": 0.00019119334480780266, + "loss": 0.9136, + "step": 6665 + }, + { + "epoch": 0.1, + "grad_norm": 0.50390625, + "learning_rate": 0.0001913367756741251, + "loss": 0.9398, + "step": 6670 + }, + { + "epoch": 0.1, + "grad_norm": 0.5234375, + "learning_rate": 0.0001914802065404475, + "loss": 1.1613, + "step": 6675 + }, + { + "epoch": 0.1, + "grad_norm": 0.5078125, + "learning_rate": 0.00019162363740676995, + "loss": 1.0539, + "step": 6680 + }, + { + "epoch": 0.1, + "grad_norm": 0.51171875, + "learning_rate": 0.0001917670682730924, + "loss": 1.0843, + "step": 6685 + }, + { + "epoch": 0.1, + "grad_norm": 0.515625, + "learning_rate": 0.0001919104991394148, + "loss": 0.8968, + "step": 6690 + }, + { + "epoch": 0.1, + "grad_norm": 0.498046875, + "learning_rate": 0.00019205393000573724, + "loss": 0.9034, + "step": 6695 + }, + { + "epoch": 0.1, + "grad_norm": 0.498046875, + "learning_rate": 0.00019219736087205968, + "loss": 1.011, + "step": 6700 + }, + { + "epoch": 0.1, + "grad_norm": 0.4765625, + "learning_rate": 0.00019234079173838212, + "loss": 1.0446, + "step": 6705 + }, + { + "epoch": 0.1, + "grad_norm": 0.53515625, + "learning_rate": 0.00019248422260470454, + "loss": 0.8931, + "step": 6710 + }, + { + "epoch": 0.1, + "grad_norm": 0.478515625, + "learning_rate": 0.00019262765347102698, + "loss": 0.8736, + "step": 6715 + }, + { + "epoch": 0.1, + "grad_norm": 0.47265625, + "learning_rate": 0.00019277108433734942, + "loss": 0.9596, + "step": 6720 + }, + { + "epoch": 0.1, + "grad_norm": 0.453125, + "learning_rate": 0.00019291451520367183, + "loss": 0.9351, + "step": 6725 + }, + { + "epoch": 0.1, + "grad_norm": 0.50390625, + "learning_rate": 0.00019305794606999427, + "loss": 0.9922, + "step": 6730 + }, + { + "epoch": 0.1, + "grad_norm": 0.5234375, + "learning_rate": 0.0001932013769363167, + "loss": 1.0471, + "step": 6735 + }, + { + "epoch": 0.1, + "grad_norm": 0.4140625, + "learning_rate": 0.00019334480780263912, + "loss": 0.9003, + "step": 6740 + }, + { + "epoch": 0.1, + "grad_norm": 0.4375, + "learning_rate": 0.00019348823866896156, + "loss": 0.8397, + "step": 6745 + }, + { + "epoch": 0.1, + "grad_norm": 0.50390625, + "learning_rate": 0.000193631669535284, + "loss": 0.9666, + "step": 6750 + }, + { + "epoch": 0.1, + "grad_norm": 0.470703125, + "learning_rate": 0.00019377510040160642, + "loss": 0.9207, + "step": 6755 + }, + { + "epoch": 0.1, + "grad_norm": 0.4921875, + "learning_rate": 0.00019391853126792888, + "loss": 0.997, + "step": 6760 + }, + { + "epoch": 0.1, + "grad_norm": 0.4765625, + "learning_rate": 0.0001940619621342513, + "loss": 0.9651, + "step": 6765 + }, + { + "epoch": 0.1, + "grad_norm": 0.71484375, + "learning_rate": 0.00019420539300057374, + "loss": 0.9569, + "step": 6770 + }, + { + "epoch": 0.1, + "grad_norm": 0.50390625, + "learning_rate": 0.00019434882386689618, + "loss": 1.0032, + "step": 6775 + }, + { + "epoch": 0.1, + "grad_norm": 0.43359375, + "learning_rate": 0.0001944922547332186, + "loss": 0.8701, + "step": 6780 + }, + { + "epoch": 0.1, + "grad_norm": 0.515625, + "learning_rate": 0.00019463568559954103, + "loss": 0.8503, + "step": 6785 + }, + { + "epoch": 0.1, + "grad_norm": 0.5859375, + "learning_rate": 0.00019477911646586347, + "loss": 0.9847, + "step": 6790 + }, + { + "epoch": 0.1, + "grad_norm": 0.44140625, + "learning_rate": 0.00019492254733218588, + "loss": 0.9694, + "step": 6795 + }, + { + "epoch": 0.1, + "grad_norm": 0.466796875, + "learning_rate": 0.00019506597819850835, + "loss": 0.8473, + "step": 6800 + }, + { + "epoch": 0.1, + "grad_norm": 0.5, + "learning_rate": 0.00019520940906483076, + "loss": 0.8632, + "step": 6805 + }, + { + "epoch": 0.1, + "grad_norm": 0.53125, + "learning_rate": 0.00019535283993115318, + "loss": 1.0169, + "step": 6810 + }, + { + "epoch": 0.1, + "grad_norm": 0.484375, + "learning_rate": 0.00019549627079747564, + "loss": 0.9015, + "step": 6815 + }, + { + "epoch": 0.1, + "grad_norm": 0.61328125, + "learning_rate": 0.00019563970166379806, + "loss": 1.1592, + "step": 6820 + }, + { + "epoch": 0.1, + "grad_norm": 0.53515625, + "learning_rate": 0.0001957831325301205, + "loss": 0.9537, + "step": 6825 + }, + { + "epoch": 0.1, + "grad_norm": 0.53515625, + "learning_rate": 0.00019592656339644294, + "loss": 1.0179, + "step": 6830 + }, + { + "epoch": 0.1, + "grad_norm": 0.470703125, + "learning_rate": 0.00019606999426276535, + "loss": 0.865, + "step": 6835 + }, + { + "epoch": 0.1, + "grad_norm": 0.4921875, + "learning_rate": 0.0001962134251290878, + "loss": 1.0772, + "step": 6840 + }, + { + "epoch": 0.1, + "grad_norm": 0.455078125, + "learning_rate": 0.00019635685599541023, + "loss": 0.9969, + "step": 6845 + }, + { + "epoch": 0.1, + "grad_norm": 0.416015625, + "learning_rate": 0.00019650028686173264, + "loss": 0.8717, + "step": 6850 + }, + { + "epoch": 0.1, + "grad_norm": 0.51953125, + "learning_rate": 0.00019664371772805508, + "loss": 0.9652, + "step": 6855 + }, + { + "epoch": 0.1, + "grad_norm": 0.4765625, + "learning_rate": 0.00019678714859437752, + "loss": 1.0097, + "step": 6860 + }, + { + "epoch": 0.1, + "grad_norm": 0.546875, + "learning_rate": 0.00019693057946069994, + "loss": 0.9766, + "step": 6865 + }, + { + "epoch": 0.1, + "grad_norm": 0.470703125, + "learning_rate": 0.00019707401032702238, + "loss": 1.0268, + "step": 6870 + }, + { + "epoch": 0.1, + "grad_norm": 0.484375, + "learning_rate": 0.00019721744119334482, + "loss": 0.924, + "step": 6875 + }, + { + "epoch": 0.1, + "grad_norm": 0.462890625, + "learning_rate": 0.00019736087205966726, + "loss": 0.9296, + "step": 6880 + }, + { + "epoch": 0.1, + "grad_norm": 0.51171875, + "learning_rate": 0.0001975043029259897, + "loss": 1.0601, + "step": 6885 + }, + { + "epoch": 0.1, + "grad_norm": 0.51953125, + "learning_rate": 0.0001976477337923121, + "loss": 0.9167, + "step": 6890 + }, + { + "epoch": 0.1, + "grad_norm": 0.490234375, + "learning_rate": 0.00019779116465863455, + "loss": 0.9092, + "step": 6895 + }, + { + "epoch": 0.1, + "grad_norm": 0.5078125, + "learning_rate": 0.000197934595524957, + "loss": 0.9494, + "step": 6900 + }, + { + "epoch": 0.1, + "grad_norm": 0.48046875, + "learning_rate": 0.0001980780263912794, + "loss": 0.8921, + "step": 6905 + }, + { + "epoch": 0.1, + "grad_norm": 0.482421875, + "learning_rate": 0.00019822145725760184, + "loss": 0.9716, + "step": 6910 + }, + { + "epoch": 0.1, + "grad_norm": 0.5234375, + "learning_rate": 0.00019836488812392428, + "loss": 0.9527, + "step": 6915 + }, + { + "epoch": 0.1, + "grad_norm": 0.490234375, + "learning_rate": 0.00019850831899024672, + "loss": 1.0112, + "step": 6920 + }, + { + "epoch": 0.1, + "grad_norm": 0.51171875, + "learning_rate": 0.00019865174985656914, + "loss": 1.0535, + "step": 6925 + }, + { + "epoch": 0.1, + "grad_norm": 0.51953125, + "learning_rate": 0.00019879518072289158, + "loss": 0.8874, + "step": 6930 + }, + { + "epoch": 0.1, + "grad_norm": 0.54296875, + "learning_rate": 0.00019893861158921402, + "loss": 1.0039, + "step": 6935 + }, + { + "epoch": 0.1, + "grad_norm": 0.5703125, + "learning_rate": 0.00019908204245553643, + "loss": 1.0371, + "step": 6940 + }, + { + "epoch": 0.1, + "grad_norm": 0.486328125, + "learning_rate": 0.00019922547332185887, + "loss": 1.2052, + "step": 6945 + }, + { + "epoch": 0.1, + "grad_norm": 0.53515625, + "learning_rate": 0.0001993689041881813, + "loss": 0.942, + "step": 6950 + }, + { + "epoch": 0.1, + "grad_norm": 0.5234375, + "learning_rate": 0.00019951233505450372, + "loss": 1.0566, + "step": 6955 + }, + { + "epoch": 0.1, + "grad_norm": 0.486328125, + "learning_rate": 0.00019965576592082616, + "loss": 0.9806, + "step": 6960 + }, + { + "epoch": 0.1, + "grad_norm": 0.48046875, + "learning_rate": 0.0001997991967871486, + "loss": 0.8986, + "step": 6965 + }, + { + "epoch": 0.1, + "grad_norm": 0.451171875, + "learning_rate": 0.00019994262765347102, + "loss": 1.0299, + "step": 6970 + }, + { + "epoch": 0.1, + "grad_norm": 0.56640625, + "learning_rate": 0.0001999999988717395, + "loss": 0.977, + "step": 6975 + }, + { + "epoch": 0.1, + "grad_norm": 0.470703125, + "learning_rate": 0.0001999999919768143, + "loss": 0.8963, + "step": 6980 + }, + { + "epoch": 0.1, + "grad_norm": 0.58203125, + "learning_rate": 0.0001999999788137757, + "loss": 1.0324, + "step": 6985 + }, + { + "epoch": 0.1, + "grad_norm": 0.48046875, + "learning_rate": 0.00019999995938262455, + "loss": 0.9285, + "step": 6990 + }, + { + "epoch": 0.1, + "grad_norm": 0.484375, + "learning_rate": 0.00019999993368336204, + "loss": 0.878, + "step": 6995 + }, + { + "epoch": 0.1, + "grad_norm": 0.50390625, + "learning_rate": 0.0001999999017159898, + "loss": 1.0065, + "step": 7000 + }, + { + "epoch": 0.1, + "grad_norm": 0.490234375, + "learning_rate": 0.00019999986348050983, + "loss": 1.0322, + "step": 7005 + }, + { + "epoch": 0.1, + "grad_norm": 0.490234375, + "learning_rate": 0.00019999981897692452, + "loss": 0.9879, + "step": 7010 + }, + { + "epoch": 0.1, + "grad_norm": 0.515625, + "learning_rate": 0.00019999976820523667, + "loss": 0.8922, + "step": 7015 + }, + { + "epoch": 0.1, + "grad_norm": 0.546875, + "learning_rate": 0.00019999971116544947, + "loss": 1.0516, + "step": 7020 + }, + { + "epoch": 0.1, + "grad_norm": 0.53515625, + "learning_rate": 0.0001999996478575665, + "loss": 0.9043, + "step": 7025 + }, + { + "epoch": 0.1, + "grad_norm": 0.52734375, + "learning_rate": 0.0001999995782815917, + "loss": 1.0973, + "step": 7030 + }, + { + "epoch": 0.1, + "grad_norm": 0.50390625, + "learning_rate": 0.0001999995024375294, + "loss": 0.9128, + "step": 7035 + }, + { + "epoch": 0.1, + "grad_norm": 0.51171875, + "learning_rate": 0.00019999942032538444, + "loss": 1.0025, + "step": 7040 + }, + { + "epoch": 0.1, + "grad_norm": 0.4765625, + "learning_rate": 0.0001999993319451619, + "loss": 0.8311, + "step": 7045 + }, + { + "epoch": 0.1, + "grad_norm": 0.490234375, + "learning_rate": 0.00019999923729686737, + "loss": 1.0452, + "step": 7050 + }, + { + "epoch": 0.1, + "grad_norm": 0.470703125, + "learning_rate": 0.00019999913638050674, + "loss": 1.0823, + "step": 7055 + }, + { + "epoch": 0.1, + "grad_norm": 0.44140625, + "learning_rate": 0.00019999902919608635, + "loss": 0.8616, + "step": 7060 + }, + { + "epoch": 0.1, + "grad_norm": 0.51953125, + "learning_rate": 0.00019999891574361294, + "loss": 0.8661, + "step": 7065 + }, + { + "epoch": 0.1, + "grad_norm": 0.49609375, + "learning_rate": 0.00019999879602309358, + "loss": 1.0924, + "step": 7070 + }, + { + "epoch": 0.1, + "grad_norm": 0.474609375, + "learning_rate": 0.0001999986700345358, + "loss": 0.9282, + "step": 7075 + }, + { + "epoch": 0.1, + "grad_norm": 0.4296875, + "learning_rate": 0.0001999985377779475, + "loss": 0.8499, + "step": 7080 + }, + { + "epoch": 0.1, + "grad_norm": 0.47265625, + "learning_rate": 0.00019999839925333697, + "loss": 0.9638, + "step": 7085 + }, + { + "epoch": 0.1, + "grad_norm": 0.484375, + "learning_rate": 0.0001999982544607129, + "loss": 1.019, + "step": 7090 + }, + { + "epoch": 0.1, + "grad_norm": 0.466796875, + "learning_rate": 0.00019999810340008432, + "loss": 1.0765, + "step": 7095 + }, + { + "epoch": 0.1, + "grad_norm": 0.48046875, + "learning_rate": 0.00019999794607146072, + "loss": 0.9585, + "step": 7100 + }, + { + "epoch": 0.1, + "grad_norm": 0.51953125, + "learning_rate": 0.00019999778247485202, + "loss": 0.9834, + "step": 7105 + }, + { + "epoch": 0.1, + "grad_norm": 0.5234375, + "learning_rate": 0.00019999761261026838, + "loss": 1.0673, + "step": 7110 + }, + { + "epoch": 0.1, + "grad_norm": 0.5234375, + "learning_rate": 0.00019999743647772055, + "loss": 0.9441, + "step": 7115 + }, + { + "epoch": 0.1, + "grad_norm": 0.5078125, + "learning_rate": 0.00019999725407721946, + "loss": 0.7877, + "step": 7120 + }, + { + "epoch": 0.1, + "grad_norm": 0.55859375, + "learning_rate": 0.0001999970654087766, + "loss": 0.9829, + "step": 7125 + }, + { + "epoch": 0.1, + "grad_norm": 0.50390625, + "learning_rate": 0.00019999687047240382, + "loss": 0.9581, + "step": 7130 + }, + { + "epoch": 0.1, + "grad_norm": 0.423828125, + "learning_rate": 0.0001999966692681133, + "loss": 0.9482, + "step": 7135 + }, + { + "epoch": 0.1, + "grad_norm": 0.5234375, + "learning_rate": 0.00019999646179591767, + "loss": 1.0197, + "step": 7140 + }, + { + "epoch": 0.1, + "grad_norm": 0.74609375, + "learning_rate": 0.0001999962480558299, + "loss": 0.8861, + "step": 7145 + }, + { + "epoch": 0.1, + "grad_norm": 0.59375, + "learning_rate": 0.00019999602804786343, + "loss": 1.0572, + "step": 7150 + }, + { + "epoch": 0.1, + "grad_norm": 0.51171875, + "learning_rate": 0.00019999580177203208, + "loss": 0.8916, + "step": 7155 + }, + { + "epoch": 0.1, + "grad_norm": 0.484375, + "learning_rate": 0.00019999556922834993, + "loss": 0.8698, + "step": 7160 + }, + { + "epoch": 0.1, + "grad_norm": 0.53125, + "learning_rate": 0.00019999533041683166, + "loss": 0.9218, + "step": 7165 + }, + { + "epoch": 0.1, + "grad_norm": 0.48828125, + "learning_rate": 0.00019999508533749216, + "loss": 0.8811, + "step": 7170 + }, + { + "epoch": 0.1, + "grad_norm": 0.482421875, + "learning_rate": 0.00019999483399034683, + "loss": 1.0427, + "step": 7175 + }, + { + "epoch": 0.1, + "grad_norm": 0.51171875, + "learning_rate": 0.00019999457637541142, + "loss": 0.8641, + "step": 7180 + }, + { + "epoch": 0.1, + "grad_norm": 0.51953125, + "learning_rate": 0.0001999943124927021, + "loss": 0.9981, + "step": 7185 + }, + { + "epoch": 0.1, + "grad_norm": 0.58984375, + "learning_rate": 0.00019999404234223537, + "loss": 1.0612, + "step": 7190 + }, + { + "epoch": 0.1, + "grad_norm": 0.4609375, + "learning_rate": 0.0001999937659240282, + "loss": 0.9003, + "step": 7195 + }, + { + "epoch": 0.1, + "grad_norm": 0.57421875, + "learning_rate": 0.0001999934832380979, + "loss": 1.0519, + "step": 7200 + }, + { + "epoch": 0.1, + "grad_norm": 0.5078125, + "learning_rate": 0.00019999319428446217, + "loss": 1.1604, + "step": 7205 + }, + { + "epoch": 0.1, + "grad_norm": 0.53125, + "learning_rate": 0.00019999289906313914, + "loss": 1.0522, + "step": 7210 + }, + { + "epoch": 0.1, + "grad_norm": 0.4765625, + "learning_rate": 0.00019999259757414733, + "loss": 0.9722, + "step": 7215 + }, + { + "epoch": 0.1, + "grad_norm": 0.490234375, + "learning_rate": 0.0001999922898175056, + "loss": 1.0899, + "step": 7220 + }, + { + "epoch": 0.1, + "grad_norm": 0.52734375, + "learning_rate": 0.00019999197579323327, + "loss": 0.9977, + "step": 7225 + }, + { + "epoch": 0.1, + "grad_norm": 0.5390625, + "learning_rate": 0.00019999165550135003, + "loss": 0.9424, + "step": 7230 + }, + { + "epoch": 0.1, + "grad_norm": 0.447265625, + "learning_rate": 0.0001999913289418759, + "loss": 0.8548, + "step": 7235 + }, + { + "epoch": 0.1, + "grad_norm": 0.5546875, + "learning_rate": 0.00019999099611483145, + "loss": 0.9436, + "step": 7240 + }, + { + "epoch": 0.1, + "grad_norm": 0.515625, + "learning_rate": 0.00019999065702023744, + "loss": 1.0087, + "step": 7245 + }, + { + "epoch": 0.1, + "grad_norm": 0.44921875, + "learning_rate": 0.0001999903116581152, + "loss": 1.0426, + "step": 7250 + }, + { + "epoch": 0.1, + "grad_norm": 0.546875, + "learning_rate": 0.00019998996002848634, + "loss": 0.9219, + "step": 7255 + }, + { + "epoch": 0.1, + "grad_norm": 0.458984375, + "learning_rate": 0.00019998960213137286, + "loss": 0.933, + "step": 7260 + }, + { + "epoch": 0.1, + "grad_norm": 0.49609375, + "learning_rate": 0.0001999892379667973, + "loss": 1.0413, + "step": 7265 + }, + { + "epoch": 0.1, + "grad_norm": 0.76171875, + "learning_rate": 0.0001999888675347824, + "loss": 0.9412, + "step": 7270 + }, + { + "epoch": 0.1, + "grad_norm": 0.55078125, + "learning_rate": 0.0001999884908353514, + "loss": 0.9745, + "step": 7275 + }, + { + "epoch": 0.1, + "grad_norm": 0.40234375, + "learning_rate": 0.00019998810786852795, + "loss": 0.8132, + "step": 7280 + }, + { + "epoch": 0.1, + "grad_norm": 0.4921875, + "learning_rate": 0.000199987718634336, + "loss": 0.9773, + "step": 7285 + }, + { + "epoch": 0.1, + "grad_norm": 0.73046875, + "learning_rate": 0.00019998732313279994, + "loss": 0.8849, + "step": 7290 + }, + { + "epoch": 0.1, + "grad_norm": 0.5703125, + "learning_rate": 0.00019998692136394465, + "loss": 0.9524, + "step": 7295 + }, + { + "epoch": 0.1, + "grad_norm": 0.5625, + "learning_rate": 0.0001999865133277952, + "loss": 1.0294, + "step": 7300 + }, + { + "epoch": 0.1, + "grad_norm": 0.6328125, + "learning_rate": 0.00019998609902437725, + "loss": 1.0705, + "step": 7305 + }, + { + "epoch": 0.1, + "grad_norm": 0.47265625, + "learning_rate": 0.00019998567845371673, + "loss": 0.9558, + "step": 7310 + }, + { + "epoch": 0.1, + "grad_norm": 0.482421875, + "learning_rate": 0.00019998525161584, + "loss": 1.0696, + "step": 7315 + }, + { + "epoch": 0.11, + "grad_norm": 0.5, + "learning_rate": 0.00019998481851077384, + "loss": 0.8552, + "step": 7320 + }, + { + "epoch": 0.11, + "grad_norm": 0.5, + "learning_rate": 0.00019998437913854538, + "loss": 0.9012, + "step": 7325 + }, + { + "epoch": 0.11, + "grad_norm": 0.494140625, + "learning_rate": 0.00019998393349918216, + "loss": 0.9747, + "step": 7330 + }, + { + "epoch": 0.11, + "grad_norm": 0.46484375, + "learning_rate": 0.0001999834815927121, + "loss": 0.9947, + "step": 7335 + }, + { + "epoch": 0.11, + "grad_norm": 0.58984375, + "learning_rate": 0.00019998302341916357, + "loss": 0.9596, + "step": 7340 + }, + { + "epoch": 0.11, + "grad_norm": 0.53125, + "learning_rate": 0.00019998255897856523, + "loss": 0.9642, + "step": 7345 + }, + { + "epoch": 0.11, + "grad_norm": 0.4921875, + "learning_rate": 0.00019998208827094627, + "loss": 1.0481, + "step": 7350 + }, + { + "epoch": 0.11, + "grad_norm": 0.50390625, + "learning_rate": 0.0001999816112963361, + "loss": 1.0289, + "step": 7355 + }, + { + "epoch": 0.11, + "grad_norm": 0.54296875, + "learning_rate": 0.0001999811280547647, + "loss": 1.0296, + "step": 7360 + }, + { + "epoch": 0.11, + "grad_norm": 0.443359375, + "learning_rate": 0.0001999806385462623, + "loss": 0.9311, + "step": 7365 + }, + { + "epoch": 0.11, + "grad_norm": 0.515625, + "learning_rate": 0.0001999801427708596, + "loss": 1.0396, + "step": 7370 + }, + { + "epoch": 0.11, + "grad_norm": 0.466796875, + "learning_rate": 0.0001999796407285877, + "loss": 0.9284, + "step": 7375 + }, + { + "epoch": 0.11, + "grad_norm": 0.55078125, + "learning_rate": 0.00019997913241947806, + "loss": 1.107, + "step": 7380 + }, + { + "epoch": 0.11, + "grad_norm": 0.48046875, + "learning_rate": 0.00019997861784356254, + "loss": 0.946, + "step": 7385 + }, + { + "epoch": 0.11, + "grad_norm": 0.48828125, + "learning_rate": 0.00019997809700087336, + "loss": 0.9533, + "step": 7390 + }, + { + "epoch": 0.11, + "grad_norm": 0.482421875, + "learning_rate": 0.00019997756989144317, + "loss": 0.9806, + "step": 7395 + }, + { + "epoch": 0.11, + "grad_norm": 0.640625, + "learning_rate": 0.0001999770365153051, + "loss": 1.0817, + "step": 7400 + }, + { + "epoch": 0.11, + "grad_norm": 0.51171875, + "learning_rate": 0.0001999764968724925, + "loss": 0.9468, + "step": 7405 + }, + { + "epoch": 0.11, + "grad_norm": 0.4765625, + "learning_rate": 0.00019997595096303919, + "loss": 0.9327, + "step": 7410 + }, + { + "epoch": 0.11, + "grad_norm": 0.5390625, + "learning_rate": 0.00019997539878697943, + "loss": 1.0226, + "step": 7415 + }, + { + "epoch": 0.11, + "grad_norm": 0.50390625, + "learning_rate": 0.0001999748403443478, + "loss": 1.0015, + "step": 7420 + }, + { + "epoch": 0.11, + "grad_norm": 0.53125, + "learning_rate": 0.0001999742756351793, + "loss": 1.029, + "step": 7425 + }, + { + "epoch": 0.11, + "grad_norm": 0.56640625, + "learning_rate": 0.00019997370465950936, + "loss": 0.9649, + "step": 7430 + }, + { + "epoch": 0.11, + "grad_norm": 0.470703125, + "learning_rate": 0.00019997312741737376, + "loss": 1.0384, + "step": 7435 + }, + { + "epoch": 0.11, + "grad_norm": 0.5, + "learning_rate": 0.00019997254390880867, + "loss": 0.9221, + "step": 7440 + }, + { + "epoch": 0.11, + "grad_norm": 0.578125, + "learning_rate": 0.00019997195413385064, + "loss": 1.0112, + "step": 7445 + }, + { + "epoch": 0.11, + "grad_norm": 0.5078125, + "learning_rate": 0.00019997135809253668, + "loss": 0.8739, + "step": 7450 + }, + { + "epoch": 0.11, + "grad_norm": 0.486328125, + "learning_rate": 0.00019997075578490417, + "loss": 1.0613, + "step": 7455 + }, + { + "epoch": 0.11, + "grad_norm": 0.49609375, + "learning_rate": 0.00019997014721099082, + "loss": 1.0139, + "step": 7460 + }, + { + "epoch": 0.11, + "grad_norm": 0.58203125, + "learning_rate": 0.00019996953237083474, + "loss": 1.0393, + "step": 7465 + }, + { + "epoch": 0.11, + "grad_norm": 0.51953125, + "learning_rate": 0.00019996891126447457, + "loss": 1.0109, + "step": 7470 + }, + { + "epoch": 0.11, + "grad_norm": 0.498046875, + "learning_rate": 0.00019996828389194914, + "loss": 0.901, + "step": 7475 + }, + { + "epoch": 0.11, + "grad_norm": 0.515625, + "learning_rate": 0.00019996765025329785, + "loss": 0.9787, + "step": 7480 + }, + { + "epoch": 0.11, + "grad_norm": 0.498046875, + "learning_rate": 0.0001999670103485604, + "loss": 1.0034, + "step": 7485 + }, + { + "epoch": 0.11, + "grad_norm": 0.65234375, + "learning_rate": 0.00019996636417777687, + "loss": 1.0497, + "step": 7490 + }, + { + "epoch": 0.11, + "grad_norm": 0.5546875, + "learning_rate": 0.0001999657117409878, + "loss": 0.8797, + "step": 7495 + }, + { + "epoch": 0.11, + "grad_norm": 0.4765625, + "learning_rate": 0.00019996505303823402, + "loss": 1.0155, + "step": 7500 + }, + { + "epoch": 0.11, + "grad_norm": 0.53125, + "learning_rate": 0.00019996438806955692, + "loss": 0.9911, + "step": 7505 + }, + { + "epoch": 0.11, + "grad_norm": 0.6875, + "learning_rate": 0.0001999637168349981, + "loss": 1.0781, + "step": 7510 + }, + { + "epoch": 0.11, + "grad_norm": 0.49609375, + "learning_rate": 0.00019996303933459965, + "loss": 1.0164, + "step": 7515 + }, + { + "epoch": 0.11, + "grad_norm": 0.49609375, + "learning_rate": 0.00019996235556840405, + "loss": 0.9753, + "step": 7520 + }, + { + "epoch": 0.11, + "grad_norm": 0.55078125, + "learning_rate": 0.00019996166553645417, + "loss": 0.96, + "step": 7525 + }, + { + "epoch": 0.11, + "grad_norm": 0.5078125, + "learning_rate": 0.00019996096923879323, + "loss": 0.9442, + "step": 7530 + }, + { + "epoch": 0.11, + "grad_norm": 0.494140625, + "learning_rate": 0.00019996026667546492, + "loss": 0.8743, + "step": 7535 + }, + { + "epoch": 0.11, + "grad_norm": 0.52734375, + "learning_rate": 0.00019995955784651322, + "loss": 0.978, + "step": 7540 + }, + { + "epoch": 0.11, + "grad_norm": 0.53515625, + "learning_rate": 0.0001999588427519826, + "loss": 0.969, + "step": 7545 + }, + { + "epoch": 0.11, + "grad_norm": 0.49609375, + "learning_rate": 0.00019995812139191787, + "loss": 0.8663, + "step": 7550 + }, + { + "epoch": 0.11, + "grad_norm": 0.474609375, + "learning_rate": 0.00019995739376636425, + "loss": 1.0276, + "step": 7555 + }, + { + "epoch": 0.11, + "grad_norm": 0.62109375, + "learning_rate": 0.00019995665987536737, + "loss": 1.1052, + "step": 7560 + }, + { + "epoch": 0.11, + "grad_norm": 0.68359375, + "learning_rate": 0.0001999559197189732, + "loss": 1.019, + "step": 7565 + }, + { + "epoch": 0.11, + "grad_norm": 0.62109375, + "learning_rate": 0.0001999551732972281, + "loss": 0.8811, + "step": 7570 + }, + { + "epoch": 0.11, + "grad_norm": 0.52734375, + "learning_rate": 0.00019995442061017892, + "loss": 0.8933, + "step": 7575 + }, + { + "epoch": 0.11, + "grad_norm": 0.55078125, + "learning_rate": 0.00019995366165787283, + "loss": 1.0845, + "step": 7580 + }, + { + "epoch": 0.11, + "grad_norm": 0.5, + "learning_rate": 0.0001999528964403574, + "loss": 0.8869, + "step": 7585 + }, + { + "epoch": 0.11, + "grad_norm": 0.546875, + "learning_rate": 0.00019995212495768055, + "loss": 0.9254, + "step": 7590 + }, + { + "epoch": 0.11, + "grad_norm": 0.515625, + "learning_rate": 0.0001999513472098907, + "loss": 0.938, + "step": 7595 + }, + { + "epoch": 0.11, + "grad_norm": 0.474609375, + "learning_rate": 0.00019995056319703657, + "loss": 1.0029, + "step": 7600 + }, + { + "epoch": 0.11, + "grad_norm": 0.455078125, + "learning_rate": 0.00019994977291916727, + "loss": 0.9692, + "step": 7605 + }, + { + "epoch": 0.11, + "grad_norm": 0.59375, + "learning_rate": 0.00019994897637633244, + "loss": 1.0439, + "step": 7610 + }, + { + "epoch": 0.11, + "grad_norm": 0.5390625, + "learning_rate": 0.00019994817356858187, + "loss": 0.8371, + "step": 7615 + }, + { + "epoch": 0.11, + "grad_norm": 0.49609375, + "learning_rate": 0.00019994736449596598, + "loss": 0.9537, + "step": 7620 + }, + { + "epoch": 0.11, + "grad_norm": 0.5078125, + "learning_rate": 0.00019994654915853546, + "loss": 0.9284, + "step": 7625 + }, + { + "epoch": 0.11, + "grad_norm": 0.50390625, + "learning_rate": 0.00019994572755634138, + "loss": 1.0318, + "step": 7630 + }, + { + "epoch": 0.11, + "grad_norm": 0.5625, + "learning_rate": 0.00019994489968943528, + "loss": 1.1444, + "step": 7635 + }, + { + "epoch": 0.11, + "grad_norm": 0.59375, + "learning_rate": 0.00019994406555786905, + "loss": 0.8694, + "step": 7640 + }, + { + "epoch": 0.11, + "grad_norm": 0.5, + "learning_rate": 0.00019994322516169496, + "loss": 1.0167, + "step": 7645 + }, + { + "epoch": 0.11, + "grad_norm": 0.54296875, + "learning_rate": 0.0001999423785009657, + "loss": 1.1432, + "step": 7650 + }, + { + "epoch": 0.11, + "grad_norm": 0.59375, + "learning_rate": 0.0001999415255757343, + "loss": 0.9968, + "step": 7655 + }, + { + "epoch": 0.11, + "grad_norm": 0.52734375, + "learning_rate": 0.0001999406663860543, + "loss": 0.9843, + "step": 7660 + }, + { + "epoch": 0.11, + "grad_norm": 0.470703125, + "learning_rate": 0.00019993980093197946, + "loss": 0.9883, + "step": 7665 + }, + { + "epoch": 0.11, + "grad_norm": 0.56640625, + "learning_rate": 0.0001999389292135641, + "loss": 0.8624, + "step": 7670 + }, + { + "epoch": 0.11, + "grad_norm": 0.59765625, + "learning_rate": 0.00019993805123086285, + "loss": 1.0464, + "step": 7675 + }, + { + "epoch": 0.11, + "grad_norm": 0.44921875, + "learning_rate": 0.0001999371669839307, + "loss": 0.8542, + "step": 7680 + }, + { + "epoch": 0.11, + "grad_norm": 0.494140625, + "learning_rate": 0.0001999362764728231, + "loss": 0.9581, + "step": 7685 + }, + { + "epoch": 0.11, + "grad_norm": 0.5234375, + "learning_rate": 0.00019993537969759592, + "loss": 0.935, + "step": 7690 + }, + { + "epoch": 0.11, + "grad_norm": 0.578125, + "learning_rate": 0.0001999344766583053, + "loss": 0.8918, + "step": 7695 + }, + { + "epoch": 0.11, + "grad_norm": 0.5859375, + "learning_rate": 0.00019993356735500782, + "loss": 1.0286, + "step": 7700 + }, + { + "epoch": 0.11, + "grad_norm": 0.640625, + "learning_rate": 0.0001999326517877606, + "loss": 1.0116, + "step": 7705 + }, + { + "epoch": 0.11, + "grad_norm": 0.55078125, + "learning_rate": 0.0001999317299566209, + "loss": 0.9422, + "step": 7710 + }, + { + "epoch": 0.11, + "grad_norm": 0.458984375, + "learning_rate": 0.0001999308018616466, + "loss": 0.9841, + "step": 7715 + }, + { + "epoch": 0.11, + "grad_norm": 0.58203125, + "learning_rate": 0.00019992986750289577, + "loss": 1.05, + "step": 7720 + }, + { + "epoch": 0.11, + "grad_norm": 0.55859375, + "learning_rate": 0.00019992892688042705, + "loss": 0.9811, + "step": 7725 + }, + { + "epoch": 0.11, + "grad_norm": 0.546875, + "learning_rate": 0.0001999279799942994, + "loss": 1.144, + "step": 7730 + }, + { + "epoch": 0.11, + "grad_norm": 0.498046875, + "learning_rate": 0.00019992702684457216, + "loss": 0.8471, + "step": 7735 + }, + { + "epoch": 0.11, + "grad_norm": 0.474609375, + "learning_rate": 0.00019992606743130507, + "loss": 0.8222, + "step": 7740 + }, + { + "epoch": 0.11, + "grad_norm": 0.486328125, + "learning_rate": 0.00019992510175455825, + "loss": 0.9594, + "step": 7745 + }, + { + "epoch": 0.11, + "grad_norm": 0.55859375, + "learning_rate": 0.00019992412981439226, + "loss": 0.9451, + "step": 7750 + }, + { + "epoch": 0.11, + "grad_norm": 0.51171875, + "learning_rate": 0.000199923151610868, + "loss": 0.8905, + "step": 7755 + }, + { + "epoch": 0.11, + "grad_norm": 0.5234375, + "learning_rate": 0.00019992216714404681, + "loss": 1.06, + "step": 7760 + }, + { + "epoch": 0.11, + "grad_norm": 0.462890625, + "learning_rate": 0.00019992117641399036, + "loss": 0.9044, + "step": 7765 + }, + { + "epoch": 0.11, + "grad_norm": 0.494140625, + "learning_rate": 0.0001999201794207608, + "loss": 0.8762, + "step": 7770 + }, + { + "epoch": 0.11, + "grad_norm": 0.47265625, + "learning_rate": 0.00019991917616442057, + "loss": 0.9525, + "step": 7775 + }, + { + "epoch": 0.11, + "grad_norm": 0.515625, + "learning_rate": 0.0001999181666450326, + "loss": 1.1321, + "step": 7780 + }, + { + "epoch": 0.11, + "grad_norm": 0.55859375, + "learning_rate": 0.0001999171508626601, + "loss": 0.9884, + "step": 7785 + }, + { + "epoch": 0.11, + "grad_norm": 0.515625, + "learning_rate": 0.00019991612881736684, + "loss": 0.9493, + "step": 7790 + }, + { + "epoch": 0.11, + "grad_norm": 0.53515625, + "learning_rate": 0.0001999151005092168, + "loss": 1.0176, + "step": 7795 + }, + { + "epoch": 0.11, + "grad_norm": 0.54296875, + "learning_rate": 0.0001999140659382745, + "loss": 0.9507, + "step": 7800 + }, + { + "epoch": 0.11, + "grad_norm": 0.546875, + "learning_rate": 0.00019991302510460472, + "loss": 0.9906, + "step": 7805 + }, + { + "epoch": 0.11, + "grad_norm": 0.52734375, + "learning_rate": 0.00019991197800827275, + "loss": 0.9058, + "step": 7810 + }, + { + "epoch": 0.11, + "grad_norm": 0.5546875, + "learning_rate": 0.0001999109246493442, + "loss": 0.9245, + "step": 7815 + }, + { + "epoch": 0.11, + "grad_norm": 0.5625, + "learning_rate": 0.0001999098650278851, + "loss": 1.0691, + "step": 7820 + }, + { + "epoch": 0.11, + "grad_norm": 0.52734375, + "learning_rate": 0.00019990879914396189, + "loss": 0.9908, + "step": 7825 + }, + { + "epoch": 0.11, + "grad_norm": 0.396484375, + "learning_rate": 0.00019990772699764135, + "loss": 0.8805, + "step": 7830 + }, + { + "epoch": 0.11, + "grad_norm": 0.5234375, + "learning_rate": 0.0001999066485889907, + "loss": 0.9424, + "step": 7835 + }, + { + "epoch": 0.11, + "grad_norm": 0.5234375, + "learning_rate": 0.00019990556391807752, + "loss": 0.8939, + "step": 7840 + }, + { + "epoch": 0.11, + "grad_norm": 0.5234375, + "learning_rate": 0.00019990447298496986, + "loss": 1.0941, + "step": 7845 + }, + { + "epoch": 0.11, + "grad_norm": 0.50390625, + "learning_rate": 0.000199903375789736, + "loss": 1.064, + "step": 7850 + }, + { + "epoch": 0.11, + "grad_norm": 0.5078125, + "learning_rate": 0.00019990227233244477, + "loss": 1.1336, + "step": 7855 + }, + { + "epoch": 0.11, + "grad_norm": 0.453125, + "learning_rate": 0.00019990116261316536, + "loss": 0.8951, + "step": 7860 + }, + { + "epoch": 0.11, + "grad_norm": 0.52734375, + "learning_rate": 0.00019990004663196728, + "loss": 0.8861, + "step": 7865 + }, + { + "epoch": 0.11, + "grad_norm": 0.50390625, + "learning_rate": 0.0001998989243889205, + "loss": 0.9515, + "step": 7870 + }, + { + "epoch": 0.11, + "grad_norm": 0.546875, + "learning_rate": 0.00019989779588409538, + "loss": 0.81, + "step": 7875 + }, + { + "epoch": 0.11, + "grad_norm": 0.50390625, + "learning_rate": 0.00019989666111756263, + "loss": 0.937, + "step": 7880 + }, + { + "epoch": 0.11, + "grad_norm": 0.486328125, + "learning_rate": 0.00019989552008939336, + "loss": 1.0635, + "step": 7885 + }, + { + "epoch": 0.11, + "grad_norm": 0.5078125, + "learning_rate": 0.00019989437279965917, + "loss": 1.0161, + "step": 7890 + }, + { + "epoch": 0.11, + "grad_norm": 0.5859375, + "learning_rate": 0.00019989321924843186, + "loss": 1.0589, + "step": 7895 + }, + { + "epoch": 0.11, + "grad_norm": 0.53515625, + "learning_rate": 0.00019989205943578387, + "loss": 0.9836, + "step": 7900 + }, + { + "epoch": 0.11, + "grad_norm": 0.5, + "learning_rate": 0.0001998908933617878, + "loss": 0.9613, + "step": 7905 + }, + { + "epoch": 0.11, + "grad_norm": 0.5859375, + "learning_rate": 0.00019988972102651675, + "loss": 1.0425, + "step": 7910 + }, + { + "epoch": 0.11, + "grad_norm": 0.609375, + "learning_rate": 0.00019988854243004422, + "loss": 0.9164, + "step": 7915 + }, + { + "epoch": 0.11, + "grad_norm": 0.609375, + "learning_rate": 0.0001998873575724441, + "loss": 0.9471, + "step": 7920 + }, + { + "epoch": 0.11, + "grad_norm": 0.490234375, + "learning_rate": 0.00019988616645379064, + "loss": 0.8745, + "step": 7925 + }, + { + "epoch": 0.11, + "grad_norm": 0.54296875, + "learning_rate": 0.0001998849690741585, + "loss": 0.8713, + "step": 7930 + }, + { + "epoch": 0.11, + "grad_norm": 0.51953125, + "learning_rate": 0.00019988376543362277, + "loss": 1.0146, + "step": 7935 + }, + { + "epoch": 0.11, + "grad_norm": 0.59375, + "learning_rate": 0.00019988255553225886, + "loss": 1.0469, + "step": 7940 + }, + { + "epoch": 0.11, + "grad_norm": 0.5234375, + "learning_rate": 0.00019988133937014256, + "loss": 1.032, + "step": 7945 + }, + { + "epoch": 0.11, + "grad_norm": 0.53125, + "learning_rate": 0.0001998801169473502, + "loss": 0.9113, + "step": 7950 + }, + { + "epoch": 0.11, + "grad_norm": 0.52734375, + "learning_rate": 0.00019987888826395836, + "loss": 1.0555, + "step": 7955 + }, + { + "epoch": 0.11, + "grad_norm": 0.466796875, + "learning_rate": 0.00019987765332004403, + "loss": 1.0353, + "step": 7960 + }, + { + "epoch": 0.11, + "grad_norm": 0.5078125, + "learning_rate": 0.00019987641211568463, + "loss": 0.8749, + "step": 7965 + }, + { + "epoch": 0.11, + "grad_norm": 0.5390625, + "learning_rate": 0.00019987516465095798, + "loss": 1.0204, + "step": 7970 + }, + { + "epoch": 0.11, + "grad_norm": 0.515625, + "learning_rate": 0.00019987391092594228, + "loss": 0.978, + "step": 7975 + }, + { + "epoch": 0.11, + "grad_norm": 0.4921875, + "learning_rate": 0.00019987265094071607, + "loss": 1.0604, + "step": 7980 + }, + { + "epoch": 0.11, + "grad_norm": 0.466796875, + "learning_rate": 0.00019987138469535837, + "loss": 0.9035, + "step": 7985 + }, + { + "epoch": 0.11, + "grad_norm": 0.5, + "learning_rate": 0.00019987011218994854, + "loss": 1.1171, + "step": 7990 + }, + { + "epoch": 0.11, + "grad_norm": 0.51171875, + "learning_rate": 0.00019986883342456633, + "loss": 0.9754, + "step": 7995 + }, + { + "epoch": 0.11, + "grad_norm": 0.578125, + "learning_rate": 0.00019986754839929188, + "loss": 1.0346, + "step": 8000 + }, + { + "epoch": 0.11, + "grad_norm": 0.486328125, + "learning_rate": 0.00019986625711420578, + "loss": 0.9792, + "step": 8005 + }, + { + "epoch": 0.11, + "grad_norm": 0.63671875, + "learning_rate": 0.00019986495956938894, + "loss": 1.1406, + "step": 8010 + }, + { + "epoch": 0.11, + "grad_norm": 0.484375, + "learning_rate": 0.00019986365576492266, + "loss": 0.9641, + "step": 8015 + }, + { + "epoch": 0.12, + "grad_norm": 0.5, + "learning_rate": 0.00019986234570088876, + "loss": 1.0433, + "step": 8020 + }, + { + "epoch": 0.12, + "grad_norm": 0.59765625, + "learning_rate": 0.0001998610293773693, + "loss": 1.013, + "step": 8025 + }, + { + "epoch": 0.12, + "grad_norm": 0.734375, + "learning_rate": 0.0001998597067944467, + "loss": 1.0205, + "step": 8030 + }, + { + "epoch": 0.12, + "grad_norm": 0.51953125, + "learning_rate": 0.00019985837795220404, + "loss": 0.8346, + "step": 8035 + }, + { + "epoch": 0.12, + "grad_norm": 0.53515625, + "learning_rate": 0.0001998570428507245, + "loss": 1.0299, + "step": 8040 + }, + { + "epoch": 0.12, + "grad_norm": 0.55859375, + "learning_rate": 0.00019985570149009177, + "loss": 0.8954, + "step": 8045 + }, + { + "epoch": 0.12, + "grad_norm": 0.53515625, + "learning_rate": 0.00019985435387038996, + "loss": 1.0256, + "step": 8050 + }, + { + "epoch": 0.12, + "grad_norm": 0.5859375, + "learning_rate": 0.0001998529999917035, + "loss": 1.1181, + "step": 8055 + }, + { + "epoch": 0.12, + "grad_norm": 0.55078125, + "learning_rate": 0.0001998516398541173, + "loss": 0.9315, + "step": 8060 + }, + { + "epoch": 0.12, + "grad_norm": 0.51953125, + "learning_rate": 0.00019985027345771657, + "loss": 1.0552, + "step": 8065 + }, + { + "epoch": 0.12, + "grad_norm": 0.5078125, + "learning_rate": 0.000199848900802587, + "loss": 1.0288, + "step": 8070 + }, + { + "epoch": 0.12, + "grad_norm": 0.5703125, + "learning_rate": 0.0001998475218888146, + "loss": 1.0476, + "step": 8075 + }, + { + "epoch": 0.12, + "grad_norm": 0.5546875, + "learning_rate": 0.00019984613671648579, + "loss": 1.0866, + "step": 8080 + }, + { + "epoch": 0.12, + "grad_norm": 0.515625, + "learning_rate": 0.00019984474528568746, + "loss": 0.9443, + "step": 8085 + }, + { + "epoch": 0.12, + "grad_norm": 0.51953125, + "learning_rate": 0.00019984334759650676, + "loss": 0.9872, + "step": 8090 + }, + { + "epoch": 0.12, + "grad_norm": 0.60546875, + "learning_rate": 0.0001998419436490313, + "loss": 0.9306, + "step": 8095 + }, + { + "epoch": 0.12, + "grad_norm": 0.58984375, + "learning_rate": 0.00019984053344334916, + "loss": 1.1319, + "step": 8100 + }, + { + "epoch": 0.12, + "grad_norm": 0.55078125, + "learning_rate": 0.00019983911697954863, + "loss": 0.9468, + "step": 8105 + }, + { + "epoch": 0.12, + "grad_norm": 0.49609375, + "learning_rate": 0.00019983769425771855, + "loss": 1.0312, + "step": 8110 + }, + { + "epoch": 0.12, + "grad_norm": 0.5546875, + "learning_rate": 0.00019983626527794808, + "loss": 1.0196, + "step": 8115 + }, + { + "epoch": 0.12, + "grad_norm": 0.51171875, + "learning_rate": 0.0001998348300403268, + "loss": 0.8862, + "step": 8120 + }, + { + "epoch": 0.12, + "grad_norm": 0.5234375, + "learning_rate": 0.00019983338854494468, + "loss": 0.8766, + "step": 8125 + }, + { + "epoch": 0.12, + "grad_norm": 0.50390625, + "learning_rate": 0.00019983194079189204, + "loss": 0.9119, + "step": 8130 + }, + { + "epoch": 0.12, + "grad_norm": 0.54296875, + "learning_rate": 0.0001998304867812597, + "loss": 1.1187, + "step": 8135 + }, + { + "epoch": 0.12, + "grad_norm": 0.51171875, + "learning_rate": 0.00019982902651313872, + "loss": 0.9005, + "step": 8140 + }, + { + "epoch": 0.12, + "grad_norm": 0.5390625, + "learning_rate": 0.00019982755998762067, + "loss": 0.9234, + "step": 8145 + }, + { + "epoch": 0.12, + "grad_norm": 0.69921875, + "learning_rate": 0.00019982608720479745, + "loss": 1.0712, + "step": 8150 + }, + { + "epoch": 0.12, + "grad_norm": 0.6015625, + "learning_rate": 0.00019982460816476142, + "loss": 0.8635, + "step": 8155 + }, + { + "epoch": 0.12, + "grad_norm": 0.57421875, + "learning_rate": 0.00019982312286760524, + "loss": 1.1264, + "step": 8160 + }, + { + "epoch": 0.12, + "grad_norm": 0.5234375, + "learning_rate": 0.00019982163131342201, + "loss": 0.9491, + "step": 8165 + }, + { + "epoch": 0.12, + "grad_norm": 0.57421875, + "learning_rate": 0.0001998201335023053, + "loss": 0.956, + "step": 8170 + }, + { + "epoch": 0.12, + "grad_norm": 0.443359375, + "learning_rate": 0.00019981862943434887, + "loss": 0.983, + "step": 8175 + }, + { + "epoch": 0.12, + "grad_norm": 0.474609375, + "learning_rate": 0.00019981711910964714, + "loss": 0.9417, + "step": 8180 + }, + { + "epoch": 0.12, + "grad_norm": 0.5625, + "learning_rate": 0.00019981560252829464, + "loss": 1.0731, + "step": 8185 + }, + { + "epoch": 0.12, + "grad_norm": 0.486328125, + "learning_rate": 0.00019981407969038651, + "loss": 0.9941, + "step": 8190 + }, + { + "epoch": 0.12, + "grad_norm": 0.58203125, + "learning_rate": 0.0001998125505960182, + "loss": 0.935, + "step": 8195 + }, + { + "epoch": 0.12, + "grad_norm": 0.55078125, + "learning_rate": 0.00019981101524528552, + "loss": 0.8414, + "step": 8200 + }, + { + "epoch": 0.12, + "grad_norm": 0.51171875, + "learning_rate": 0.00019980947363828472, + "loss": 1.0664, + "step": 8205 + }, + { + "epoch": 0.12, + "grad_norm": 0.51953125, + "learning_rate": 0.00019980792577511247, + "loss": 0.88, + "step": 8210 + }, + { + "epoch": 0.12, + "grad_norm": 0.5625, + "learning_rate": 0.00019980637165586574, + "loss": 0.9858, + "step": 8215 + }, + { + "epoch": 0.12, + "grad_norm": 0.54296875, + "learning_rate": 0.00019980481128064196, + "loss": 0.9296, + "step": 8220 + }, + { + "epoch": 0.12, + "grad_norm": 0.462890625, + "learning_rate": 0.00019980324464953896, + "loss": 1.0408, + "step": 8225 + }, + { + "epoch": 0.12, + "grad_norm": 0.55859375, + "learning_rate": 0.0001998016717626549, + "loss": 1.1235, + "step": 8230 + }, + { + "epoch": 0.12, + "grad_norm": 0.54296875, + "learning_rate": 0.00019980009262008844, + "loss": 0.8829, + "step": 8235 + }, + { + "epoch": 0.12, + "grad_norm": 0.5, + "learning_rate": 0.00019979850722193847, + "loss": 1.0015, + "step": 8240 + }, + { + "epoch": 0.12, + "grad_norm": 0.494140625, + "learning_rate": 0.0001997969155683044, + "loss": 0.8638, + "step": 8245 + }, + { + "epoch": 0.12, + "grad_norm": 0.56640625, + "learning_rate": 0.000199795317659286, + "loss": 0.8831, + "step": 8250 + }, + { + "epoch": 0.12, + "grad_norm": 0.66796875, + "learning_rate": 0.00019979371349498345, + "loss": 0.8855, + "step": 8255 + }, + { + "epoch": 0.12, + "grad_norm": 0.5, + "learning_rate": 0.00019979210307549726, + "loss": 0.9473, + "step": 8260 + }, + { + "epoch": 0.12, + "grad_norm": 0.59375, + "learning_rate": 0.00019979048640092844, + "loss": 0.919, + "step": 8265 + }, + { + "epoch": 0.12, + "grad_norm": 0.515625, + "learning_rate": 0.00019978886347137825, + "loss": 0.9902, + "step": 8270 + }, + { + "epoch": 0.12, + "grad_norm": 0.54296875, + "learning_rate": 0.00019978723428694845, + "loss": 0.9017, + "step": 8275 + }, + { + "epoch": 0.12, + "grad_norm": 0.498046875, + "learning_rate": 0.00019978559884774116, + "loss": 0.8228, + "step": 8280 + }, + { + "epoch": 0.12, + "grad_norm": 0.5390625, + "learning_rate": 0.0001997839571538589, + "loss": 0.9621, + "step": 8285 + }, + { + "epoch": 0.12, + "grad_norm": 0.51953125, + "learning_rate": 0.00019978230920540452, + "loss": 0.9191, + "step": 8290 + }, + { + "epoch": 0.12, + "grad_norm": 0.6015625, + "learning_rate": 0.00019978065500248139, + "loss": 1.051, + "step": 8295 + }, + { + "epoch": 0.12, + "grad_norm": 0.5390625, + "learning_rate": 0.00019977899454519315, + "loss": 0.8845, + "step": 8300 + }, + { + "epoch": 0.12, + "grad_norm": 0.58984375, + "learning_rate": 0.0001997773278336439, + "loss": 1.0515, + "step": 8305 + }, + { + "epoch": 0.12, + "grad_norm": 0.470703125, + "learning_rate": 0.0001997756548679381, + "loss": 0.9412, + "step": 8310 + }, + { + "epoch": 0.12, + "grad_norm": 0.5390625, + "learning_rate": 0.00019977397564818064, + "loss": 1.0884, + "step": 8315 + }, + { + "epoch": 0.12, + "grad_norm": 0.51953125, + "learning_rate": 0.00019977229017447673, + "loss": 1.1093, + "step": 8320 + }, + { + "epoch": 0.12, + "grad_norm": 0.56640625, + "learning_rate": 0.00019977059844693206, + "loss": 0.9655, + "step": 8325 + }, + { + "epoch": 0.12, + "grad_norm": 0.54296875, + "learning_rate": 0.0001997689004656526, + "loss": 0.9096, + "step": 8330 + }, + { + "epoch": 0.12, + "grad_norm": 0.515625, + "learning_rate": 0.0001997671962307449, + "loss": 1.0204, + "step": 8335 + }, + { + "epoch": 0.12, + "grad_norm": 0.5, + "learning_rate": 0.0001997654857423157, + "loss": 0.909, + "step": 8340 + }, + { + "epoch": 0.12, + "grad_norm": 0.54296875, + "learning_rate": 0.0001997637690004722, + "loss": 0.966, + "step": 8345 + }, + { + "epoch": 0.12, + "grad_norm": 0.494140625, + "learning_rate": 0.00019976204600532206, + "loss": 1.0793, + "step": 8350 + }, + { + "epoch": 0.12, + "grad_norm": 0.734375, + "learning_rate": 0.00019976031675697322, + "loss": 0.8746, + "step": 8355 + }, + { + "epoch": 0.12, + "grad_norm": 0.546875, + "learning_rate": 0.00019975858125553415, + "loss": 1.0465, + "step": 8360 + }, + { + "epoch": 0.12, + "grad_norm": 0.51171875, + "learning_rate": 0.00019975683950111358, + "loss": 0.8873, + "step": 8365 + }, + { + "epoch": 0.12, + "grad_norm": 0.59375, + "learning_rate": 0.0001997550914938207, + "loss": 0.9766, + "step": 8370 + }, + { + "epoch": 0.12, + "grad_norm": 0.474609375, + "learning_rate": 0.00019975333723376505, + "loss": 0.8231, + "step": 8375 + }, + { + "epoch": 0.12, + "grad_norm": 0.458984375, + "learning_rate": 0.00019975157672105665, + "loss": 0.8231, + "step": 8380 + }, + { + "epoch": 0.12, + "grad_norm": 0.5078125, + "learning_rate": 0.00019974980995580578, + "loss": 0.9867, + "step": 8385 + }, + { + "epoch": 0.12, + "grad_norm": 0.51953125, + "learning_rate": 0.00019974803693812324, + "loss": 0.91, + "step": 8390 + }, + { + "epoch": 0.12, + "grad_norm": 0.546875, + "learning_rate": 0.00019974625766812013, + "loss": 0.9998, + "step": 8395 + }, + { + "epoch": 0.12, + "grad_norm": 0.50390625, + "learning_rate": 0.000199744472145908, + "loss": 0.9511, + "step": 8400 + }, + { + "epoch": 0.12, + "grad_norm": 0.4921875, + "learning_rate": 0.00019974268037159873, + "loss": 1.0058, + "step": 8405 + }, + { + "epoch": 0.12, + "grad_norm": 0.5234375, + "learning_rate": 0.00019974088234530468, + "loss": 1.0336, + "step": 8410 + }, + { + "epoch": 0.12, + "grad_norm": 0.75390625, + "learning_rate": 0.00019973907806713852, + "loss": 1.1555, + "step": 8415 + }, + { + "epoch": 0.12, + "grad_norm": 0.5234375, + "learning_rate": 0.00019973726753721335, + "loss": 0.99, + "step": 8420 + }, + { + "epoch": 0.12, + "grad_norm": 0.4765625, + "learning_rate": 0.00019973545075564268, + "loss": 0.9725, + "step": 8425 + }, + { + "epoch": 0.12, + "grad_norm": 0.51953125, + "learning_rate": 0.00019973362772254035, + "loss": 0.9521, + "step": 8430 + }, + { + "epoch": 0.12, + "grad_norm": 0.44921875, + "learning_rate": 0.00019973179843802064, + "loss": 0.9352, + "step": 8435 + }, + { + "epoch": 0.12, + "grad_norm": 0.5859375, + "learning_rate": 0.00019972996290219824, + "loss": 0.8785, + "step": 8440 + }, + { + "epoch": 0.12, + "grad_norm": 0.53125, + "learning_rate": 0.00019972812111518817, + "loss": 0.9064, + "step": 8445 + }, + { + "epoch": 0.12, + "grad_norm": 0.51953125, + "learning_rate": 0.0001997262730771059, + "loss": 0.9741, + "step": 8450 + }, + { + "epoch": 0.12, + "grad_norm": 0.5625, + "learning_rate": 0.00019972441878806721, + "loss": 1.0906, + "step": 8455 + }, + { + "epoch": 0.12, + "grad_norm": 0.498046875, + "learning_rate": 0.00019972255824818845, + "loss": 0.9678, + "step": 8460 + }, + { + "epoch": 0.12, + "grad_norm": 0.61328125, + "learning_rate": 0.00019972069145758609, + "loss": 1.0063, + "step": 8465 + }, + { + "epoch": 0.12, + "grad_norm": 0.5703125, + "learning_rate": 0.00019971881841637727, + "loss": 0.9492, + "step": 8470 + }, + { + "epoch": 0.12, + "grad_norm": 0.53125, + "learning_rate": 0.00019971693912467932, + "loss": 1.1059, + "step": 8475 + }, + { + "epoch": 0.12, + "grad_norm": 0.546875, + "learning_rate": 0.00019971505358261005, + "loss": 0.866, + "step": 8480 + }, + { + "epoch": 0.12, + "grad_norm": 0.51171875, + "learning_rate": 0.00019971316179028765, + "loss": 0.9676, + "step": 8485 + }, + { + "epoch": 0.12, + "grad_norm": 0.51171875, + "learning_rate": 0.00019971126374783074, + "loss": 0.8942, + "step": 8490 + }, + { + "epoch": 0.12, + "grad_norm": 0.46484375, + "learning_rate": 0.00019970935945535823, + "loss": 0.955, + "step": 8495 + }, + { + "epoch": 0.12, + "grad_norm": 0.515625, + "learning_rate": 0.00019970744891298952, + "loss": 1.0123, + "step": 8500 + }, + { + "epoch": 0.12, + "grad_norm": 0.515625, + "learning_rate": 0.00019970553212084435, + "loss": 1.1463, + "step": 8505 + }, + { + "epoch": 0.12, + "grad_norm": 0.466796875, + "learning_rate": 0.00019970360907904287, + "loss": 0.8798, + "step": 8510 + }, + { + "epoch": 0.12, + "grad_norm": 0.5703125, + "learning_rate": 0.0001997016797877056, + "loss": 0.9562, + "step": 8515 + }, + { + "epoch": 0.12, + "grad_norm": 0.5703125, + "learning_rate": 0.00019969974424695352, + "loss": 0.9939, + "step": 8520 + }, + { + "epoch": 0.12, + "grad_norm": 0.486328125, + "learning_rate": 0.00019969780245690792, + "loss": 0.9863, + "step": 8525 + }, + { + "epoch": 0.12, + "grad_norm": 0.4609375, + "learning_rate": 0.00019969585441769052, + "loss": 0.9419, + "step": 8530 + }, + { + "epoch": 0.12, + "grad_norm": 0.5703125, + "learning_rate": 0.0001996939001294234, + "loss": 0.9845, + "step": 8535 + }, + { + "epoch": 0.12, + "grad_norm": 0.640625, + "learning_rate": 0.0001996919395922291, + "loss": 0.9737, + "step": 8540 + }, + { + "epoch": 0.12, + "grad_norm": 0.486328125, + "learning_rate": 0.00019968997280623047, + "loss": 0.9785, + "step": 8545 + }, + { + "epoch": 0.12, + "grad_norm": 0.55859375, + "learning_rate": 0.00019968799977155083, + "loss": 0.963, + "step": 8550 + }, + { + "epoch": 0.12, + "grad_norm": 0.5859375, + "learning_rate": 0.0001996860204883138, + "loss": 0.8494, + "step": 8555 + }, + { + "epoch": 0.12, + "grad_norm": 0.609375, + "learning_rate": 0.00019968403495664348, + "loss": 0.8966, + "step": 8560 + }, + { + "epoch": 0.12, + "grad_norm": 0.625, + "learning_rate": 0.00019968204317666436, + "loss": 0.8522, + "step": 8565 + }, + { + "epoch": 0.12, + "grad_norm": 0.5078125, + "learning_rate": 0.0001996800451485012, + "loss": 1.0312, + "step": 8570 + }, + { + "epoch": 0.12, + "grad_norm": 0.5703125, + "learning_rate": 0.00019967804087227928, + "loss": 1.0109, + "step": 8575 + }, + { + "epoch": 0.12, + "grad_norm": 0.625, + "learning_rate": 0.00019967603034812425, + "loss": 0.8581, + "step": 8580 + }, + { + "epoch": 0.12, + "grad_norm": 0.5390625, + "learning_rate": 0.0001996740135761621, + "loss": 0.9733, + "step": 8585 + }, + { + "epoch": 0.12, + "grad_norm": 0.5703125, + "learning_rate": 0.00019967199055651928, + "loss": 0.9642, + "step": 8590 + }, + { + "epoch": 0.12, + "grad_norm": 0.5390625, + "learning_rate": 0.00019966996128932256, + "loss": 0.9924, + "step": 8595 + }, + { + "epoch": 0.12, + "grad_norm": 0.44140625, + "learning_rate": 0.00019966792577469917, + "loss": 0.7756, + "step": 8600 + }, + { + "epoch": 0.12, + "grad_norm": 0.515625, + "learning_rate": 0.00019966588401277666, + "loss": 0.9274, + "step": 8605 + }, + { + "epoch": 0.12, + "grad_norm": 0.494140625, + "learning_rate": 0.00019966383600368307, + "loss": 1.006, + "step": 8610 + }, + { + "epoch": 0.12, + "grad_norm": 0.482421875, + "learning_rate": 0.00019966178174754667, + "loss": 0.9224, + "step": 8615 + }, + { + "epoch": 0.12, + "grad_norm": 0.453125, + "learning_rate": 0.00019965972124449634, + "loss": 0.8353, + "step": 8620 + }, + { + "epoch": 0.12, + "grad_norm": 0.65625, + "learning_rate": 0.00019965765449466116, + "loss": 0.9543, + "step": 8625 + }, + { + "epoch": 0.12, + "grad_norm": 0.5703125, + "learning_rate": 0.00019965558149817069, + "loss": 0.9076, + "step": 8630 + }, + { + "epoch": 0.12, + "grad_norm": 0.56640625, + "learning_rate": 0.00019965350225515486, + "loss": 0.9186, + "step": 8635 + }, + { + "epoch": 0.12, + "grad_norm": 0.58203125, + "learning_rate": 0.00019965141676574403, + "loss": 1.0729, + "step": 8640 + }, + { + "epoch": 0.12, + "grad_norm": 0.54296875, + "learning_rate": 0.00019964932503006893, + "loss": 1.0694, + "step": 8645 + }, + { + "epoch": 0.12, + "grad_norm": 0.55078125, + "learning_rate": 0.0001996472270482606, + "loss": 0.9504, + "step": 8650 + }, + { + "epoch": 0.12, + "grad_norm": 0.50390625, + "learning_rate": 0.0001996451228204506, + "loss": 0.9101, + "step": 8655 + }, + { + "epoch": 0.12, + "grad_norm": 0.51171875, + "learning_rate": 0.00019964301234677082, + "loss": 1.0089, + "step": 8660 + }, + { + "epoch": 0.12, + "grad_norm": 0.546875, + "learning_rate": 0.00019964089562735356, + "loss": 1.0573, + "step": 8665 + }, + { + "epoch": 0.12, + "grad_norm": 0.58984375, + "learning_rate": 0.00019963877266233147, + "loss": 0.9161, + "step": 8670 + }, + { + "epoch": 0.12, + "grad_norm": 0.52734375, + "learning_rate": 0.0001996366434518376, + "loss": 0.8975, + "step": 8675 + }, + { + "epoch": 0.12, + "grad_norm": 0.48046875, + "learning_rate": 0.0001996345079960055, + "loss": 0.986, + "step": 8680 + }, + { + "epoch": 0.12, + "grad_norm": 0.54296875, + "learning_rate": 0.00019963236629496893, + "loss": 1.1205, + "step": 8685 + }, + { + "epoch": 0.12, + "grad_norm": 0.58984375, + "learning_rate": 0.00019963021834886217, + "loss": 0.9446, + "step": 8690 + }, + { + "epoch": 0.12, + "grad_norm": 0.482421875, + "learning_rate": 0.00019962806415781988, + "loss": 0.8427, + "step": 8695 + }, + { + "epoch": 0.12, + "grad_norm": 0.58984375, + "learning_rate": 0.00019962590372197701, + "loss": 1.0005, + "step": 8700 + }, + { + "epoch": 0.12, + "grad_norm": 0.52734375, + "learning_rate": 0.00019962373704146907, + "loss": 1.1333, + "step": 8705 + }, + { + "epoch": 0.12, + "grad_norm": 0.515625, + "learning_rate": 0.0001996215641164318, + "loss": 1.1934, + "step": 8710 + }, + { + "epoch": 0.13, + "grad_norm": 0.57421875, + "learning_rate": 0.00019961938494700147, + "loss": 0.916, + "step": 8715 + }, + { + "epoch": 0.13, + "grad_norm": 0.51171875, + "learning_rate": 0.00019961719953331462, + "loss": 1.0266, + "step": 8720 + }, + { + "epoch": 0.13, + "grad_norm": 0.56640625, + "learning_rate": 0.00019961500787550823, + "loss": 1.1552, + "step": 8725 + }, + { + "epoch": 0.13, + "grad_norm": 0.470703125, + "learning_rate": 0.0001996128099737197, + "loss": 1.0242, + "step": 8730 + }, + { + "epoch": 0.13, + "grad_norm": 0.50390625, + "learning_rate": 0.00019961060582808683, + "loss": 1.0109, + "step": 8735 + }, + { + "epoch": 0.13, + "grad_norm": 0.59375, + "learning_rate": 0.0001996083954387477, + "loss": 1.0398, + "step": 8740 + }, + { + "epoch": 0.13, + "grad_norm": 0.53515625, + "learning_rate": 0.00019960617880584091, + "loss": 0.8758, + "step": 8745 + }, + { + "epoch": 0.13, + "grad_norm": 0.7578125, + "learning_rate": 0.0001996039559295054, + "loss": 1.0004, + "step": 8750 + }, + { + "epoch": 0.13, + "grad_norm": 0.53125, + "learning_rate": 0.00019960172680988047, + "loss": 0.9889, + "step": 8755 + }, + { + "epoch": 0.13, + "grad_norm": 0.515625, + "learning_rate": 0.00019959949144710587, + "loss": 0.8624, + "step": 8760 + }, + { + "epoch": 0.13, + "grad_norm": 0.51953125, + "learning_rate": 0.00019959724984132174, + "loss": 0.8611, + "step": 8765 + }, + { + "epoch": 0.13, + "grad_norm": 0.546875, + "learning_rate": 0.00019959500199266854, + "loss": 0.9709, + "step": 8770 + }, + { + "epoch": 0.13, + "grad_norm": 0.5390625, + "learning_rate": 0.00019959274790128716, + "loss": 1.0855, + "step": 8775 + }, + { + "epoch": 0.13, + "grad_norm": 0.6796875, + "learning_rate": 0.00019959048756731896, + "loss": 1.0042, + "step": 8780 + }, + { + "epoch": 0.13, + "grad_norm": 0.470703125, + "learning_rate": 0.00019958822099090556, + "loss": 0.9496, + "step": 8785 + }, + { + "epoch": 0.13, + "grad_norm": 0.55859375, + "learning_rate": 0.00019958594817218902, + "loss": 0.9307, + "step": 8790 + }, + { + "epoch": 0.13, + "grad_norm": 0.51171875, + "learning_rate": 0.00019958366911131188, + "loss": 0.9806, + "step": 8795 + }, + { + "epoch": 0.13, + "grad_norm": 0.5625, + "learning_rate": 0.0001995813838084169, + "loss": 0.8629, + "step": 8800 + }, + { + "epoch": 0.13, + "grad_norm": 0.5234375, + "learning_rate": 0.0001995790922636474, + "loss": 1.0448, + "step": 8805 + }, + { + "epoch": 0.13, + "grad_norm": 0.486328125, + "learning_rate": 0.00019957679447714697, + "loss": 0.9303, + "step": 8810 + }, + { + "epoch": 0.13, + "grad_norm": 0.54296875, + "learning_rate": 0.00019957449044905964, + "loss": 1.0203, + "step": 8815 + }, + { + "epoch": 0.13, + "grad_norm": 0.5546875, + "learning_rate": 0.00019957218017952987, + "loss": 0.8014, + "step": 8820 + }, + { + "epoch": 0.13, + "grad_norm": 0.546875, + "learning_rate": 0.00019956986366870242, + "loss": 1.0335, + "step": 8825 + }, + { + "epoch": 0.13, + "grad_norm": 0.7265625, + "learning_rate": 0.00019956754091672254, + "loss": 1.1559, + "step": 8830 + }, + { + "epoch": 0.13, + "grad_norm": 0.51953125, + "learning_rate": 0.0001995652119237358, + "loss": 0.9023, + "step": 8835 + }, + { + "epoch": 0.13, + "grad_norm": 0.515625, + "learning_rate": 0.00019956287668988814, + "loss": 0.9111, + "step": 8840 + }, + { + "epoch": 0.13, + "grad_norm": 0.494140625, + "learning_rate": 0.00019956053521532602, + "loss": 1.0862, + "step": 8845 + }, + { + "epoch": 0.13, + "grad_norm": 0.5546875, + "learning_rate": 0.00019955818750019613, + "loss": 1.0293, + "step": 8850 + }, + { + "epoch": 0.13, + "grad_norm": 0.59765625, + "learning_rate": 0.00019955583354464568, + "loss": 1.2116, + "step": 8855 + }, + { + "epoch": 0.13, + "grad_norm": 0.5390625, + "learning_rate": 0.0001995534733488222, + "loss": 0.877, + "step": 8860 + }, + { + "epoch": 0.13, + "grad_norm": 0.494140625, + "learning_rate": 0.00019955110691287362, + "loss": 0.9599, + "step": 8865 + }, + { + "epoch": 0.13, + "grad_norm": 0.51171875, + "learning_rate": 0.00019954873423694825, + "loss": 0.965, + "step": 8870 + }, + { + "epoch": 0.13, + "grad_norm": 0.57421875, + "learning_rate": 0.00019954635532119487, + "loss": 0.9688, + "step": 8875 + }, + { + "epoch": 0.13, + "grad_norm": 0.447265625, + "learning_rate": 0.00019954397016576258, + "loss": 0.9937, + "step": 8880 + }, + { + "epoch": 0.13, + "grad_norm": 0.5078125, + "learning_rate": 0.00019954157877080086, + "loss": 0.9031, + "step": 8885 + }, + { + "epoch": 0.13, + "grad_norm": 0.625, + "learning_rate": 0.0001995391811364596, + "loss": 1.0922, + "step": 8890 + }, + { + "epoch": 0.13, + "grad_norm": 0.486328125, + "learning_rate": 0.00019953677726288914, + "loss": 1.0146, + "step": 8895 + }, + { + "epoch": 0.13, + "grad_norm": 0.57421875, + "learning_rate": 0.00019953436715024008, + "loss": 1.1008, + "step": 8900 + }, + { + "epoch": 0.13, + "grad_norm": 0.5, + "learning_rate": 0.00019953195079866354, + "loss": 1.0151, + "step": 8905 + }, + { + "epoch": 0.13, + "grad_norm": 0.65625, + "learning_rate": 0.00019952952820831093, + "loss": 0.9264, + "step": 8910 + }, + { + "epoch": 0.13, + "grad_norm": 0.60546875, + "learning_rate": 0.0001995270993793342, + "loss": 0.9496, + "step": 8915 + }, + { + "epoch": 0.13, + "grad_norm": 0.55078125, + "learning_rate": 0.0001995246643118855, + "loss": 0.9531, + "step": 8920 + }, + { + "epoch": 0.13, + "grad_norm": 0.65234375, + "learning_rate": 0.00019952222300611747, + "loss": 0.9197, + "step": 8925 + }, + { + "epoch": 0.13, + "grad_norm": 0.5859375, + "learning_rate": 0.00019951977546218318, + "loss": 1.096, + "step": 8930 + }, + { + "epoch": 0.13, + "grad_norm": 0.5390625, + "learning_rate": 0.00019951732168023602, + "loss": 0.9807, + "step": 8935 + }, + { + "epoch": 0.13, + "grad_norm": 0.54296875, + "learning_rate": 0.00019951486166042978, + "loss": 0.9146, + "step": 8940 + }, + { + "epoch": 0.13, + "grad_norm": 0.5078125, + "learning_rate": 0.0001995123954029187, + "loss": 0.9749, + "step": 8945 + }, + { + "epoch": 0.13, + "grad_norm": 0.51953125, + "learning_rate": 0.00019950992290785732, + "loss": 0.9829, + "step": 8950 + }, + { + "epoch": 0.13, + "grad_norm": 0.5, + "learning_rate": 0.00019950744417540067, + "loss": 0.8879, + "step": 8955 + }, + { + "epoch": 0.13, + "grad_norm": 0.56640625, + "learning_rate": 0.00019950495920570408, + "loss": 0.8371, + "step": 8960 + }, + { + "epoch": 0.13, + "grad_norm": 0.5234375, + "learning_rate": 0.00019950246799892328, + "loss": 1.046, + "step": 8965 + }, + { + "epoch": 0.13, + "grad_norm": 0.5234375, + "learning_rate": 0.0001994999705552145, + "loss": 0.9569, + "step": 8970 + }, + { + "epoch": 0.13, + "grad_norm": 0.50390625, + "learning_rate": 0.00019949746687473422, + "loss": 0.9587, + "step": 8975 + }, + { + "epoch": 0.13, + "grad_norm": 0.5078125, + "learning_rate": 0.00019949495695763942, + "loss": 1.0237, + "step": 8980 + }, + { + "epoch": 0.13, + "grad_norm": 0.6015625, + "learning_rate": 0.0001994924408040874, + "loss": 1.0305, + "step": 8985 + }, + { + "epoch": 0.13, + "grad_norm": 0.5078125, + "learning_rate": 0.00019948991841423587, + "loss": 0.9636, + "step": 8990 + }, + { + "epoch": 0.13, + "grad_norm": 0.48046875, + "learning_rate": 0.00019948738978824295, + "loss": 1.0234, + "step": 8995 + }, + { + "epoch": 0.13, + "grad_norm": 0.5, + "learning_rate": 0.0001994848549262671, + "loss": 0.9714, + "step": 9000 + }, + { + "epoch": 0.13, + "grad_norm": 0.55078125, + "learning_rate": 0.0001994823138284673, + "loss": 0.9048, + "step": 9005 + }, + { + "epoch": 0.13, + "grad_norm": 0.5234375, + "learning_rate": 0.00019947976649500274, + "loss": 0.996, + "step": 9010 + }, + { + "epoch": 0.13, + "grad_norm": 0.45703125, + "learning_rate": 0.00019947721292603313, + "loss": 0.7694, + "step": 9015 + }, + { + "epoch": 0.13, + "grad_norm": 0.5703125, + "learning_rate": 0.00019947465312171846, + "loss": 0.8679, + "step": 9020 + }, + { + "epoch": 0.13, + "grad_norm": 0.5546875, + "learning_rate": 0.00019947208708221933, + "loss": 1.1444, + "step": 9025 + }, + { + "epoch": 0.13, + "grad_norm": 0.546875, + "learning_rate": 0.00019946951480769642, + "loss": 1.1269, + "step": 9030 + }, + { + "epoch": 0.13, + "grad_norm": 0.59375, + "learning_rate": 0.00019946693629831105, + "loss": 1.1152, + "step": 9035 + }, + { + "epoch": 0.13, + "grad_norm": 0.51171875, + "learning_rate": 0.00019946435155422486, + "loss": 0.9622, + "step": 9040 + }, + { + "epoch": 0.13, + "grad_norm": 0.462890625, + "learning_rate": 0.00019946176057559982, + "loss": 0.9847, + "step": 9045 + }, + { + "epoch": 0.13, + "grad_norm": 0.458984375, + "learning_rate": 0.00019945916336259833, + "loss": 0.8883, + "step": 9050 + }, + { + "epoch": 0.13, + "grad_norm": 0.55078125, + "learning_rate": 0.00019945655991538322, + "loss": 0.9141, + "step": 9055 + }, + { + "epoch": 0.13, + "grad_norm": 0.53125, + "learning_rate": 0.00019945395023411767, + "loss": 1.0952, + "step": 9060 + }, + { + "epoch": 0.13, + "grad_norm": 0.498046875, + "learning_rate": 0.00019945133431896524, + "loss": 1.006, + "step": 9065 + }, + { + "epoch": 0.13, + "grad_norm": 0.5390625, + "learning_rate": 0.0001994487121700899, + "loss": 0.9484, + "step": 9070 + }, + { + "epoch": 0.13, + "grad_norm": 0.50390625, + "learning_rate": 0.00019944608378765603, + "loss": 0.8655, + "step": 9075 + }, + { + "epoch": 0.13, + "grad_norm": 0.5, + "learning_rate": 0.0001994434491718284, + "loss": 1.0319, + "step": 9080 + }, + { + "epoch": 0.13, + "grad_norm": 0.57421875, + "learning_rate": 0.00019944080832277208, + "loss": 1.0843, + "step": 9085 + }, + { + "epoch": 0.13, + "grad_norm": 0.53125, + "learning_rate": 0.00019943816124065266, + "loss": 0.9651, + "step": 9090 + }, + { + "epoch": 0.13, + "grad_norm": 0.515625, + "learning_rate": 0.00019943550792563604, + "loss": 0.9859, + "step": 9095 + }, + { + "epoch": 0.13, + "grad_norm": 0.51953125, + "learning_rate": 0.0001994328483778885, + "loss": 1.0562, + "step": 9100 + }, + { + "epoch": 0.13, + "grad_norm": 0.51171875, + "learning_rate": 0.00019943018259757685, + "loss": 1.0792, + "step": 9105 + }, + { + "epoch": 0.13, + "grad_norm": 0.60546875, + "learning_rate": 0.00019942751058486807, + "loss": 0.9782, + "step": 9110 + }, + { + "epoch": 0.13, + "grad_norm": 0.5390625, + "learning_rate": 0.00019942483233992967, + "loss": 0.8319, + "step": 9115 + }, + { + "epoch": 0.13, + "grad_norm": 0.5078125, + "learning_rate": 0.00019942214786292957, + "loss": 1.0149, + "step": 9120 + }, + { + "epoch": 0.13, + "grad_norm": 0.5390625, + "learning_rate": 0.00019941945715403598, + "loss": 0.9469, + "step": 9125 + }, + { + "epoch": 0.13, + "grad_norm": 0.5390625, + "learning_rate": 0.00019941676021341764, + "loss": 0.9073, + "step": 9130 + }, + { + "epoch": 0.13, + "grad_norm": 0.5234375, + "learning_rate": 0.0001994140570412435, + "loss": 1.0213, + "step": 9135 + }, + { + "epoch": 0.13, + "grad_norm": 0.5390625, + "learning_rate": 0.00019941134763768305, + "loss": 1.0976, + "step": 9140 + }, + { + "epoch": 0.13, + "grad_norm": 0.5234375, + "learning_rate": 0.0001994086320029061, + "loss": 0.9995, + "step": 9145 + }, + { + "epoch": 0.13, + "grad_norm": 0.52734375, + "learning_rate": 0.0001994059101370829, + "loss": 0.9779, + "step": 9150 + }, + { + "epoch": 0.13, + "grad_norm": 0.52734375, + "learning_rate": 0.00019940318204038406, + "loss": 0.931, + "step": 9155 + }, + { + "epoch": 0.13, + "grad_norm": 0.62890625, + "learning_rate": 0.0001994004477129805, + "loss": 0.9962, + "step": 9160 + }, + { + "epoch": 0.13, + "grad_norm": 0.5390625, + "learning_rate": 0.00019939770715504373, + "loss": 0.9804, + "step": 9165 + }, + { + "epoch": 0.13, + "grad_norm": 0.546875, + "learning_rate": 0.00019939496036674542, + "loss": 1.0479, + "step": 9170 + }, + { + "epoch": 0.13, + "grad_norm": 0.65625, + "learning_rate": 0.00019939220734825784, + "loss": 1.0682, + "step": 9175 + }, + { + "epoch": 0.13, + "grad_norm": 0.48828125, + "learning_rate": 0.0001993894480997535, + "loss": 0.8179, + "step": 9180 + }, + { + "epoch": 0.13, + "grad_norm": 0.54296875, + "learning_rate": 0.0001993866826214053, + "loss": 1.1365, + "step": 9185 + }, + { + "epoch": 0.13, + "grad_norm": 0.50390625, + "learning_rate": 0.00019938391091338668, + "loss": 1.0454, + "step": 9190 + }, + { + "epoch": 0.13, + "grad_norm": 0.54296875, + "learning_rate": 0.00019938113297587133, + "loss": 0.9802, + "step": 9195 + }, + { + "epoch": 0.13, + "grad_norm": 0.5859375, + "learning_rate": 0.00019937834880903342, + "loss": 0.9689, + "step": 9200 + }, + { + "epoch": 0.13, + "grad_norm": 0.58203125, + "learning_rate": 0.00019937555841304736, + "loss": 0.9684, + "step": 9205 + }, + { + "epoch": 0.13, + "grad_norm": 0.640625, + "learning_rate": 0.00019937276178808814, + "loss": 0.9603, + "step": 9210 + }, + { + "epoch": 0.13, + "grad_norm": 0.490234375, + "learning_rate": 0.00019936995893433105, + "loss": 1.0888, + "step": 9215 + }, + { + "epoch": 0.13, + "grad_norm": 0.546875, + "learning_rate": 0.00019936714985195177, + "loss": 0.991, + "step": 9220 + }, + { + "epoch": 0.13, + "grad_norm": 0.546875, + "learning_rate": 0.00019936433454112635, + "loss": 0.9825, + "step": 9225 + }, + { + "epoch": 0.13, + "grad_norm": 0.53515625, + "learning_rate": 0.00019936151300203127, + "loss": 0.9383, + "step": 9230 + }, + { + "epoch": 0.13, + "grad_norm": 0.486328125, + "learning_rate": 0.0001993586852348434, + "loss": 0.9222, + "step": 9235 + }, + { + "epoch": 0.13, + "grad_norm": 0.625, + "learning_rate": 0.00019935585123973998, + "loss": 0.9562, + "step": 9240 + }, + { + "epoch": 0.13, + "grad_norm": 0.58203125, + "learning_rate": 0.00019935301101689864, + "loss": 1.1302, + "step": 9245 + }, + { + "epoch": 0.13, + "grad_norm": 0.5859375, + "learning_rate": 0.0001993501645664974, + "loss": 0.8784, + "step": 9250 + }, + { + "epoch": 0.13, + "grad_norm": 0.53125, + "learning_rate": 0.00019934731188871473, + "loss": 1.0054, + "step": 9255 + }, + { + "epoch": 0.13, + "grad_norm": 0.55078125, + "learning_rate": 0.0001993444529837294, + "loss": 0.9292, + "step": 9260 + }, + { + "epoch": 0.13, + "grad_norm": 0.54296875, + "learning_rate": 0.00019934158785172058, + "loss": 0.8246, + "step": 9265 + }, + { + "epoch": 0.13, + "grad_norm": 0.5078125, + "learning_rate": 0.00019933871649286796, + "loss": 0.8338, + "step": 9270 + }, + { + "epoch": 0.13, + "grad_norm": 0.54296875, + "learning_rate": 0.00019933583890735138, + "loss": 0.8329, + "step": 9275 + }, + { + "epoch": 0.13, + "grad_norm": 0.51171875, + "learning_rate": 0.0001993329550953513, + "loss": 1.0388, + "step": 9280 + }, + { + "epoch": 0.13, + "grad_norm": 0.490234375, + "learning_rate": 0.0001993300650570485, + "loss": 0.952, + "step": 9285 + }, + { + "epoch": 0.13, + "grad_norm": 0.671875, + "learning_rate": 0.00019932716879262404, + "loss": 1.2071, + "step": 9290 + }, + { + "epoch": 0.13, + "grad_norm": 0.52734375, + "learning_rate": 0.00019932426630225956, + "loss": 0.9085, + "step": 9295 + }, + { + "epoch": 0.13, + "grad_norm": 0.55859375, + "learning_rate": 0.00019932135758613694, + "loss": 0.9346, + "step": 9300 + }, + { + "epoch": 0.13, + "grad_norm": 0.494140625, + "learning_rate": 0.0001993184426444385, + "loss": 0.9858, + "step": 9305 + }, + { + "epoch": 0.13, + "grad_norm": 0.55078125, + "learning_rate": 0.00019931552147734697, + "loss": 1.0675, + "step": 9310 + }, + { + "epoch": 0.13, + "grad_norm": 0.5546875, + "learning_rate": 0.00019931259408504545, + "loss": 1.0765, + "step": 9315 + }, + { + "epoch": 0.13, + "grad_norm": 0.484375, + "learning_rate": 0.0001993096604677174, + "loss": 1.045, + "step": 9320 + }, + { + "epoch": 0.13, + "grad_norm": 0.53515625, + "learning_rate": 0.00019930672062554674, + "loss": 0.7794, + "step": 9325 + }, + { + "epoch": 0.13, + "grad_norm": 0.5859375, + "learning_rate": 0.00019930377455871771, + "loss": 0.8622, + "step": 9330 + }, + { + "epoch": 0.13, + "grad_norm": 0.5625, + "learning_rate": 0.00019930082226741504, + "loss": 0.9834, + "step": 9335 + }, + { + "epoch": 0.13, + "grad_norm": 0.62109375, + "learning_rate": 0.0001992978637518237, + "loss": 0.9345, + "step": 9340 + }, + { + "epoch": 0.13, + "grad_norm": 0.55859375, + "learning_rate": 0.00019929489901212918, + "loss": 0.9969, + "step": 9345 + }, + { + "epoch": 0.13, + "grad_norm": 0.578125, + "learning_rate": 0.0001992919280485173, + "loss": 1.1695, + "step": 9350 + }, + { + "epoch": 0.13, + "grad_norm": 0.578125, + "learning_rate": 0.0001992889508611743, + "loss": 1.2468, + "step": 9355 + }, + { + "epoch": 0.13, + "grad_norm": 0.5703125, + "learning_rate": 0.00019928596745028677, + "loss": 1.0997, + "step": 9360 + }, + { + "epoch": 0.13, + "grad_norm": 0.56640625, + "learning_rate": 0.0001992829778160417, + "loss": 0.8832, + "step": 9365 + }, + { + "epoch": 0.13, + "grad_norm": 0.59765625, + "learning_rate": 0.0001992799819586265, + "loss": 1.0756, + "step": 9370 + }, + { + "epoch": 0.13, + "grad_norm": 0.515625, + "learning_rate": 0.000199276979878229, + "loss": 0.8576, + "step": 9375 + }, + { + "epoch": 0.13, + "grad_norm": 0.59765625, + "learning_rate": 0.0001992739715750373, + "loss": 1.2125, + "step": 9380 + }, + { + "epoch": 0.13, + "grad_norm": 0.57421875, + "learning_rate": 0.00019927095704924005, + "loss": 0.8716, + "step": 9385 + }, + { + "epoch": 0.13, + "grad_norm": 0.515625, + "learning_rate": 0.00019926793630102612, + "loss": 1.0045, + "step": 9390 + }, + { + "epoch": 0.13, + "grad_norm": 0.52734375, + "learning_rate": 0.0001992649093305849, + "loss": 0.9902, + "step": 9395 + }, + { + "epoch": 0.13, + "grad_norm": 0.5234375, + "learning_rate": 0.00019926187613810608, + "loss": 0.9739, + "step": 9400 + }, + { + "epoch": 0.13, + "grad_norm": 0.5390625, + "learning_rate": 0.00019925883672377982, + "loss": 0.9858, + "step": 9405 + }, + { + "epoch": 0.13, + "grad_norm": 0.56640625, + "learning_rate": 0.00019925579108779663, + "loss": 1.0679, + "step": 9410 + }, + { + "epoch": 0.14, + "grad_norm": 0.53515625, + "learning_rate": 0.00019925273923034742, + "loss": 0.9112, + "step": 9415 + }, + { + "epoch": 0.14, + "grad_norm": 0.57421875, + "learning_rate": 0.00019924968115162347, + "loss": 0.9754, + "step": 9420 + }, + { + "epoch": 0.14, + "grad_norm": 0.498046875, + "learning_rate": 0.0001992466168518165, + "loss": 0.8947, + "step": 9425 + }, + { + "epoch": 0.14, + "grad_norm": 0.53125, + "learning_rate": 0.00019924354633111852, + "loss": 1.0981, + "step": 9430 + }, + { + "epoch": 0.14, + "grad_norm": 0.53125, + "learning_rate": 0.00019924046958972206, + "loss": 0.8397, + "step": 9435 + }, + { + "epoch": 0.14, + "grad_norm": 0.478515625, + "learning_rate": 0.00019923738662781988, + "loss": 0.7539, + "step": 9440 + }, + { + "epoch": 0.14, + "grad_norm": 0.484375, + "learning_rate": 0.00019923429744560532, + "loss": 0.8619, + "step": 9445 + }, + { + "epoch": 0.14, + "grad_norm": 0.5390625, + "learning_rate": 0.000199231202043272, + "loss": 0.9305, + "step": 9450 + }, + { + "epoch": 0.14, + "grad_norm": 0.54296875, + "learning_rate": 0.00019922810042101387, + "loss": 0.8837, + "step": 9455 + }, + { + "epoch": 0.14, + "grad_norm": 0.486328125, + "learning_rate": 0.00019922499257902544, + "loss": 1.0377, + "step": 9460 + }, + { + "epoch": 0.14, + "grad_norm": 0.5546875, + "learning_rate": 0.00019922187851750144, + "loss": 1.0431, + "step": 9465 + }, + { + "epoch": 0.14, + "grad_norm": 0.48828125, + "learning_rate": 0.0001992187582366371, + "loss": 1.0584, + "step": 9470 + }, + { + "epoch": 0.14, + "grad_norm": 0.51953125, + "learning_rate": 0.000199215631736628, + "loss": 0.913, + "step": 9475 + }, + { + "epoch": 0.14, + "grad_norm": 0.5859375, + "learning_rate": 0.0001992124990176701, + "loss": 0.8658, + "step": 9480 + }, + { + "epoch": 0.14, + "grad_norm": 0.474609375, + "learning_rate": 0.0001992093600799598, + "loss": 1.0133, + "step": 9485 + }, + { + "epoch": 0.14, + "grad_norm": 0.5, + "learning_rate": 0.00019920621492369375, + "loss": 0.9382, + "step": 9490 + }, + { + "epoch": 0.14, + "grad_norm": 0.57421875, + "learning_rate": 0.0001992030635490692, + "loss": 0.9738, + "step": 9495 + }, + { + "epoch": 0.14, + "grad_norm": 0.52734375, + "learning_rate": 0.00019919990595628363, + "loss": 1.0165, + "step": 9500 + }, + { + "epoch": 0.14, + "grad_norm": 0.482421875, + "learning_rate": 0.000199196742145535, + "loss": 0.8768, + "step": 9505 + }, + { + "epoch": 0.14, + "grad_norm": 0.5, + "learning_rate": 0.00019919357211702158, + "loss": 0.8994, + "step": 9510 + }, + { + "epoch": 0.14, + "grad_norm": 0.58203125, + "learning_rate": 0.00019919039587094211, + "loss": 0.9562, + "step": 9515 + }, + { + "epoch": 0.14, + "grad_norm": 0.5390625, + "learning_rate": 0.00019918721340749564, + "loss": 0.9853, + "step": 9520 + }, + { + "epoch": 0.14, + "grad_norm": 0.51953125, + "learning_rate": 0.00019918402472688166, + "loss": 1.0501, + "step": 9525 + }, + { + "epoch": 0.14, + "grad_norm": 0.5, + "learning_rate": 0.00019918082982930008, + "loss": 1.0426, + "step": 9530 + }, + { + "epoch": 0.14, + "grad_norm": 0.53125, + "learning_rate": 0.00019917762871495112, + "loss": 0.9506, + "step": 9535 + }, + { + "epoch": 0.14, + "grad_norm": 0.51953125, + "learning_rate": 0.00019917442138403543, + "loss": 0.9277, + "step": 9540 + }, + { + "epoch": 0.14, + "grad_norm": 0.58984375, + "learning_rate": 0.000199171207836754, + "loss": 1.0632, + "step": 9545 + }, + { + "epoch": 0.14, + "grad_norm": 0.51171875, + "learning_rate": 0.00019916798807330838, + "loss": 0.9718, + "step": 9550 + }, + { + "epoch": 0.14, + "grad_norm": 0.6171875, + "learning_rate": 0.00019916476209390035, + "loss": 0.9782, + "step": 9555 + }, + { + "epoch": 0.14, + "grad_norm": 0.484375, + "learning_rate": 0.00019916152989873204, + "loss": 0.9661, + "step": 9560 + }, + { + "epoch": 0.14, + "grad_norm": 0.58203125, + "learning_rate": 0.00019915829148800613, + "loss": 0.8995, + "step": 9565 + }, + { + "epoch": 0.14, + "grad_norm": 0.66796875, + "learning_rate": 0.00019915504686192557, + "loss": 0.9292, + "step": 9570 + }, + { + "epoch": 0.14, + "grad_norm": 0.50390625, + "learning_rate": 0.00019915179602069373, + "loss": 0.9884, + "step": 9575 + }, + { + "epoch": 0.14, + "grad_norm": 0.5546875, + "learning_rate": 0.0001991485389645144, + "loss": 0.9105, + "step": 9580 + }, + { + "epoch": 0.14, + "grad_norm": 0.49609375, + "learning_rate": 0.00019914527569359173, + "loss": 1.0626, + "step": 9585 + }, + { + "epoch": 0.14, + "grad_norm": 0.5625, + "learning_rate": 0.00019914200620813025, + "loss": 1.0535, + "step": 9590 + }, + { + "epoch": 0.14, + "grad_norm": 0.55859375, + "learning_rate": 0.00019913873050833494, + "loss": 1.0736, + "step": 9595 + }, + { + "epoch": 0.14, + "grad_norm": 0.5546875, + "learning_rate": 0.00019913544859441107, + "loss": 0.8547, + "step": 9600 + }, + { + "epoch": 0.14, + "grad_norm": 0.69140625, + "learning_rate": 0.00019913216046656436, + "loss": 1.1305, + "step": 9605 + }, + { + "epoch": 0.14, + "grad_norm": 0.54296875, + "learning_rate": 0.00019912886612500095, + "loss": 1.0175, + "step": 9610 + }, + { + "epoch": 0.14, + "grad_norm": 0.515625, + "learning_rate": 0.00019912556556992732, + "loss": 0.9938, + "step": 9615 + }, + { + "epoch": 0.14, + "grad_norm": 0.53125, + "learning_rate": 0.00019912225880155032, + "loss": 0.8953, + "step": 9620 + }, + { + "epoch": 0.14, + "grad_norm": 0.52734375, + "learning_rate": 0.0001991189458200773, + "loss": 0.9436, + "step": 9625 + }, + { + "epoch": 0.14, + "grad_norm": 0.5859375, + "learning_rate": 0.00019911562662571581, + "loss": 1.0913, + "step": 9630 + }, + { + "epoch": 0.14, + "grad_norm": 0.5703125, + "learning_rate": 0.00019911230121867396, + "loss": 0.9126, + "step": 9635 + }, + { + "epoch": 0.14, + "grad_norm": 0.4921875, + "learning_rate": 0.00019910896959916024, + "loss": 1.0492, + "step": 9640 + }, + { + "epoch": 0.14, + "grad_norm": 0.51953125, + "learning_rate": 0.0001991056317673834, + "loss": 1.0146, + "step": 9645 + }, + { + "epoch": 0.14, + "grad_norm": 0.53515625, + "learning_rate": 0.00019910228772355268, + "loss": 0.9471, + "step": 9650 + }, + { + "epoch": 0.14, + "grad_norm": 0.69140625, + "learning_rate": 0.00019909893746787772, + "loss": 0.9236, + "step": 9655 + }, + { + "epoch": 0.14, + "grad_norm": 0.51953125, + "learning_rate": 0.00019909558100056847, + "loss": 0.8822, + "step": 9660 + }, + { + "epoch": 0.14, + "grad_norm": 0.53515625, + "learning_rate": 0.00019909221832183538, + "loss": 1.0455, + "step": 9665 + }, + { + "epoch": 0.14, + "grad_norm": 0.53515625, + "learning_rate": 0.00019908884943188915, + "loss": 1.0162, + "step": 9670 + }, + { + "epoch": 0.14, + "grad_norm": 0.53515625, + "learning_rate": 0.000199085474330941, + "loss": 0.9446, + "step": 9675 + }, + { + "epoch": 0.14, + "grad_norm": 0.51953125, + "learning_rate": 0.00019908209301920248, + "loss": 0.9998, + "step": 9680 + }, + { + "epoch": 0.14, + "grad_norm": 0.55859375, + "learning_rate": 0.0001990787054968855, + "loss": 0.977, + "step": 9685 + }, + { + "epoch": 0.14, + "grad_norm": 0.57421875, + "learning_rate": 0.00019907531176420245, + "loss": 0.9763, + "step": 9690 + }, + { + "epoch": 0.14, + "grad_norm": 0.5234375, + "learning_rate": 0.00019907191182136596, + "loss": 0.7974, + "step": 9695 + }, + { + "epoch": 0.14, + "grad_norm": 0.5703125, + "learning_rate": 0.00019906850566858928, + "loss": 0.9493, + "step": 9700 + }, + { + "epoch": 0.14, + "grad_norm": 0.53125, + "learning_rate": 0.0001990650933060858, + "loss": 0.8047, + "step": 9705 + }, + { + "epoch": 0.14, + "grad_norm": 0.5859375, + "learning_rate": 0.00019906167473406945, + "loss": 0.9946, + "step": 9710 + }, + { + "epoch": 0.14, + "grad_norm": 0.52734375, + "learning_rate": 0.00019905824995275452, + "loss": 0.928, + "step": 9715 + }, + { + "epoch": 0.14, + "grad_norm": 0.54296875, + "learning_rate": 0.00019905481896235566, + "loss": 1.0442, + "step": 9720 + }, + { + "epoch": 0.14, + "grad_norm": 0.498046875, + "learning_rate": 0.00019905138176308791, + "loss": 0.8989, + "step": 9725 + }, + { + "epoch": 0.14, + "grad_norm": 0.56640625, + "learning_rate": 0.00019904793835516676, + "loss": 1.0771, + "step": 9730 + }, + { + "epoch": 0.14, + "grad_norm": 0.515625, + "learning_rate": 0.000199044488738808, + "loss": 1.0894, + "step": 9735 + }, + { + "epoch": 0.14, + "grad_norm": 0.55859375, + "learning_rate": 0.0001990410329142279, + "loss": 1.1339, + "step": 9740 + }, + { + "epoch": 0.14, + "grad_norm": 0.55859375, + "learning_rate": 0.00019903757088164306, + "loss": 0.8538, + "step": 9745 + }, + { + "epoch": 0.14, + "grad_norm": 0.6015625, + "learning_rate": 0.0001990341026412705, + "loss": 1.1117, + "step": 9750 + }, + { + "epoch": 0.14, + "grad_norm": 0.5, + "learning_rate": 0.0001990306281933276, + "loss": 0.9435, + "step": 9755 + }, + { + "epoch": 0.14, + "grad_norm": 0.578125, + "learning_rate": 0.00019902714753803212, + "loss": 1.0701, + "step": 9760 + }, + { + "epoch": 0.14, + "grad_norm": 0.6484375, + "learning_rate": 0.00019902366067560222, + "loss": 1.0426, + "step": 9765 + }, + { + "epoch": 0.14, + "grad_norm": 0.5546875, + "learning_rate": 0.00019902016760625654, + "loss": 1.1211, + "step": 9770 + }, + { + "epoch": 0.14, + "grad_norm": 0.5, + "learning_rate": 0.00019901666833021397, + "loss": 0.9565, + "step": 9775 + }, + { + "epoch": 0.14, + "grad_norm": 0.546875, + "learning_rate": 0.00019901316284769385, + "loss": 1.0333, + "step": 9780 + }, + { + "epoch": 0.14, + "grad_norm": 0.59375, + "learning_rate": 0.0001990096511589159, + "loss": 1.0048, + "step": 9785 + }, + { + "epoch": 0.14, + "grad_norm": 0.609375, + "learning_rate": 0.00019900613326410027, + "loss": 1.0639, + "step": 9790 + }, + { + "epoch": 0.14, + "grad_norm": 0.5859375, + "learning_rate": 0.00019900260916346743, + "loss": 1.0796, + "step": 9795 + }, + { + "epoch": 0.14, + "grad_norm": 0.5390625, + "learning_rate": 0.00019899907885723834, + "loss": 0.9319, + "step": 9800 + }, + { + "epoch": 0.14, + "grad_norm": 0.56640625, + "learning_rate": 0.0001989955423456342, + "loss": 0.9127, + "step": 9805 + }, + { + "epoch": 0.14, + "grad_norm": 0.546875, + "learning_rate": 0.00019899199962887672, + "loss": 0.9493, + "step": 9810 + }, + { + "epoch": 0.14, + "grad_norm": 0.53125, + "learning_rate": 0.00019898845070718797, + "loss": 0.9051, + "step": 9815 + }, + { + "epoch": 0.14, + "grad_norm": 0.56640625, + "learning_rate": 0.00019898489558079039, + "loss": 0.9273, + "step": 9820 + }, + { + "epoch": 0.14, + "grad_norm": 0.49609375, + "learning_rate": 0.00019898133424990682, + "loss": 0.9571, + "step": 9825 + }, + { + "epoch": 0.14, + "grad_norm": 0.5078125, + "learning_rate": 0.0001989777667147605, + "loss": 0.9934, + "step": 9830 + }, + { + "epoch": 0.14, + "grad_norm": 0.56640625, + "learning_rate": 0.00019897419297557504, + "loss": 0.9298, + "step": 9835 + }, + { + "epoch": 0.14, + "grad_norm": 0.55859375, + "learning_rate": 0.0001989706130325744, + "loss": 0.9945, + "step": 9840 + }, + { + "epoch": 0.14, + "grad_norm": 0.58203125, + "learning_rate": 0.00019896702688598306, + "loss": 1.0029, + "step": 9845 + }, + { + "epoch": 0.14, + "grad_norm": 0.61328125, + "learning_rate": 0.00019896343453602576, + "loss": 0.9952, + "step": 9850 + }, + { + "epoch": 0.14, + "grad_norm": 0.54296875, + "learning_rate": 0.00019895983598292762, + "loss": 0.9123, + "step": 9855 + }, + { + "epoch": 0.14, + "grad_norm": 0.53515625, + "learning_rate": 0.0001989562312269143, + "loss": 0.9369, + "step": 9860 + }, + { + "epoch": 0.14, + "grad_norm": 0.6640625, + "learning_rate": 0.0001989526202682117, + "loss": 1.1773, + "step": 9865 + }, + { + "epoch": 0.14, + "grad_norm": 0.55859375, + "learning_rate": 0.0001989490031070462, + "loss": 0.9184, + "step": 9870 + }, + { + "epoch": 0.14, + "grad_norm": 0.546875, + "learning_rate": 0.00019894537974364442, + "loss": 0.9259, + "step": 9875 + }, + { + "epoch": 0.14, + "grad_norm": 0.59765625, + "learning_rate": 0.00019894175017823358, + "loss": 0.9172, + "step": 9880 + }, + { + "epoch": 0.14, + "grad_norm": 0.515625, + "learning_rate": 0.00019893811441104115, + "loss": 0.9146, + "step": 9885 + }, + { + "epoch": 0.14, + "grad_norm": 0.515625, + "learning_rate": 0.00019893447244229503, + "loss": 1.0027, + "step": 9890 + }, + { + "epoch": 0.14, + "grad_norm": 0.5546875, + "learning_rate": 0.00019893082427222352, + "loss": 0.9167, + "step": 9895 + }, + { + "epoch": 0.14, + "grad_norm": 0.5234375, + "learning_rate": 0.00019892716990105528, + "loss": 0.9703, + "step": 9900 + }, + { + "epoch": 0.14, + "grad_norm": 0.56640625, + "learning_rate": 0.0001989235093290193, + "loss": 0.9928, + "step": 9905 + }, + { + "epoch": 0.14, + "grad_norm": 0.546875, + "learning_rate": 0.00019891984255634513, + "loss": 0.9418, + "step": 9910 + }, + { + "epoch": 0.14, + "grad_norm": 0.57421875, + "learning_rate": 0.00019891616958326257, + "loss": 0.8257, + "step": 9915 + }, + { + "epoch": 0.14, + "grad_norm": 0.51171875, + "learning_rate": 0.00019891249041000184, + "loss": 0.8762, + "step": 9920 + }, + { + "epoch": 0.14, + "grad_norm": 0.58984375, + "learning_rate": 0.00019890880503679358, + "loss": 0.9428, + "step": 9925 + }, + { + "epoch": 0.14, + "grad_norm": 0.51953125, + "learning_rate": 0.00019890511346386873, + "loss": 1.0612, + "step": 9930 + }, + { + "epoch": 0.14, + "grad_norm": 0.59375, + "learning_rate": 0.00019890141569145877, + "loss": 1.0748, + "step": 9935 + }, + { + "epoch": 0.14, + "grad_norm": 0.6171875, + "learning_rate": 0.0001988977117197954, + "loss": 1.0498, + "step": 9940 + }, + { + "epoch": 0.14, + "grad_norm": 0.546875, + "learning_rate": 0.00019889400154911085, + "loss": 0.9022, + "step": 9945 + }, + { + "epoch": 0.14, + "grad_norm": 0.5625, + "learning_rate": 0.00019889028517963762, + "loss": 0.9875, + "step": 9950 + }, + { + "epoch": 0.14, + "grad_norm": 0.5390625, + "learning_rate": 0.00019888656261160873, + "loss": 0.8687, + "step": 9955 + }, + { + "epoch": 0.14, + "grad_norm": 0.51171875, + "learning_rate": 0.00019888283384525743, + "loss": 0.9364, + "step": 9960 + }, + { + "epoch": 0.14, + "grad_norm": 0.5234375, + "learning_rate": 0.00019887909888081752, + "loss": 0.904, + "step": 9965 + }, + { + "epoch": 0.14, + "grad_norm": 0.55859375, + "learning_rate": 0.00019887535771852307, + "loss": 1.2971, + "step": 9970 + }, + { + "epoch": 0.14, + "grad_norm": 0.54296875, + "learning_rate": 0.00019887161035860859, + "loss": 0.9255, + "step": 9975 + }, + { + "epoch": 0.14, + "grad_norm": 0.5234375, + "learning_rate": 0.00019886785680130892, + "loss": 0.9778, + "step": 9980 + }, + { + "epoch": 0.14, + "grad_norm": 0.5703125, + "learning_rate": 0.00019886409704685944, + "loss": 0.9391, + "step": 9985 + }, + { + "epoch": 0.14, + "grad_norm": 0.5703125, + "learning_rate": 0.00019886033109549575, + "loss": 0.9114, + "step": 9990 + }, + { + "epoch": 0.14, + "grad_norm": 0.61328125, + "learning_rate": 0.0001988565589474539, + "loss": 1.144, + "step": 9995 + }, + { + "epoch": 0.14, + "grad_norm": 0.5703125, + "learning_rate": 0.00019885278060297038, + "loss": 1.0354, + "step": 10000 + }, + { + "epoch": 0.14, + "grad_norm": 0.51171875, + "learning_rate": 0.00019884899606228195, + "loss": 1.0957, + "step": 10005 + }, + { + "epoch": 0.14, + "grad_norm": 0.5234375, + "learning_rate": 0.0001988452053256259, + "loss": 1.0502, + "step": 10010 + }, + { + "epoch": 0.14, + "grad_norm": 0.5, + "learning_rate": 0.00019884140839323977, + "loss": 0.8835, + "step": 10015 + }, + { + "epoch": 0.14, + "grad_norm": 0.5390625, + "learning_rate": 0.00019883760526536161, + "loss": 0.9943, + "step": 10020 + }, + { + "epoch": 0.14, + "grad_norm": 0.63671875, + "learning_rate": 0.0001988337959422298, + "loss": 0.9613, + "step": 10025 + }, + { + "epoch": 0.14, + "grad_norm": 0.51171875, + "learning_rate": 0.00019882998042408307, + "loss": 0.9662, + "step": 10030 + }, + { + "epoch": 0.14, + "grad_norm": 0.703125, + "learning_rate": 0.00019882615871116062, + "loss": 0.8121, + "step": 10035 + }, + { + "epoch": 0.14, + "grad_norm": 0.5, + "learning_rate": 0.000198822330803702, + "loss": 0.813, + "step": 10040 + }, + { + "epoch": 0.14, + "grad_norm": 0.7421875, + "learning_rate": 0.00019881849670194712, + "loss": 0.9225, + "step": 10045 + }, + { + "epoch": 0.14, + "grad_norm": 0.6796875, + "learning_rate": 0.00019881465640613635, + "loss": 0.9777, + "step": 10050 + }, + { + "epoch": 0.14, + "grad_norm": 0.57421875, + "learning_rate": 0.00019881080991651033, + "loss": 0.9568, + "step": 10055 + }, + { + "epoch": 0.14, + "grad_norm": 0.5390625, + "learning_rate": 0.00019880695723331024, + "loss": 0.9948, + "step": 10060 + }, + { + "epoch": 0.14, + "grad_norm": 0.60546875, + "learning_rate": 0.00019880309835677753, + "loss": 1.3049, + "step": 10065 + }, + { + "epoch": 0.14, + "grad_norm": 0.5, + "learning_rate": 0.00019879923328715407, + "loss": 0.945, + "step": 10070 + }, + { + "epoch": 0.14, + "grad_norm": 0.5859375, + "learning_rate": 0.0001987953620246822, + "loss": 1.1085, + "step": 10075 + }, + { + "epoch": 0.14, + "grad_norm": 0.59765625, + "learning_rate": 0.00019879148456960447, + "loss": 1.068, + "step": 10080 + }, + { + "epoch": 0.14, + "grad_norm": 0.59765625, + "learning_rate": 0.000198787600922164, + "loss": 0.9367, + "step": 10085 + }, + { + "epoch": 0.14, + "grad_norm": 0.5859375, + "learning_rate": 0.00019878371108260416, + "loss": 1.0229, + "step": 10090 + }, + { + "epoch": 0.14, + "grad_norm": 0.65625, + "learning_rate": 0.00019877981505116884, + "loss": 0.9013, + "step": 10095 + }, + { + "epoch": 0.14, + "grad_norm": 0.53515625, + "learning_rate": 0.0001987759128281022, + "loss": 1.0527, + "step": 10100 + }, + { + "epoch": 0.14, + "grad_norm": 0.6171875, + "learning_rate": 0.00019877200441364884, + "loss": 1.0512, + "step": 10105 + }, + { + "epoch": 0.15, + "grad_norm": 0.51171875, + "learning_rate": 0.00019876808980805375, + "loss": 0.9033, + "step": 10110 + }, + { + "epoch": 0.15, + "grad_norm": 0.52734375, + "learning_rate": 0.0001987641690115623, + "loss": 0.9431, + "step": 10115 + }, + { + "epoch": 0.15, + "grad_norm": 0.5234375, + "learning_rate": 0.00019876024202442028, + "loss": 1.0798, + "step": 10120 + }, + { + "epoch": 0.15, + "grad_norm": 0.625, + "learning_rate": 0.0001987563088468738, + "loss": 1.0832, + "step": 10125 + }, + { + "epoch": 0.15, + "grad_norm": 0.5546875, + "learning_rate": 0.0001987523694791694, + "loss": 1.0411, + "step": 10130 + }, + { + "epoch": 0.15, + "grad_norm": 0.734375, + "learning_rate": 0.000198748423921554, + "loss": 0.9704, + "step": 10135 + }, + { + "epoch": 0.15, + "grad_norm": 0.59375, + "learning_rate": 0.00019874447217427493, + "loss": 1.0958, + "step": 10140 + }, + { + "epoch": 0.15, + "grad_norm": 0.5390625, + "learning_rate": 0.0001987405142375799, + "loss": 1.013, + "step": 10145 + }, + { + "epoch": 0.15, + "grad_norm": 0.52734375, + "learning_rate": 0.00019873655011171698, + "loss": 0.7886, + "step": 10150 + }, + { + "epoch": 0.15, + "grad_norm": 0.5546875, + "learning_rate": 0.00019873257979693463, + "loss": 0.8691, + "step": 10155 + }, + { + "epoch": 0.15, + "grad_norm": 0.57421875, + "learning_rate": 0.00019872860329348173, + "loss": 0.9006, + "step": 10160 + }, + { + "epoch": 0.15, + "grad_norm": 0.5390625, + "learning_rate": 0.00019872462060160754, + "loss": 0.8719, + "step": 10165 + }, + { + "epoch": 0.15, + "grad_norm": 0.65234375, + "learning_rate": 0.0001987206317215617, + "loss": 0.8805, + "step": 10170 + }, + { + "epoch": 0.15, + "grad_norm": 0.53125, + "learning_rate": 0.00019871663665359422, + "loss": 0.8455, + "step": 10175 + }, + { + "epoch": 0.15, + "grad_norm": 0.52734375, + "learning_rate": 0.00019871263539795555, + "loss": 1.0638, + "step": 10180 + }, + { + "epoch": 0.15, + "grad_norm": 0.55078125, + "learning_rate": 0.00019870862795489646, + "loss": 0.9571, + "step": 10185 + }, + { + "epoch": 0.15, + "grad_norm": 0.5625, + "learning_rate": 0.00019870461432466814, + "loss": 0.9332, + "step": 10190 + }, + { + "epoch": 0.15, + "grad_norm": 0.53125, + "learning_rate": 0.00019870059450752216, + "loss": 1.0385, + "step": 10195 + }, + { + "epoch": 0.15, + "grad_norm": 0.5546875, + "learning_rate": 0.00019869656850371055, + "loss": 1.0117, + "step": 10200 + }, + { + "epoch": 0.15, + "grad_norm": 0.5859375, + "learning_rate": 0.00019869253631348558, + "loss": 0.9248, + "step": 10205 + }, + { + "epoch": 0.15, + "grad_norm": 0.466796875, + "learning_rate": 0.00019868849793710006, + "loss": 0.9542, + "step": 10210 + }, + { + "epoch": 0.15, + "grad_norm": 0.515625, + "learning_rate": 0.00019868445337480707, + "loss": 0.9228, + "step": 10215 + }, + { + "epoch": 0.15, + "grad_norm": 0.58203125, + "learning_rate": 0.00019868040262686017, + "loss": 0.8607, + "step": 10220 + }, + { + "epoch": 0.15, + "grad_norm": 0.53125, + "learning_rate": 0.00019867634569351324, + "loss": 0.9858, + "step": 10225 + }, + { + "epoch": 0.15, + "grad_norm": 0.5, + "learning_rate": 0.00019867228257502056, + "loss": 0.9653, + "step": 10230 + }, + { + "epoch": 0.15, + "grad_norm": 0.4921875, + "learning_rate": 0.00019866821327163689, + "loss": 0.9971, + "step": 10235 + }, + { + "epoch": 0.15, + "grad_norm": 0.59765625, + "learning_rate": 0.00019866413778361718, + "loss": 1.0423, + "step": 10240 + }, + { + "epoch": 0.15, + "grad_norm": 0.50390625, + "learning_rate": 0.00019866005611121694, + "loss": 1.0405, + "step": 10245 + }, + { + "epoch": 0.15, + "grad_norm": 0.56640625, + "learning_rate": 0.00019865596825469206, + "loss": 1.1111, + "step": 10250 + }, + { + "epoch": 0.15, + "grad_norm": 0.515625, + "learning_rate": 0.00019865187421429868, + "loss": 0.8356, + "step": 10255 + }, + { + "epoch": 0.15, + "grad_norm": 0.578125, + "learning_rate": 0.00019864777399029353, + "loss": 0.8764, + "step": 10260 + }, + { + "epoch": 0.15, + "grad_norm": 0.546875, + "learning_rate": 0.00019864366758293352, + "loss": 0.9006, + "step": 10265 + }, + { + "epoch": 0.15, + "grad_norm": 0.5390625, + "learning_rate": 0.0001986395549924761, + "loss": 0.9196, + "step": 10270 + }, + { + "epoch": 0.15, + "grad_norm": 0.5, + "learning_rate": 0.00019863543621917898, + "loss": 1.0036, + "step": 10275 + }, + { + "epoch": 0.15, + "grad_norm": 0.55859375, + "learning_rate": 0.00019863131126330043, + "loss": 0.8909, + "step": 10280 + }, + { + "epoch": 0.15, + "grad_norm": 0.5390625, + "learning_rate": 0.00019862718012509897, + "loss": 1.0645, + "step": 10285 + }, + { + "epoch": 0.15, + "grad_norm": 0.546875, + "learning_rate": 0.00019862304280483347, + "loss": 1.0257, + "step": 10290 + }, + { + "epoch": 0.15, + "grad_norm": 0.5703125, + "learning_rate": 0.00019861889930276338, + "loss": 0.9767, + "step": 10295 + }, + { + "epoch": 0.15, + "grad_norm": 0.5625, + "learning_rate": 0.00019861474961914834, + "loss": 0.9614, + "step": 10300 + }, + { + "epoch": 0.15, + "grad_norm": 0.5078125, + "learning_rate": 0.00019861059375424848, + "loss": 0.9972, + "step": 10305 + }, + { + "epoch": 0.15, + "grad_norm": 0.53515625, + "learning_rate": 0.00019860643170832432, + "loss": 0.9258, + "step": 10310 + }, + { + "epoch": 0.15, + "grad_norm": 0.52734375, + "learning_rate": 0.0001986022634816367, + "loss": 1.049, + "step": 10315 + }, + { + "epoch": 0.15, + "grad_norm": 0.6015625, + "learning_rate": 0.0001985980890744469, + "loss": 1.1258, + "step": 10320 + }, + { + "epoch": 0.15, + "grad_norm": 0.59375, + "learning_rate": 0.0001985939084870166, + "loss": 1.0743, + "step": 10325 + }, + { + "epoch": 0.15, + "grad_norm": 0.55078125, + "learning_rate": 0.00019858972171960782, + "loss": 0.882, + "step": 10330 + }, + { + "epoch": 0.15, + "grad_norm": 0.5703125, + "learning_rate": 0.00019858552877248298, + "loss": 1.1144, + "step": 10335 + }, + { + "epoch": 0.15, + "grad_norm": 0.50390625, + "learning_rate": 0.00019858132964590495, + "loss": 1.0556, + "step": 10340 + }, + { + "epoch": 0.15, + "grad_norm": 0.5390625, + "learning_rate": 0.00019857712434013687, + "loss": 1.0406, + "step": 10345 + }, + { + "epoch": 0.15, + "grad_norm": 0.58984375, + "learning_rate": 0.00019857291285544238, + "loss": 0.9308, + "step": 10350 + }, + { + "epoch": 0.15, + "grad_norm": 0.5078125, + "learning_rate": 0.00019856869519208544, + "loss": 0.9467, + "step": 10355 + }, + { + "epoch": 0.15, + "grad_norm": 0.546875, + "learning_rate": 0.00019856447135033046, + "loss": 0.9573, + "step": 10360 + }, + { + "epoch": 0.15, + "grad_norm": 0.50390625, + "learning_rate": 0.00019856024133044214, + "loss": 1.1789, + "step": 10365 + }, + { + "epoch": 0.15, + "grad_norm": 0.546875, + "learning_rate": 0.0001985560051326856, + "loss": 0.9767, + "step": 10370 + }, + { + "epoch": 0.15, + "grad_norm": 0.58203125, + "learning_rate": 0.00019855176275732647, + "loss": 0.9285, + "step": 10375 + }, + { + "epoch": 0.15, + "grad_norm": 0.6015625, + "learning_rate": 0.0001985475142046306, + "loss": 0.999, + "step": 10380 + }, + { + "epoch": 0.15, + "grad_norm": 0.6015625, + "learning_rate": 0.00019854325947486428, + "loss": 1.1369, + "step": 10385 + }, + { + "epoch": 0.15, + "grad_norm": 0.52734375, + "learning_rate": 0.00019853899856829424, + "loss": 0.9692, + "step": 10390 + }, + { + "epoch": 0.15, + "grad_norm": 0.5859375, + "learning_rate": 0.00019853473148518755, + "loss": 0.9785, + "step": 10395 + }, + { + "epoch": 0.15, + "grad_norm": 0.5625, + "learning_rate": 0.00019853045822581166, + "loss": 1.0419, + "step": 10400 + }, + { + "epoch": 0.15, + "grad_norm": 0.578125, + "learning_rate": 0.00019852617879043442, + "loss": 1.0083, + "step": 10405 + }, + { + "epoch": 0.15, + "grad_norm": 0.5078125, + "learning_rate": 0.0001985218931793241, + "loss": 1.0458, + "step": 10410 + }, + { + "epoch": 0.15, + "grad_norm": 0.64453125, + "learning_rate": 0.00019851760139274932, + "loss": 0.999, + "step": 10415 + }, + { + "epoch": 0.15, + "grad_norm": 0.57421875, + "learning_rate": 0.00019851330343097903, + "loss": 0.8451, + "step": 10420 + }, + { + "epoch": 0.15, + "grad_norm": 0.50390625, + "learning_rate": 0.00019850899929428276, + "loss": 1.2424, + "step": 10425 + }, + { + "epoch": 0.15, + "grad_norm": 0.51171875, + "learning_rate": 0.00019850468898293015, + "loss": 0.8905, + "step": 10430 + }, + { + "epoch": 0.15, + "grad_norm": 0.515625, + "learning_rate": 0.00019850037249719149, + "loss": 0.9937, + "step": 10435 + }, + { + "epoch": 0.15, + "grad_norm": 0.55078125, + "learning_rate": 0.0001984960498373373, + "loss": 0.9661, + "step": 10440 + }, + { + "epoch": 0.15, + "grad_norm": 0.62109375, + "learning_rate": 0.00019849172100363851, + "loss": 1.0552, + "step": 10445 + }, + { + "epoch": 0.15, + "grad_norm": 0.59375, + "learning_rate": 0.00019848738599636647, + "loss": 0.9607, + "step": 10450 + }, + { + "epoch": 0.15, + "grad_norm": 0.54296875, + "learning_rate": 0.00019848304481579293, + "loss": 0.8086, + "step": 10455 + }, + { + "epoch": 0.15, + "grad_norm": 0.53125, + "learning_rate": 0.00019847869746218995, + "loss": 0.9596, + "step": 10460 + }, + { + "epoch": 0.15, + "grad_norm": 0.609375, + "learning_rate": 0.00019847434393583006, + "loss": 0.9708, + "step": 10465 + }, + { + "epoch": 0.15, + "grad_norm": 0.6015625, + "learning_rate": 0.00019846998423698616, + "loss": 0.9375, + "step": 10470 + }, + { + "epoch": 0.15, + "grad_norm": 0.546875, + "learning_rate": 0.00019846561836593148, + "loss": 1.0335, + "step": 10475 + }, + { + "epoch": 0.15, + "grad_norm": 0.51953125, + "learning_rate": 0.00019846124632293973, + "loss": 0.9171, + "step": 10480 + }, + { + "epoch": 0.15, + "grad_norm": 0.5859375, + "learning_rate": 0.00019845686810828487, + "loss": 0.9829, + "step": 10485 + }, + { + "epoch": 0.15, + "grad_norm": 0.62109375, + "learning_rate": 0.00019845248372224144, + "loss": 0.9931, + "step": 10490 + }, + { + "epoch": 0.15, + "grad_norm": 0.51171875, + "learning_rate": 0.00019844809316508418, + "loss": 0.9137, + "step": 10495 + }, + { + "epoch": 0.15, + "grad_norm": 0.51953125, + "learning_rate": 0.00019844369643708828, + "loss": 0.9316, + "step": 10500 + }, + { + "epoch": 0.15, + "grad_norm": 0.54296875, + "learning_rate": 0.00019843929353852944, + "loss": 0.9184, + "step": 10505 + }, + { + "epoch": 0.15, + "grad_norm": 0.578125, + "learning_rate": 0.0001984348844696835, + "loss": 0.9606, + "step": 10510 + }, + { + "epoch": 0.15, + "grad_norm": 0.54296875, + "learning_rate": 0.00019843046923082692, + "loss": 0.9036, + "step": 10515 + }, + { + "epoch": 0.15, + "grad_norm": 0.51171875, + "learning_rate": 0.00019842604782223643, + "loss": 0.9305, + "step": 10520 + }, + { + "epoch": 0.15, + "grad_norm": 0.54296875, + "learning_rate": 0.00019842162024418918, + "loss": 0.8998, + "step": 10525 + }, + { + "epoch": 0.15, + "grad_norm": 0.5703125, + "learning_rate": 0.00019841718649696267, + "loss": 1.0292, + "step": 10530 + }, + { + "epoch": 0.15, + "grad_norm": 0.546875, + "learning_rate": 0.00019841274658083483, + "loss": 1.0275, + "step": 10535 + }, + { + "epoch": 0.15, + "grad_norm": 0.51953125, + "learning_rate": 0.00019840830049608395, + "loss": 0.8424, + "step": 10540 + }, + { + "epoch": 0.15, + "grad_norm": 0.546875, + "learning_rate": 0.00019840384824298867, + "loss": 0.9166, + "step": 10545 + }, + { + "epoch": 0.15, + "grad_norm": 0.6015625, + "learning_rate": 0.00019839938982182815, + "loss": 1.1204, + "step": 10550 + }, + { + "epoch": 0.15, + "grad_norm": 0.4921875, + "learning_rate": 0.00019839492523288183, + "loss": 0.8521, + "step": 10555 + }, + { + "epoch": 0.15, + "grad_norm": 0.578125, + "learning_rate": 0.0001983904544764295, + "loss": 0.9154, + "step": 10560 + }, + { + "epoch": 0.15, + "grad_norm": 0.51953125, + "learning_rate": 0.00019838597755275143, + "loss": 1.0765, + "step": 10565 + }, + { + "epoch": 0.15, + "grad_norm": 0.54296875, + "learning_rate": 0.00019838149446212825, + "loss": 0.899, + "step": 10570 + }, + { + "epoch": 0.15, + "grad_norm": 0.5078125, + "learning_rate": 0.00019837700520484094, + "loss": 0.8723, + "step": 10575 + }, + { + "epoch": 0.15, + "grad_norm": 0.58203125, + "learning_rate": 0.0001983725097811709, + "loss": 1.09, + "step": 10580 + }, + { + "epoch": 0.15, + "grad_norm": 0.67578125, + "learning_rate": 0.0001983680081913999, + "loss": 1.0788, + "step": 10585 + }, + { + "epoch": 0.15, + "grad_norm": 0.59375, + "learning_rate": 0.00019836350043581013, + "loss": 0.9981, + "step": 10590 + }, + { + "epoch": 0.15, + "grad_norm": 0.57421875, + "learning_rate": 0.0001983589865146841, + "loss": 0.9492, + "step": 10595 + }, + { + "epoch": 0.15, + "grad_norm": 0.60546875, + "learning_rate": 0.00019835446642830484, + "loss": 1.0889, + "step": 10600 + }, + { + "epoch": 0.15, + "grad_norm": 0.546875, + "learning_rate": 0.00019834994017695556, + "loss": 1.0074, + "step": 10605 + }, + { + "epoch": 0.15, + "grad_norm": 0.53515625, + "learning_rate": 0.00019834540776092, + "loss": 1.0849, + "step": 10610 + }, + { + "epoch": 0.15, + "grad_norm": 0.60546875, + "learning_rate": 0.0001983408691804823, + "loss": 1.0903, + "step": 10615 + }, + { + "epoch": 0.15, + "grad_norm": 0.52734375, + "learning_rate": 0.00019833632443592693, + "loss": 1.02, + "step": 10620 + }, + { + "epoch": 0.15, + "grad_norm": 0.625, + "learning_rate": 0.00019833177352753873, + "loss": 0.9941, + "step": 10625 + }, + { + "epoch": 0.15, + "grad_norm": 0.59375, + "learning_rate": 0.000198327216455603, + "loss": 1.003, + "step": 10630 + }, + { + "epoch": 0.15, + "grad_norm": 0.5, + "learning_rate": 0.00019832265322040533, + "loss": 0.9198, + "step": 10635 + }, + { + "epoch": 0.15, + "grad_norm": 0.640625, + "learning_rate": 0.00019831808382223177, + "loss": 1.0734, + "step": 10640 + }, + { + "epoch": 0.15, + "grad_norm": 0.59375, + "learning_rate": 0.00019831350826136877, + "loss": 1.1687, + "step": 10645 + }, + { + "epoch": 0.15, + "grad_norm": 0.482421875, + "learning_rate": 0.00019830892653810306, + "loss": 1.0696, + "step": 10650 + }, + { + "epoch": 0.15, + "grad_norm": 0.4921875, + "learning_rate": 0.00019830433865272192, + "loss": 1.0275, + "step": 10655 + }, + { + "epoch": 0.15, + "grad_norm": 0.54296875, + "learning_rate": 0.00019829974460551286, + "loss": 1.0318, + "step": 10660 + }, + { + "epoch": 0.15, + "grad_norm": 0.56640625, + "learning_rate": 0.00019829514439676383, + "loss": 0.8166, + "step": 10665 + }, + { + "epoch": 0.15, + "grad_norm": 0.52734375, + "learning_rate": 0.00019829053802676322, + "loss": 1.0499, + "step": 10670 + }, + { + "epoch": 0.15, + "grad_norm": 0.51171875, + "learning_rate": 0.00019828592549579974, + "loss": 1.0607, + "step": 10675 + }, + { + "epoch": 0.15, + "grad_norm": 0.498046875, + "learning_rate": 0.0001982813068041625, + "loss": 1.0365, + "step": 10680 + }, + { + "epoch": 0.15, + "grad_norm": 0.546875, + "learning_rate": 0.00019827668195214105, + "loss": 0.9428, + "step": 10685 + }, + { + "epoch": 0.15, + "grad_norm": 0.51171875, + "learning_rate": 0.00019827205094002524, + "loss": 0.9886, + "step": 10690 + }, + { + "epoch": 0.15, + "grad_norm": 0.58203125, + "learning_rate": 0.00019826741376810533, + "loss": 1.0607, + "step": 10695 + }, + { + "epoch": 0.15, + "grad_norm": 0.62890625, + "learning_rate": 0.000198262770436672, + "loss": 1.0827, + "step": 10700 + }, + { + "epoch": 0.15, + "grad_norm": 0.52734375, + "learning_rate": 0.00019825812094601632, + "loss": 0.9896, + "step": 10705 + }, + { + "epoch": 0.15, + "grad_norm": 0.62890625, + "learning_rate": 0.0001982534652964297, + "loss": 0.9955, + "step": 10710 + }, + { + "epoch": 0.15, + "grad_norm": 0.58984375, + "learning_rate": 0.000198248803488204, + "loss": 0.8947, + "step": 10715 + }, + { + "epoch": 0.15, + "grad_norm": 0.578125, + "learning_rate": 0.00019824413552163141, + "loss": 1.035, + "step": 10720 + }, + { + "epoch": 0.15, + "grad_norm": 0.5234375, + "learning_rate": 0.0001982394613970045, + "loss": 0.9582, + "step": 10725 + }, + { + "epoch": 0.15, + "grad_norm": 0.60546875, + "learning_rate": 0.00019823478111461625, + "loss": 1.1495, + "step": 10730 + }, + { + "epoch": 0.15, + "grad_norm": 0.59765625, + "learning_rate": 0.00019823009467476006, + "loss": 0.8319, + "step": 10735 + }, + { + "epoch": 0.15, + "grad_norm": 0.51171875, + "learning_rate": 0.00019822540207772965, + "loss": 0.9966, + "step": 10740 + }, + { + "epoch": 0.15, + "grad_norm": 0.51953125, + "learning_rate": 0.00019822070332381917, + "loss": 0.9219, + "step": 10745 + }, + { + "epoch": 0.15, + "grad_norm": 0.53125, + "learning_rate": 0.00019821599841332314, + "loss": 0.9079, + "step": 10750 + }, + { + "epoch": 0.15, + "grad_norm": 0.52734375, + "learning_rate": 0.0001982112873465365, + "loss": 0.9619, + "step": 10755 + }, + { + "epoch": 0.15, + "grad_norm": 0.5234375, + "learning_rate": 0.00019820657012375445, + "loss": 0.9418, + "step": 10760 + }, + { + "epoch": 0.15, + "grad_norm": 0.58984375, + "learning_rate": 0.00019820184674527278, + "loss": 1.1247, + "step": 10765 + }, + { + "epoch": 0.15, + "grad_norm": 0.50390625, + "learning_rate": 0.00019819711721138751, + "loss": 1.0533, + "step": 10770 + }, + { + "epoch": 0.15, + "grad_norm": 0.546875, + "learning_rate": 0.0001981923815223951, + "loss": 0.879, + "step": 10775 + }, + { + "epoch": 0.15, + "grad_norm": 0.6171875, + "learning_rate": 0.0001981876396785924, + "loss": 1.1022, + "step": 10780 + }, + { + "epoch": 0.15, + "grad_norm": 0.5390625, + "learning_rate": 0.0001981828916802766, + "loss": 0.9689, + "step": 10785 + }, + { + "epoch": 0.15, + "grad_norm": 0.53125, + "learning_rate": 0.0001981781375277453, + "loss": 0.852, + "step": 10790 + }, + { + "epoch": 0.15, + "grad_norm": 0.51171875, + "learning_rate": 0.00019817337722129657, + "loss": 1.0256, + "step": 10795 + }, + { + "epoch": 0.15, + "grad_norm": 0.578125, + "learning_rate": 0.00019816861076122873, + "loss": 0.9881, + "step": 10800 + }, + { + "epoch": 0.15, + "grad_norm": 0.57421875, + "learning_rate": 0.00019816383814784055, + "loss": 0.9893, + "step": 10805 + }, + { + "epoch": 0.16, + "grad_norm": 0.54296875, + "learning_rate": 0.0001981590593814312, + "loss": 1.0052, + "step": 10810 + }, + { + "epoch": 0.16, + "grad_norm": 0.6171875, + "learning_rate": 0.00019815427446230022, + "loss": 1.0588, + "step": 10815 + }, + { + "epoch": 0.16, + "grad_norm": 0.5546875, + "learning_rate": 0.00019814948339074752, + "loss": 0.9212, + "step": 10820 + }, + { + "epoch": 0.16, + "grad_norm": 0.58203125, + "learning_rate": 0.00019814468616707344, + "loss": 1.0306, + "step": 10825 + }, + { + "epoch": 0.16, + "grad_norm": 0.53515625, + "learning_rate": 0.00019813988279157862, + "loss": 0.963, + "step": 10830 + }, + { + "epoch": 0.16, + "grad_norm": 0.57421875, + "learning_rate": 0.0001981350732645642, + "loss": 1.1285, + "step": 10835 + }, + { + "epoch": 0.16, + "grad_norm": 0.56640625, + "learning_rate": 0.0001981302575863316, + "loss": 0.8325, + "step": 10840 + }, + { + "epoch": 0.16, + "grad_norm": 0.6015625, + "learning_rate": 0.00019812543575718273, + "loss": 0.8524, + "step": 10845 + }, + { + "epoch": 0.16, + "grad_norm": 0.55078125, + "learning_rate": 0.00019812060777741976, + "loss": 0.9555, + "step": 10850 + }, + { + "epoch": 0.16, + "grad_norm": 0.56640625, + "learning_rate": 0.00019811577364734536, + "loss": 0.8622, + "step": 10855 + }, + { + "epoch": 0.16, + "grad_norm": 0.61328125, + "learning_rate": 0.0001981109333672625, + "loss": 0.9831, + "step": 10860 + }, + { + "epoch": 0.16, + "grad_norm": 0.5625, + "learning_rate": 0.00019810608693747464, + "loss": 0.8993, + "step": 10865 + }, + { + "epoch": 0.16, + "grad_norm": 0.5078125, + "learning_rate": 0.0001981012343582855, + "loss": 0.905, + "step": 10870 + }, + { + "epoch": 0.16, + "grad_norm": 0.56640625, + "learning_rate": 0.00019809637562999925, + "loss": 1.0481, + "step": 10875 + }, + { + "epoch": 0.16, + "grad_norm": 0.59375, + "learning_rate": 0.00019809151075292046, + "loss": 0.9381, + "step": 10880 + }, + { + "epoch": 0.16, + "grad_norm": 0.59375, + "learning_rate": 0.00019808663972735406, + "loss": 0.9586, + "step": 10885 + }, + { + "epoch": 0.16, + "grad_norm": 0.5546875, + "learning_rate": 0.00019808176255360537, + "loss": 0.9594, + "step": 10890 + }, + { + "epoch": 0.16, + "grad_norm": 0.63671875, + "learning_rate": 0.00019807687923198007, + "loss": 1.0916, + "step": 10895 + }, + { + "epoch": 0.16, + "grad_norm": 0.546875, + "learning_rate": 0.00019807198976278434, + "loss": 0.9862, + "step": 10900 + }, + { + "epoch": 0.16, + "grad_norm": 0.494140625, + "learning_rate": 0.00019806709414632457, + "loss": 0.8475, + "step": 10905 + }, + { + "epoch": 0.16, + "grad_norm": 0.51171875, + "learning_rate": 0.0001980621923829076, + "loss": 1.0222, + "step": 10910 + }, + { + "epoch": 0.16, + "grad_norm": 0.51953125, + "learning_rate": 0.00019805728447284078, + "loss": 0.9598, + "step": 10915 + }, + { + "epoch": 0.16, + "grad_norm": 0.546875, + "learning_rate": 0.0001980523704164317, + "loss": 0.9676, + "step": 10920 + }, + { + "epoch": 0.16, + "grad_norm": 0.49609375, + "learning_rate": 0.00019804745021398835, + "loss": 0.9503, + "step": 10925 + }, + { + "epoch": 0.16, + "grad_norm": 0.6953125, + "learning_rate": 0.00019804252386581913, + "loss": 0.967, + "step": 10930 + }, + { + "epoch": 0.16, + "grad_norm": 0.55078125, + "learning_rate": 0.0001980375913722329, + "loss": 0.8711, + "step": 10935 + }, + { + "epoch": 0.16, + "grad_norm": 0.5546875, + "learning_rate": 0.00019803265273353877, + "loss": 0.8866, + "step": 10940 + }, + { + "epoch": 0.16, + "grad_norm": 0.53125, + "learning_rate": 0.0001980277079500463, + "loss": 1.1107, + "step": 10945 + }, + { + "epoch": 0.16, + "grad_norm": 0.578125, + "learning_rate": 0.00019802275702206546, + "loss": 1.0041, + "step": 10950 + }, + { + "epoch": 0.16, + "grad_norm": 0.53125, + "learning_rate": 0.00019801779994990657, + "loss": 0.9462, + "step": 10955 + }, + { + "epoch": 0.16, + "grad_norm": 0.52734375, + "learning_rate": 0.00019801283673388036, + "loss": 0.8799, + "step": 10960 + }, + { + "epoch": 0.16, + "grad_norm": 0.5703125, + "learning_rate": 0.00019800786737429792, + "loss": 1.0314, + "step": 10965 + }, + { + "epoch": 0.16, + "grad_norm": 0.578125, + "learning_rate": 0.00019800289187147068, + "loss": 0.9071, + "step": 10970 + }, + { + "epoch": 0.16, + "grad_norm": 0.56640625, + "learning_rate": 0.0001979979102257106, + "loss": 1.0473, + "step": 10975 + }, + { + "epoch": 0.16, + "grad_norm": 0.546875, + "learning_rate": 0.00019799292243732994, + "loss": 0.8929, + "step": 10980 + }, + { + "epoch": 0.16, + "grad_norm": 0.59375, + "learning_rate": 0.00019798792850664123, + "loss": 0.9483, + "step": 10985 + }, + { + "epoch": 0.16, + "grad_norm": 0.65234375, + "learning_rate": 0.0001979829284339576, + "loss": 1.109, + "step": 10990 + }, + { + "epoch": 0.16, + "grad_norm": 0.6171875, + "learning_rate": 0.00019797792221959242, + "loss": 0.9952, + "step": 10995 + }, + { + "epoch": 0.16, + "grad_norm": 0.53515625, + "learning_rate": 0.00019797290986385945, + "loss": 0.9381, + "step": 11000 + }, + { + "epoch": 0.16, + "grad_norm": 0.53125, + "learning_rate": 0.00019796789136707296, + "loss": 1.0971, + "step": 11005 + }, + { + "epoch": 0.16, + "grad_norm": 0.49609375, + "learning_rate": 0.0001979628667295474, + "loss": 0.9669, + "step": 11010 + }, + { + "epoch": 0.16, + "grad_norm": 0.80859375, + "learning_rate": 0.00019795783595159784, + "loss": 1.0967, + "step": 11015 + }, + { + "epoch": 0.16, + "grad_norm": 0.546875, + "learning_rate": 0.00019795279903353955, + "loss": 0.9798, + "step": 11020 + }, + { + "epoch": 0.16, + "grad_norm": 0.58203125, + "learning_rate": 0.00019794775597568824, + "loss": 0.9328, + "step": 11025 + }, + { + "epoch": 0.16, + "grad_norm": 0.625, + "learning_rate": 0.00019794270677836004, + "loss": 0.8322, + "step": 11030 + }, + { + "epoch": 0.16, + "grad_norm": 0.5234375, + "learning_rate": 0.00019793765144187145, + "loss": 0.8911, + "step": 11035 + }, + { + "epoch": 0.16, + "grad_norm": 0.56640625, + "learning_rate": 0.0001979325899665393, + "loss": 0.9504, + "step": 11040 + }, + { + "epoch": 0.16, + "grad_norm": 0.61328125, + "learning_rate": 0.00019792752235268086, + "loss": 1.0508, + "step": 11045 + }, + { + "epoch": 0.16, + "grad_norm": 0.55078125, + "learning_rate": 0.00019792244860061383, + "loss": 0.9632, + "step": 11050 + }, + { + "epoch": 0.16, + "grad_norm": 0.51953125, + "learning_rate": 0.00019791736871065617, + "loss": 1.0252, + "step": 11055 + }, + { + "epoch": 0.16, + "grad_norm": 0.69921875, + "learning_rate": 0.00019791228268312635, + "loss": 0.9486, + "step": 11060 + }, + { + "epoch": 0.16, + "grad_norm": 0.53515625, + "learning_rate": 0.0001979071905183431, + "loss": 1.2584, + "step": 11065 + }, + { + "epoch": 0.16, + "grad_norm": 0.51953125, + "learning_rate": 0.00019790209221662566, + "loss": 0.8252, + "step": 11070 + }, + { + "epoch": 0.16, + "grad_norm": 0.56640625, + "learning_rate": 0.00019789698777829357, + "loss": 0.9679, + "step": 11075 + }, + { + "epoch": 0.16, + "grad_norm": 0.53125, + "learning_rate": 0.0001978918772036668, + "loss": 1.0461, + "step": 11080 + }, + { + "epoch": 0.16, + "grad_norm": 0.58984375, + "learning_rate": 0.0001978867604930657, + "loss": 1.0895, + "step": 11085 + }, + { + "epoch": 0.16, + "grad_norm": 0.55859375, + "learning_rate": 0.00019788163764681093, + "loss": 1.0478, + "step": 11090 + }, + { + "epoch": 0.16, + "grad_norm": 0.55078125, + "learning_rate": 0.00019787650866522364, + "loss": 0.9994, + "step": 11095 + }, + { + "epoch": 0.16, + "grad_norm": 0.5859375, + "learning_rate": 0.00019787137354862532, + "loss": 1.0482, + "step": 11100 + }, + { + "epoch": 0.16, + "grad_norm": 0.61328125, + "learning_rate": 0.00019786623229733785, + "loss": 1.0325, + "step": 11105 + }, + { + "epoch": 0.16, + "grad_norm": 0.49609375, + "learning_rate": 0.00019786108491168347, + "loss": 0.9032, + "step": 11110 + }, + { + "epoch": 0.16, + "grad_norm": 0.53515625, + "learning_rate": 0.00019785593139198484, + "loss": 0.9816, + "step": 11115 + }, + { + "epoch": 0.16, + "grad_norm": 0.5546875, + "learning_rate": 0.00019785077173856496, + "loss": 0.9775, + "step": 11120 + }, + { + "epoch": 0.16, + "grad_norm": 0.4609375, + "learning_rate": 0.00019784560595174732, + "loss": 0.9643, + "step": 11125 + }, + { + "epoch": 0.16, + "grad_norm": 0.6171875, + "learning_rate": 0.00019784043403185558, + "loss": 0.98, + "step": 11130 + }, + { + "epoch": 0.16, + "grad_norm": 0.6328125, + "learning_rate": 0.00019783525597921408, + "loss": 1.0203, + "step": 11135 + }, + { + "epoch": 0.16, + "grad_norm": 0.76171875, + "learning_rate": 0.00019783007179414728, + "loss": 1.126, + "step": 11140 + }, + { + "epoch": 0.16, + "grad_norm": 0.62890625, + "learning_rate": 0.00019782488147698015, + "loss": 1.0501, + "step": 11145 + }, + { + "epoch": 0.16, + "grad_norm": 0.515625, + "learning_rate": 0.00019781968502803805, + "loss": 0.8757, + "step": 11150 + }, + { + "epoch": 0.16, + "grad_norm": 0.58203125, + "learning_rate": 0.00019781448244764665, + "loss": 0.9667, + "step": 11155 + }, + { + "epoch": 0.16, + "grad_norm": 0.53125, + "learning_rate": 0.00019780927373613217, + "loss": 0.8694, + "step": 11160 + }, + { + "epoch": 0.16, + "grad_norm": 0.53515625, + "learning_rate": 0.00019780405889382094, + "loss": 1.0174, + "step": 11165 + }, + { + "epoch": 0.16, + "grad_norm": 0.515625, + "learning_rate": 0.00019779883792103996, + "loss": 1.0074, + "step": 11170 + }, + { + "epoch": 0.16, + "grad_norm": 0.5625, + "learning_rate": 0.0001977936108181164, + "loss": 0.9958, + "step": 11175 + }, + { + "epoch": 0.16, + "grad_norm": 0.859375, + "learning_rate": 0.00019778837758537793, + "loss": 1.0806, + "step": 11180 + }, + { + "epoch": 0.16, + "grad_norm": 0.5390625, + "learning_rate": 0.0001977831382231526, + "loss": 1.0919, + "step": 11185 + }, + { + "epoch": 0.16, + "grad_norm": 0.6015625, + "learning_rate": 0.0001977778927317688, + "loss": 1.0725, + "step": 11190 + }, + { + "epoch": 0.16, + "grad_norm": 0.6171875, + "learning_rate": 0.00019777264111155534, + "loss": 1.0368, + "step": 11195 + }, + { + "epoch": 0.16, + "grad_norm": 0.609375, + "learning_rate": 0.00019776738336284134, + "loss": 0.9784, + "step": 11200 + }, + { + "epoch": 0.16, + "grad_norm": 0.6171875, + "learning_rate": 0.00019776211948595646, + "loss": 1.0625, + "step": 11205 + }, + { + "epoch": 0.16, + "grad_norm": 0.56640625, + "learning_rate": 0.00019775684948123052, + "loss": 1.036, + "step": 11210 + }, + { + "epoch": 0.16, + "grad_norm": 0.46484375, + "learning_rate": 0.00019775157334899396, + "loss": 0.8934, + "step": 11215 + }, + { + "epoch": 0.16, + "grad_norm": 0.62890625, + "learning_rate": 0.00019774629108957746, + "loss": 0.9584, + "step": 11220 + }, + { + "epoch": 0.16, + "grad_norm": 0.55078125, + "learning_rate": 0.0001977410027033121, + "loss": 1.0195, + "step": 11225 + }, + { + "epoch": 0.16, + "grad_norm": 0.578125, + "learning_rate": 0.00019773570819052938, + "loss": 1.1691, + "step": 11230 + }, + { + "epoch": 0.16, + "grad_norm": 0.5546875, + "learning_rate": 0.00019773040755156115, + "loss": 0.9637, + "step": 11235 + }, + { + "epoch": 0.16, + "grad_norm": 0.55078125, + "learning_rate": 0.00019772510078673965, + "loss": 1.0448, + "step": 11240 + }, + { + "epoch": 0.16, + "grad_norm": 0.56640625, + "learning_rate": 0.00019771978789639758, + "loss": 0.9052, + "step": 11245 + }, + { + "epoch": 0.16, + "grad_norm": 0.50390625, + "learning_rate": 0.00019771446888086787, + "loss": 1.0487, + "step": 11250 + }, + { + "epoch": 0.16, + "grad_norm": 0.59765625, + "learning_rate": 0.00019770914374048397, + "loss": 1.0113, + "step": 11255 + }, + { + "epoch": 0.16, + "grad_norm": 0.59375, + "learning_rate": 0.00019770381247557968, + "loss": 0.9715, + "step": 11260 + }, + { + "epoch": 0.16, + "grad_norm": 0.52734375, + "learning_rate": 0.00019769847508648911, + "loss": 0.8075, + "step": 11265 + }, + { + "epoch": 0.16, + "grad_norm": 0.60546875, + "learning_rate": 0.0001976931315735469, + "loss": 0.871, + "step": 11270 + }, + { + "epoch": 0.16, + "grad_norm": 0.52734375, + "learning_rate": 0.00019768778193708793, + "loss": 1.0295, + "step": 11275 + }, + { + "epoch": 0.16, + "grad_norm": 0.57421875, + "learning_rate": 0.0001976824261774475, + "loss": 1.0336, + "step": 11280 + }, + { + "epoch": 0.16, + "grad_norm": 0.6171875, + "learning_rate": 0.0001976770642949614, + "loss": 1.0173, + "step": 11285 + }, + { + "epoch": 0.16, + "grad_norm": 0.5625, + "learning_rate": 0.00019767169628996558, + "loss": 0.9169, + "step": 11290 + }, + { + "epoch": 0.16, + "grad_norm": 0.55859375, + "learning_rate": 0.00019766632216279664, + "loss": 1.0464, + "step": 11295 + }, + { + "epoch": 0.16, + "grad_norm": 0.64453125, + "learning_rate": 0.0001976609419137914, + "loss": 1.0582, + "step": 11300 + }, + { + "epoch": 0.16, + "grad_norm": 0.5859375, + "learning_rate": 0.00019765555554328713, + "loss": 0.9371, + "step": 11305 + }, + { + "epoch": 0.16, + "grad_norm": 0.51171875, + "learning_rate": 0.0001976501630516214, + "loss": 0.9651, + "step": 11310 + }, + { + "epoch": 0.16, + "grad_norm": 0.55078125, + "learning_rate": 0.00019764476443913221, + "loss": 1.0393, + "step": 11315 + }, + { + "epoch": 0.16, + "grad_norm": 0.671875, + "learning_rate": 0.00019763935970615798, + "loss": 1.0131, + "step": 11320 + }, + { + "epoch": 0.16, + "grad_norm": 0.67578125, + "learning_rate": 0.00019763394885303747, + "loss": 1.0477, + "step": 11325 + }, + { + "epoch": 0.16, + "grad_norm": 0.65234375, + "learning_rate": 0.00019762853188010987, + "loss": 0.9775, + "step": 11330 + }, + { + "epoch": 0.16, + "grad_norm": 0.578125, + "learning_rate": 0.00019762310878771472, + "loss": 1.0786, + "step": 11335 + }, + { + "epoch": 0.16, + "grad_norm": 0.54296875, + "learning_rate": 0.00019761767957619187, + "loss": 0.9287, + "step": 11340 + }, + { + "epoch": 0.16, + "grad_norm": 0.6171875, + "learning_rate": 0.00019761224424588174, + "loss": 1.0053, + "step": 11345 + }, + { + "epoch": 0.16, + "grad_norm": 0.5390625, + "learning_rate": 0.00019760680279712497, + "loss": 0.963, + "step": 11350 + }, + { + "epoch": 0.16, + "grad_norm": 0.5390625, + "learning_rate": 0.00019760135523026263, + "loss": 0.9512, + "step": 11355 + }, + { + "epoch": 0.16, + "grad_norm": 0.5625, + "learning_rate": 0.00019759590154563617, + "loss": 0.987, + "step": 11360 + }, + { + "epoch": 0.16, + "grad_norm": 0.5703125, + "learning_rate": 0.00019759044174358744, + "loss": 0.9325, + "step": 11365 + }, + { + "epoch": 0.16, + "grad_norm": 0.53515625, + "learning_rate": 0.0001975849758244587, + "loss": 0.9854, + "step": 11370 + }, + { + "epoch": 0.16, + "grad_norm": 0.5390625, + "learning_rate": 0.00019757950378859252, + "loss": 1.0722, + "step": 11375 + }, + { + "epoch": 0.16, + "grad_norm": 0.5625, + "learning_rate": 0.00019757402563633193, + "loss": 1.0457, + "step": 11380 + }, + { + "epoch": 0.16, + "grad_norm": 0.5234375, + "learning_rate": 0.00019756854136802025, + "loss": 0.8436, + "step": 11385 + }, + { + "epoch": 0.16, + "grad_norm": 0.64453125, + "learning_rate": 0.00019756305098400133, + "loss": 1.1211, + "step": 11390 + }, + { + "epoch": 0.16, + "grad_norm": 0.6640625, + "learning_rate": 0.0001975575544846192, + "loss": 1.0195, + "step": 11395 + }, + { + "epoch": 0.16, + "grad_norm": 0.57421875, + "learning_rate": 0.00019755205187021847, + "loss": 1.0107, + "step": 11400 + }, + { + "epoch": 0.16, + "grad_norm": 0.578125, + "learning_rate": 0.00019754654314114404, + "loss": 0.9498, + "step": 11405 + }, + { + "epoch": 0.16, + "grad_norm": 0.5625, + "learning_rate": 0.00019754102829774118, + "loss": 1.0059, + "step": 11410 + }, + { + "epoch": 0.16, + "grad_norm": 0.51953125, + "learning_rate": 0.00019753550734035558, + "loss": 0.8315, + "step": 11415 + }, + { + "epoch": 0.16, + "grad_norm": 0.55078125, + "learning_rate": 0.00019752998026933326, + "loss": 1.0347, + "step": 11420 + }, + { + "epoch": 0.16, + "grad_norm": 0.53515625, + "learning_rate": 0.00019752444708502073, + "loss": 0.9961, + "step": 11425 + }, + { + "epoch": 0.16, + "grad_norm": 0.546875, + "learning_rate": 0.0001975189077877648, + "loss": 1.0512, + "step": 11430 + }, + { + "epoch": 0.16, + "grad_norm": 0.69140625, + "learning_rate": 0.00019751336237791263, + "loss": 1.0064, + "step": 11435 + }, + { + "epoch": 0.16, + "grad_norm": 0.6015625, + "learning_rate": 0.0001975078108558119, + "loss": 0.987, + "step": 11440 + }, + { + "epoch": 0.16, + "grad_norm": 0.55078125, + "learning_rate": 0.0001975022532218105, + "loss": 1.0732, + "step": 11445 + }, + { + "epoch": 0.16, + "grad_norm": 0.5390625, + "learning_rate": 0.00019749668947625682, + "loss": 0.9559, + "step": 11450 + }, + { + "epoch": 0.16, + "grad_norm": 0.5546875, + "learning_rate": 0.0001974911196194996, + "loss": 0.9522, + "step": 11455 + }, + { + "epoch": 0.16, + "grad_norm": 0.69921875, + "learning_rate": 0.00019748554365188798, + "loss": 0.9707, + "step": 11460 + }, + { + "epoch": 0.16, + "grad_norm": 0.55859375, + "learning_rate": 0.00019747996157377148, + "loss": 0.8298, + "step": 11465 + }, + { + "epoch": 0.16, + "grad_norm": 0.5859375, + "learning_rate": 0.00019747437338549993, + "loss": 0.9397, + "step": 11470 + }, + { + "epoch": 0.16, + "grad_norm": 0.5546875, + "learning_rate": 0.00019746877908742363, + "loss": 0.8971, + "step": 11475 + }, + { + "epoch": 0.16, + "grad_norm": 0.58203125, + "learning_rate": 0.00019746317867989327, + "loss": 1.0147, + "step": 11480 + }, + { + "epoch": 0.16, + "grad_norm": 0.5390625, + "learning_rate": 0.0001974575721632599, + "loss": 0.9297, + "step": 11485 + }, + { + "epoch": 0.16, + "grad_norm": 0.53515625, + "learning_rate": 0.00019745195953787487, + "loss": 0.9276, + "step": 11490 + }, + { + "epoch": 0.16, + "grad_norm": 0.52734375, + "learning_rate": 0.00019744634080409005, + "loss": 0.8326, + "step": 11495 + }, + { + "epoch": 0.16, + "grad_norm": 0.498046875, + "learning_rate": 0.00019744071596225759, + "loss": 0.9892, + "step": 11500 + }, + { + "epoch": 0.17, + "grad_norm": 0.56640625, + "learning_rate": 0.0001974350850127301, + "loss": 1.118, + "step": 11505 + }, + { + "epoch": 0.17, + "grad_norm": 0.56640625, + "learning_rate": 0.0001974294479558605, + "loss": 0.8706, + "step": 11510 + }, + { + "epoch": 0.17, + "grad_norm": 0.52734375, + "learning_rate": 0.00019742380479200212, + "loss": 0.9876, + "step": 11515 + }, + { + "epoch": 0.17, + "grad_norm": 0.6328125, + "learning_rate": 0.00019741815552150874, + "loss": 1.0086, + "step": 11520 + }, + { + "epoch": 0.17, + "grad_norm": 0.5546875, + "learning_rate": 0.0001974125001447344, + "loss": 0.8989, + "step": 11525 + }, + { + "epoch": 0.17, + "grad_norm": 0.58203125, + "learning_rate": 0.0001974068386620336, + "loss": 0.7927, + "step": 11530 + }, + { + "epoch": 0.17, + "grad_norm": 0.5703125, + "learning_rate": 0.0001974011710737612, + "loss": 1.1665, + "step": 11535 + }, + { + "epoch": 0.17, + "grad_norm": 0.6640625, + "learning_rate": 0.00019739549738027252, + "loss": 0.921, + "step": 11540 + }, + { + "epoch": 0.17, + "grad_norm": 0.6328125, + "learning_rate": 0.0001973898175819231, + "loss": 1.1424, + "step": 11545 + }, + { + "epoch": 0.17, + "grad_norm": 0.482421875, + "learning_rate": 0.000197384131679069, + "loss": 1.1059, + "step": 11550 + }, + { + "epoch": 0.17, + "grad_norm": 0.56640625, + "learning_rate": 0.0001973784396720666, + "loss": 1.1901, + "step": 11555 + }, + { + "epoch": 0.17, + "grad_norm": 0.64453125, + "learning_rate": 0.00019737274156127272, + "loss": 0.9919, + "step": 11560 + }, + { + "epoch": 0.17, + "grad_norm": 0.53125, + "learning_rate": 0.0001973670373470445, + "loss": 0.9388, + "step": 11565 + }, + { + "epoch": 0.17, + "grad_norm": 0.51953125, + "learning_rate": 0.0001973613270297395, + "loss": 1.0375, + "step": 11570 + }, + { + "epoch": 0.17, + "grad_norm": 0.625, + "learning_rate": 0.00019735561060971565, + "loss": 0.942, + "step": 11575 + }, + { + "epoch": 0.17, + "grad_norm": 0.55078125, + "learning_rate": 0.0001973498880873312, + "loss": 1.0162, + "step": 11580 + }, + { + "epoch": 0.17, + "grad_norm": 0.486328125, + "learning_rate": 0.00019734415946294492, + "loss": 1.0539, + "step": 11585 + }, + { + "epoch": 0.17, + "grad_norm": 0.60546875, + "learning_rate": 0.00019733842473691583, + "loss": 1.2076, + "step": 11590 + }, + { + "epoch": 0.17, + "grad_norm": 0.498046875, + "learning_rate": 0.00019733268390960344, + "loss": 0.9424, + "step": 11595 + }, + { + "epoch": 0.17, + "grad_norm": 0.62109375, + "learning_rate": 0.00019732693698136758, + "loss": 0.8787, + "step": 11600 + }, + { + "epoch": 0.17, + "grad_norm": 0.546875, + "learning_rate": 0.00019732118395256846, + "loss": 1.0662, + "step": 11605 + }, + { + "epoch": 0.17, + "grad_norm": 0.59375, + "learning_rate": 0.0001973154248235667, + "loss": 1.0387, + "step": 11610 + }, + { + "epoch": 0.17, + "grad_norm": 0.578125, + "learning_rate": 0.00019730965959472325, + "loss": 1.0715, + "step": 11615 + }, + { + "epoch": 0.17, + "grad_norm": 0.6484375, + "learning_rate": 0.00019730388826639953, + "loss": 0.9547, + "step": 11620 + }, + { + "epoch": 0.17, + "grad_norm": 0.51171875, + "learning_rate": 0.00019729811083895723, + "loss": 0.8558, + "step": 11625 + }, + { + "epoch": 0.17, + "grad_norm": 0.55078125, + "learning_rate": 0.00019729232731275858, + "loss": 0.8651, + "step": 11630 + }, + { + "epoch": 0.17, + "grad_norm": 0.60546875, + "learning_rate": 0.00019728653768816604, + "loss": 1.0039, + "step": 11635 + }, + { + "epoch": 0.17, + "grad_norm": 0.51953125, + "learning_rate": 0.0001972807419655425, + "loss": 0.8732, + "step": 11640 + }, + { + "epoch": 0.17, + "grad_norm": 0.52734375, + "learning_rate": 0.0001972749401452513, + "loss": 1.0182, + "step": 11645 + }, + { + "epoch": 0.17, + "grad_norm": 0.60546875, + "learning_rate": 0.00019726913222765601, + "loss": 0.9004, + "step": 11650 + }, + { + "epoch": 0.17, + "grad_norm": 0.59375, + "learning_rate": 0.00019726331821312077, + "loss": 1.0023, + "step": 11655 + }, + { + "epoch": 0.17, + "grad_norm": 0.5078125, + "learning_rate": 0.00019725749810200994, + "loss": 0.8979, + "step": 11660 + }, + { + "epoch": 0.17, + "grad_norm": 0.625, + "learning_rate": 0.00019725167189468842, + "loss": 1.0709, + "step": 11665 + }, + { + "epoch": 0.17, + "grad_norm": 0.6015625, + "learning_rate": 0.00019724583959152128, + "loss": 1.1703, + "step": 11670 + }, + { + "epoch": 0.17, + "grad_norm": 0.54296875, + "learning_rate": 0.0001972400011928742, + "loss": 1.0387, + "step": 11675 + }, + { + "epoch": 0.17, + "grad_norm": 0.58203125, + "learning_rate": 0.00019723415669911307, + "loss": 1.0629, + "step": 11680 + }, + { + "epoch": 0.17, + "grad_norm": 0.62890625, + "learning_rate": 0.0001972283061106043, + "loss": 0.961, + "step": 11685 + }, + { + "epoch": 0.17, + "grad_norm": 0.52734375, + "learning_rate": 0.00019722244942771452, + "loss": 1.0338, + "step": 11690 + }, + { + "epoch": 0.17, + "grad_norm": 0.62109375, + "learning_rate": 0.0001972165866508109, + "loss": 0.9863, + "step": 11695 + }, + { + "epoch": 0.17, + "grad_norm": 0.55859375, + "learning_rate": 0.0001972107177802609, + "loss": 0.982, + "step": 11700 + }, + { + "epoch": 0.17, + "grad_norm": 0.71484375, + "learning_rate": 0.0001972048428164324, + "loss": 1.0622, + "step": 11705 + }, + { + "epoch": 0.17, + "grad_norm": 0.546875, + "learning_rate": 0.00019719896175969369, + "loss": 0.9321, + "step": 11710 + }, + { + "epoch": 0.17, + "grad_norm": 0.60546875, + "learning_rate": 0.00019719307461041333, + "loss": 0.9296, + "step": 11715 + }, + { + "epoch": 0.17, + "grad_norm": 0.5859375, + "learning_rate": 0.00019718718136896033, + "loss": 0.9618, + "step": 11720 + }, + { + "epoch": 0.17, + "grad_norm": 0.61328125, + "learning_rate": 0.00019718128203570414, + "loss": 1.0673, + "step": 11725 + }, + { + "epoch": 0.17, + "grad_norm": 0.57421875, + "learning_rate": 0.0001971753766110145, + "loss": 0.816, + "step": 11730 + }, + { + "epoch": 0.17, + "grad_norm": 0.71484375, + "learning_rate": 0.00019716946509526156, + "loss": 0.9263, + "step": 11735 + }, + { + "epoch": 0.17, + "grad_norm": 0.62109375, + "learning_rate": 0.00019716354748881593, + "loss": 1.0468, + "step": 11740 + }, + { + "epoch": 0.17, + "grad_norm": 0.51953125, + "learning_rate": 0.00019715762379204845, + "loss": 0.867, + "step": 11745 + }, + { + "epoch": 0.17, + "grad_norm": 0.5703125, + "learning_rate": 0.00019715169400533048, + "loss": 1.0149, + "step": 11750 + }, + { + "epoch": 0.17, + "grad_norm": 0.58984375, + "learning_rate": 0.0001971457581290337, + "loss": 0.7832, + "step": 11755 + }, + { + "epoch": 0.17, + "grad_norm": 0.55078125, + "learning_rate": 0.0001971398161635301, + "loss": 1.0403, + "step": 11760 + }, + { + "epoch": 0.17, + "grad_norm": 0.44921875, + "learning_rate": 0.00019713386810919222, + "loss": 1.0341, + "step": 11765 + }, + { + "epoch": 0.17, + "grad_norm": 0.546875, + "learning_rate": 0.00019712791396639287, + "loss": 0.9868, + "step": 11770 + }, + { + "epoch": 0.17, + "grad_norm": 0.62890625, + "learning_rate": 0.00019712195373550526, + "loss": 1.0789, + "step": 11775 + }, + { + "epoch": 0.17, + "grad_norm": 0.62109375, + "learning_rate": 0.00019711598741690295, + "loss": 1.1115, + "step": 11780 + }, + { + "epoch": 0.17, + "grad_norm": 0.61328125, + "learning_rate": 0.00019711001501096, + "loss": 1.047, + "step": 11785 + }, + { + "epoch": 0.17, + "grad_norm": 0.50390625, + "learning_rate": 0.00019710403651805063, + "loss": 1.0612, + "step": 11790 + }, + { + "epoch": 0.17, + "grad_norm": 0.65234375, + "learning_rate": 0.0001970980519385497, + "loss": 1.044, + "step": 11795 + }, + { + "epoch": 0.17, + "grad_norm": 0.62109375, + "learning_rate": 0.0001970920612728323, + "loss": 1.0137, + "step": 11800 + }, + { + "epoch": 0.17, + "grad_norm": 0.6796875, + "learning_rate": 0.0001970860645212739, + "loss": 1.017, + "step": 11805 + }, + { + "epoch": 0.17, + "grad_norm": 0.6171875, + "learning_rate": 0.0001970800616842504, + "loss": 1.0174, + "step": 11810 + }, + { + "epoch": 0.17, + "grad_norm": 0.578125, + "learning_rate": 0.00019707405276213807, + "loss": 1.0831, + "step": 11815 + }, + { + "epoch": 0.17, + "grad_norm": 0.51171875, + "learning_rate": 0.00019706803775531358, + "loss": 1.0993, + "step": 11820 + }, + { + "epoch": 0.17, + "grad_norm": 0.5546875, + "learning_rate": 0.0001970620166641539, + "loss": 1.0144, + "step": 11825 + }, + { + "epoch": 0.17, + "grad_norm": 0.439453125, + "learning_rate": 0.00019705598948903649, + "loss": 0.8345, + "step": 11830 + }, + { + "epoch": 0.17, + "grad_norm": 0.5546875, + "learning_rate": 0.00019704995623033914, + "loss": 0.954, + "step": 11835 + }, + { + "epoch": 0.17, + "grad_norm": 0.51953125, + "learning_rate": 0.00019704391688843997, + "loss": 0.9742, + "step": 11840 + }, + { + "epoch": 0.17, + "grad_norm": 0.55859375, + "learning_rate": 0.00019703787146371757, + "loss": 0.858, + "step": 11845 + }, + { + "epoch": 0.17, + "grad_norm": 0.5390625, + "learning_rate": 0.0001970318199565509, + "loss": 0.9053, + "step": 11850 + }, + { + "epoch": 0.17, + "grad_norm": 0.58984375, + "learning_rate": 0.0001970257623673192, + "loss": 0.9909, + "step": 11855 + }, + { + "epoch": 0.17, + "grad_norm": 0.66015625, + "learning_rate": 0.0001970196986964022, + "loss": 1.0612, + "step": 11860 + }, + { + "epoch": 0.17, + "grad_norm": 0.55078125, + "learning_rate": 0.00019701362894418005, + "loss": 0.9934, + "step": 11865 + }, + { + "epoch": 0.17, + "grad_norm": 0.52734375, + "learning_rate": 0.00019700755311103315, + "loss": 0.9206, + "step": 11870 + }, + { + "epoch": 0.17, + "grad_norm": 0.5, + "learning_rate": 0.0001970014711973423, + "loss": 0.8881, + "step": 11875 + }, + { + "epoch": 0.17, + "grad_norm": 0.625, + "learning_rate": 0.00019699538320348876, + "loss": 0.99, + "step": 11880 + }, + { + "epoch": 0.17, + "grad_norm": 0.65234375, + "learning_rate": 0.00019698928912985415, + "loss": 1.1218, + "step": 11885 + }, + { + "epoch": 0.17, + "grad_norm": 0.58984375, + "learning_rate": 0.00019698318897682043, + "loss": 0.842, + "step": 11890 + }, + { + "epoch": 0.17, + "grad_norm": 0.734375, + "learning_rate": 0.00019697708274476995, + "loss": 0.9669, + "step": 11895 + }, + { + "epoch": 0.17, + "grad_norm": 0.5546875, + "learning_rate": 0.0001969709704340855, + "loss": 1.0681, + "step": 11900 + }, + { + "epoch": 0.17, + "grad_norm": 0.54296875, + "learning_rate": 0.0001969648520451502, + "loss": 0.9509, + "step": 11905 + }, + { + "epoch": 0.17, + "grad_norm": 0.5390625, + "learning_rate": 0.0001969587275783475, + "loss": 0.9891, + "step": 11910 + }, + { + "epoch": 0.17, + "grad_norm": 0.55859375, + "learning_rate": 0.00019695259703406134, + "loss": 0.9673, + "step": 11915 + }, + { + "epoch": 0.17, + "grad_norm": 0.5625, + "learning_rate": 0.00019694646041267598, + "loss": 1.0024, + "step": 11920 + }, + { + "epoch": 0.17, + "grad_norm": 0.53515625, + "learning_rate": 0.00019694031771457608, + "loss": 1.1921, + "step": 11925 + }, + { + "epoch": 0.17, + "grad_norm": 0.4765625, + "learning_rate": 0.00019693416894014666, + "loss": 0.9211, + "step": 11930 + }, + { + "epoch": 0.17, + "grad_norm": 0.55859375, + "learning_rate": 0.00019692801408977313, + "loss": 0.9258, + "step": 11935 + }, + { + "epoch": 0.17, + "grad_norm": 0.55078125, + "learning_rate": 0.00019692185316384127, + "loss": 1.2574, + "step": 11940 + }, + { + "epoch": 0.17, + "grad_norm": 0.62890625, + "learning_rate": 0.0001969156861627373, + "loss": 1.0631, + "step": 11945 + }, + { + "epoch": 0.17, + "grad_norm": 0.5, + "learning_rate": 0.0001969095130868477, + "loss": 0.9337, + "step": 11950 + }, + { + "epoch": 0.17, + "grad_norm": 0.66796875, + "learning_rate": 0.00019690333393655948, + "loss": 1.0891, + "step": 11955 + }, + { + "epoch": 0.17, + "grad_norm": 0.5546875, + "learning_rate": 0.0001968971487122599, + "loss": 0.8698, + "step": 11960 + }, + { + "epoch": 0.17, + "grad_norm": 0.59765625, + "learning_rate": 0.00019689095741433672, + "loss": 1.0847, + "step": 11965 + }, + { + "epoch": 0.17, + "grad_norm": 0.5859375, + "learning_rate": 0.00019688476004317795, + "loss": 1.0954, + "step": 11970 + }, + { + "epoch": 0.17, + "grad_norm": 0.59375, + "learning_rate": 0.00019687855659917207, + "loss": 1.0362, + "step": 11975 + }, + { + "epoch": 0.17, + "grad_norm": 0.7109375, + "learning_rate": 0.00019687234708270793, + "loss": 0.9058, + "step": 11980 + }, + { + "epoch": 0.17, + "grad_norm": 0.50390625, + "learning_rate": 0.00019686613149417478, + "loss": 0.9234, + "step": 11985 + }, + { + "epoch": 0.17, + "grad_norm": 0.56640625, + "learning_rate": 0.00019685990983396215, + "loss": 0.9797, + "step": 11990 + }, + { + "epoch": 0.17, + "grad_norm": 0.5859375, + "learning_rate": 0.00019685368210246008, + "loss": 0.9666, + "step": 11995 + }, + { + "epoch": 0.17, + "grad_norm": 0.578125, + "learning_rate": 0.00019684744830005893, + "loss": 0.9774, + "step": 12000 + }, + { + "epoch": 0.17, + "grad_norm": 0.5625, + "learning_rate": 0.00019684120842714938, + "loss": 1.1606, + "step": 12005 + }, + { + "epoch": 0.17, + "grad_norm": 0.51171875, + "learning_rate": 0.0001968349624841226, + "loss": 0.9442, + "step": 12010 + }, + { + "epoch": 0.17, + "grad_norm": 0.53515625, + "learning_rate": 0.0001968287104713701, + "loss": 1.0067, + "step": 12015 + }, + { + "epoch": 0.17, + "grad_norm": 0.5078125, + "learning_rate": 0.00019682245238928373, + "loss": 1.0373, + "step": 12020 + }, + { + "epoch": 0.17, + "grad_norm": 0.609375, + "learning_rate": 0.00019681618823825581, + "loss": 1.0156, + "step": 12025 + }, + { + "epoch": 0.17, + "grad_norm": 0.53125, + "learning_rate": 0.00019680991801867892, + "loss": 0.9226, + "step": 12030 + }, + { + "epoch": 0.17, + "grad_norm": 0.54296875, + "learning_rate": 0.00019680364173094609, + "loss": 0.9687, + "step": 12035 + }, + { + "epoch": 0.17, + "grad_norm": 0.5546875, + "learning_rate": 0.00019679735937545075, + "loss": 0.9303, + "step": 12040 + }, + { + "epoch": 0.17, + "grad_norm": 0.59765625, + "learning_rate": 0.00019679107095258672, + "loss": 0.9594, + "step": 12045 + }, + { + "epoch": 0.17, + "grad_norm": 0.59765625, + "learning_rate": 0.0001967847764627481, + "loss": 0.9388, + "step": 12050 + }, + { + "epoch": 0.17, + "grad_norm": 0.53515625, + "learning_rate": 0.00019677847590632947, + "loss": 0.8384, + "step": 12055 + }, + { + "epoch": 0.17, + "grad_norm": 0.5625, + "learning_rate": 0.00019677216928372574, + "loss": 1.0197, + "step": 12060 + }, + { + "epoch": 0.17, + "grad_norm": 0.53515625, + "learning_rate": 0.00019676585659533224, + "loss": 0.9054, + "step": 12065 + }, + { + "epoch": 0.17, + "grad_norm": 0.4765625, + "learning_rate": 0.00019675953784154465, + "loss": 0.8919, + "step": 12070 + }, + { + "epoch": 0.17, + "grad_norm": 0.609375, + "learning_rate": 0.000196753213022759, + "loss": 0.9564, + "step": 12075 + }, + { + "epoch": 0.17, + "grad_norm": 0.6875, + "learning_rate": 0.00019674688213937178, + "loss": 0.9152, + "step": 12080 + }, + { + "epoch": 0.17, + "grad_norm": 0.58203125, + "learning_rate": 0.00019674054519177981, + "loss": 1.0695, + "step": 12085 + }, + { + "epoch": 0.17, + "grad_norm": 0.5703125, + "learning_rate": 0.00019673420218038027, + "loss": 1.0131, + "step": 12090 + }, + { + "epoch": 0.17, + "grad_norm": 0.51953125, + "learning_rate": 0.00019672785310557078, + "loss": 1.0804, + "step": 12095 + }, + { + "epoch": 0.17, + "grad_norm": 0.5703125, + "learning_rate": 0.00019672149796774927, + "loss": 0.9286, + "step": 12100 + }, + { + "epoch": 0.17, + "grad_norm": 0.56640625, + "learning_rate": 0.00019671513676731414, + "loss": 0.992, + "step": 12105 + }, + { + "epoch": 0.17, + "grad_norm": 0.57421875, + "learning_rate": 0.0001967087695046641, + "loss": 1.0514, + "step": 12110 + }, + { + "epoch": 0.17, + "grad_norm": 0.58984375, + "learning_rate": 0.0001967023961801982, + "loss": 1.1135, + "step": 12115 + }, + { + "epoch": 0.17, + "grad_norm": 0.46875, + "learning_rate": 0.000196696016794316, + "loss": 0.8374, + "step": 12120 + }, + { + "epoch": 0.17, + "grad_norm": 0.6015625, + "learning_rate": 0.00019668963134741733, + "loss": 1.0395, + "step": 12125 + }, + { + "epoch": 0.17, + "grad_norm": 0.51171875, + "learning_rate": 0.00019668323983990246, + "loss": 0.776, + "step": 12130 + }, + { + "epoch": 0.17, + "grad_norm": 0.57421875, + "learning_rate": 0.000196676842272172, + "loss": 0.9524, + "step": 12135 + }, + { + "epoch": 0.17, + "grad_norm": 0.6328125, + "learning_rate": 0.00019667043864462694, + "loss": 0.8601, + "step": 12140 + }, + { + "epoch": 0.17, + "grad_norm": 0.58203125, + "learning_rate": 0.00019666402895766871, + "loss": 0.98, + "step": 12145 + }, + { + "epoch": 0.17, + "grad_norm": 0.546875, + "learning_rate": 0.00019665761321169907, + "loss": 0.921, + "step": 12150 + }, + { + "epoch": 0.17, + "grad_norm": 0.455078125, + "learning_rate": 0.0001966511914071201, + "loss": 0.9572, + "step": 12155 + }, + { + "epoch": 0.17, + "grad_norm": 0.54296875, + "learning_rate": 0.00019664476354433444, + "loss": 0.9071, + "step": 12160 + }, + { + "epoch": 0.17, + "grad_norm": 0.5546875, + "learning_rate": 0.00019663832962374487, + "loss": 0.9123, + "step": 12165 + }, + { + "epoch": 0.17, + "grad_norm": 0.5546875, + "learning_rate": 0.00019663188964575475, + "loss": 0.8975, + "step": 12170 + }, + { + "epoch": 0.17, + "grad_norm": 0.5, + "learning_rate": 0.00019662544361076774, + "loss": 0.9547, + "step": 12175 + }, + { + "epoch": 0.17, + "grad_norm": 0.59765625, + "learning_rate": 0.0001966189915191879, + "loss": 1.0201, + "step": 12180 + }, + { + "epoch": 0.17, + "grad_norm": 0.59375, + "learning_rate": 0.00019661253337141963, + "loss": 0.985, + "step": 12185 + }, + { + "epoch": 0.17, + "grad_norm": 0.9765625, + "learning_rate": 0.00019660606916786772, + "loss": 0.9262, + "step": 12190 + }, + { + "epoch": 0.17, + "grad_norm": 0.57421875, + "learning_rate": 0.00019659959890893736, + "loss": 0.9385, + "step": 12195 + }, + { + "epoch": 0.18, + "grad_norm": 0.53515625, + "learning_rate": 0.00019659312259503412, + "loss": 0.9243, + "step": 12200 + }, + { + "epoch": 0.18, + "grad_norm": 0.546875, + "learning_rate": 0.00019658664022656396, + "loss": 0.9716, + "step": 12205 + }, + { + "epoch": 0.18, + "grad_norm": 0.6015625, + "learning_rate": 0.00019658015180393318, + "loss": 1.0187, + "step": 12210 + }, + { + "epoch": 0.18, + "grad_norm": 0.52734375, + "learning_rate": 0.0001965736573275485, + "loss": 0.9634, + "step": 12215 + }, + { + "epoch": 0.18, + "grad_norm": 0.5546875, + "learning_rate": 0.00019656715679781698, + "loss": 0.9778, + "step": 12220 + }, + { + "epoch": 0.18, + "grad_norm": 0.5390625, + "learning_rate": 0.0001965606502151461, + "loss": 0.8875, + "step": 12225 + }, + { + "epoch": 0.18, + "grad_norm": 0.6015625, + "learning_rate": 0.0001965541375799437, + "loss": 0.9571, + "step": 12230 + }, + { + "epoch": 0.18, + "grad_norm": 0.58203125, + "learning_rate": 0.000196547618892618, + "loss": 0.9802, + "step": 12235 + }, + { + "epoch": 0.18, + "grad_norm": 0.66015625, + "learning_rate": 0.00019654109415357754, + "loss": 1.049, + "step": 12240 + }, + { + "epoch": 0.18, + "grad_norm": 0.61328125, + "learning_rate": 0.0001965345633632314, + "loss": 1.0749, + "step": 12245 + }, + { + "epoch": 0.18, + "grad_norm": 0.5703125, + "learning_rate": 0.00019652802652198886, + "loss": 0.8459, + "step": 12250 + }, + { + "epoch": 0.18, + "grad_norm": 0.578125, + "learning_rate": 0.00019652148363025966, + "loss": 1.0139, + "step": 12255 + }, + { + "epoch": 0.18, + "grad_norm": 0.48828125, + "learning_rate": 0.00019651493468845398, + "loss": 0.8877, + "step": 12260 + }, + { + "epoch": 0.18, + "grad_norm": 0.60546875, + "learning_rate": 0.00019650837969698224, + "loss": 0.9687, + "step": 12265 + }, + { + "epoch": 0.18, + "grad_norm": 0.60546875, + "learning_rate": 0.00019650181865625536, + "loss": 0.9932, + "step": 12270 + }, + { + "epoch": 0.18, + "grad_norm": 0.53125, + "learning_rate": 0.0001964952515666846, + "loss": 0.886, + "step": 12275 + }, + { + "epoch": 0.18, + "grad_norm": 0.66015625, + "learning_rate": 0.00019648867842868154, + "loss": 0.963, + "step": 12280 + }, + { + "epoch": 0.18, + "grad_norm": 0.6171875, + "learning_rate": 0.00019648209924265825, + "loss": 1.1426, + "step": 12285 + }, + { + "epoch": 0.18, + "grad_norm": 0.578125, + "learning_rate": 0.00019647551400902704, + "loss": 0.9924, + "step": 12290 + }, + { + "epoch": 0.18, + "grad_norm": 0.5625, + "learning_rate": 0.00019646892272820078, + "loss": 1.0424, + "step": 12295 + }, + { + "epoch": 0.18, + "grad_norm": 0.56640625, + "learning_rate": 0.00019646232540059257, + "loss": 1.1567, + "step": 12300 + }, + { + "epoch": 0.18, + "grad_norm": 0.54296875, + "learning_rate": 0.00019645572202661596, + "loss": 1.0748, + "step": 12305 + }, + { + "epoch": 0.18, + "grad_norm": 0.5546875, + "learning_rate": 0.00019644911260668482, + "loss": 0.9622, + "step": 12310 + }, + { + "epoch": 0.18, + "grad_norm": 0.5390625, + "learning_rate": 0.00019644249714121346, + "loss": 0.9486, + "step": 12315 + }, + { + "epoch": 0.18, + "grad_norm": 0.546875, + "learning_rate": 0.00019643587563061653, + "loss": 0.9666, + "step": 12320 + }, + { + "epoch": 0.18, + "grad_norm": 0.58984375, + "learning_rate": 0.0001964292480753091, + "loss": 1.1216, + "step": 12325 + }, + { + "epoch": 0.18, + "grad_norm": 0.6171875, + "learning_rate": 0.00019642261447570656, + "loss": 1.001, + "step": 12330 + }, + { + "epoch": 0.18, + "grad_norm": 0.5703125, + "learning_rate": 0.00019641597483222476, + "loss": 0.9032, + "step": 12335 + }, + { + "epoch": 0.18, + "grad_norm": 0.5234375, + "learning_rate": 0.0001964093291452798, + "loss": 0.863, + "step": 12340 + }, + { + "epoch": 0.18, + "grad_norm": 0.53125, + "learning_rate": 0.00019640267741528834, + "loss": 1.0108, + "step": 12345 + }, + { + "epoch": 0.18, + "grad_norm": 0.5234375, + "learning_rate": 0.00019639601964266726, + "loss": 1.112, + "step": 12350 + }, + { + "epoch": 0.18, + "grad_norm": 0.625, + "learning_rate": 0.00019638935582783385, + "loss": 0.9948, + "step": 12355 + }, + { + "epoch": 0.18, + "grad_norm": 0.55078125, + "learning_rate": 0.00019638268597120585, + "loss": 0.9937, + "step": 12360 + }, + { + "epoch": 0.18, + "grad_norm": 0.53515625, + "learning_rate": 0.0001963760100732013, + "loss": 1.0482, + "step": 12365 + }, + { + "epoch": 0.18, + "grad_norm": 0.58203125, + "learning_rate": 0.00019636932813423875, + "loss": 0.9995, + "step": 12370 + }, + { + "epoch": 0.18, + "grad_norm": 0.478515625, + "learning_rate": 0.0001963626401547369, + "loss": 0.9609, + "step": 12375 + }, + { + "epoch": 0.18, + "grad_norm": 0.5390625, + "learning_rate": 0.000196355946135115, + "loss": 1.0666, + "step": 12380 + }, + { + "epoch": 0.18, + "grad_norm": 0.515625, + "learning_rate": 0.0001963492460757927, + "loss": 0.8948, + "step": 12385 + }, + { + "epoch": 0.18, + "grad_norm": 0.66796875, + "learning_rate": 0.0001963425399771899, + "loss": 0.9701, + "step": 12390 + }, + { + "epoch": 0.18, + "grad_norm": 0.66015625, + "learning_rate": 0.00019633582783972698, + "loss": 0.9144, + "step": 12395 + }, + { + "epoch": 0.18, + "grad_norm": 0.58203125, + "learning_rate": 0.00019632910966382465, + "loss": 0.9507, + "step": 12400 + }, + { + "epoch": 0.18, + "grad_norm": 0.6328125, + "learning_rate": 0.00019632238544990403, + "loss": 1.0839, + "step": 12405 + }, + { + "epoch": 0.18, + "grad_norm": 0.578125, + "learning_rate": 0.00019631565519838656, + "loss": 1.1661, + "step": 12410 + }, + { + "epoch": 0.18, + "grad_norm": 0.52734375, + "learning_rate": 0.00019630891890969415, + "loss": 1.0304, + "step": 12415 + }, + { + "epoch": 0.18, + "grad_norm": 0.546875, + "learning_rate": 0.000196302176584249, + "loss": 0.8476, + "step": 12420 + }, + { + "epoch": 0.18, + "grad_norm": 0.65625, + "learning_rate": 0.00019629542822247373, + "loss": 1.0206, + "step": 12425 + }, + { + "epoch": 0.18, + "grad_norm": 0.57421875, + "learning_rate": 0.00019628867382479138, + "loss": 0.9953, + "step": 12430 + }, + { + "epoch": 0.18, + "grad_norm": 0.5625, + "learning_rate": 0.0001962819133916253, + "loss": 0.9312, + "step": 12435 + }, + { + "epoch": 0.18, + "grad_norm": 0.61328125, + "learning_rate": 0.0001962751469233992, + "loss": 1.1058, + "step": 12440 + }, + { + "epoch": 0.18, + "grad_norm": 0.57421875, + "learning_rate": 0.00019626837442053726, + "loss": 0.9904, + "step": 12445 + }, + { + "epoch": 0.18, + "grad_norm": 0.56640625, + "learning_rate": 0.00019626159588346392, + "loss": 0.8433, + "step": 12450 + }, + { + "epoch": 0.18, + "grad_norm": 0.52734375, + "learning_rate": 0.00019625481131260418, + "loss": 0.9796, + "step": 12455 + }, + { + "epoch": 0.18, + "grad_norm": 0.62890625, + "learning_rate": 0.00019624802070838325, + "loss": 0.9605, + "step": 12460 + }, + { + "epoch": 0.18, + "grad_norm": 0.546875, + "learning_rate": 0.0001962412240712267, + "loss": 1.0074, + "step": 12465 + }, + { + "epoch": 0.18, + "grad_norm": 0.546875, + "learning_rate": 0.00019623442140156066, + "loss": 0.9153, + "step": 12470 + }, + { + "epoch": 0.18, + "grad_norm": 0.5859375, + "learning_rate": 0.0001962276126998115, + "loss": 0.9167, + "step": 12475 + }, + { + "epoch": 0.18, + "grad_norm": 0.66796875, + "learning_rate": 0.00019622079796640597, + "loss": 1.0444, + "step": 12480 + }, + { + "epoch": 0.18, + "grad_norm": 0.61328125, + "learning_rate": 0.0001962139772017712, + "loss": 1.0472, + "step": 12485 + }, + { + "epoch": 0.18, + "grad_norm": 0.5, + "learning_rate": 0.0001962071504063348, + "loss": 0.9135, + "step": 12490 + }, + { + "epoch": 0.18, + "grad_norm": 0.5546875, + "learning_rate": 0.00019620031758052465, + "loss": 1.1986, + "step": 12495 + }, + { + "epoch": 0.18, + "grad_norm": 0.515625, + "learning_rate": 0.000196193478724769, + "loss": 0.8138, + "step": 12500 + }, + { + "epoch": 0.18, + "grad_norm": 0.50390625, + "learning_rate": 0.00019618663383949656, + "loss": 0.8546, + "step": 12505 + }, + { + "epoch": 0.18, + "grad_norm": 0.56640625, + "learning_rate": 0.00019617978292513634, + "loss": 1.0783, + "step": 12510 + }, + { + "epoch": 0.18, + "grad_norm": 0.484375, + "learning_rate": 0.00019617292598211782, + "loss": 0.8674, + "step": 12515 + }, + { + "epoch": 0.18, + "grad_norm": 0.546875, + "learning_rate": 0.00019616606301087078, + "loss": 0.9813, + "step": 12520 + }, + { + "epoch": 0.18, + "grad_norm": 0.74609375, + "learning_rate": 0.00019615919401182535, + "loss": 0.9427, + "step": 12525 + }, + { + "epoch": 0.18, + "grad_norm": 0.5390625, + "learning_rate": 0.00019615231898541214, + "loss": 1.0899, + "step": 12530 + }, + { + "epoch": 0.18, + "grad_norm": 0.6171875, + "learning_rate": 0.00019614543793206206, + "loss": 1.0309, + "step": 12535 + }, + { + "epoch": 0.18, + "grad_norm": 0.56640625, + "learning_rate": 0.00019613855085220646, + "loss": 1.0088, + "step": 12540 + }, + { + "epoch": 0.18, + "grad_norm": 0.64453125, + "learning_rate": 0.00019613165774627696, + "loss": 0.92, + "step": 12545 + }, + { + "epoch": 0.18, + "grad_norm": 0.58984375, + "learning_rate": 0.0001961247586147057, + "loss": 0.8939, + "step": 12550 + }, + { + "epoch": 0.18, + "grad_norm": 0.55078125, + "learning_rate": 0.00019611785345792507, + "loss": 0.9448, + "step": 12555 + }, + { + "epoch": 0.18, + "grad_norm": 0.5234375, + "learning_rate": 0.0001961109422763679, + "loss": 0.9925, + "step": 12560 + }, + { + "epoch": 0.18, + "grad_norm": 0.5, + "learning_rate": 0.00019610402507046744, + "loss": 1.0034, + "step": 12565 + }, + { + "epoch": 0.18, + "grad_norm": 0.62890625, + "learning_rate": 0.00019609710184065722, + "loss": 0.9819, + "step": 12570 + }, + { + "epoch": 0.18, + "grad_norm": 0.5390625, + "learning_rate": 0.0001960901725873712, + "loss": 0.9709, + "step": 12575 + }, + { + "epoch": 0.18, + "grad_norm": 0.6640625, + "learning_rate": 0.00019608323731104375, + "loss": 0.8864, + "step": 12580 + }, + { + "epoch": 0.18, + "grad_norm": 0.5625, + "learning_rate": 0.00019607629601210954, + "loss": 0.8654, + "step": 12585 + }, + { + "epoch": 0.18, + "grad_norm": 0.458984375, + "learning_rate": 0.0001960693486910037, + "loss": 0.9974, + "step": 12590 + }, + { + "epoch": 0.18, + "grad_norm": 0.59375, + "learning_rate": 0.00019606239534816165, + "loss": 0.9497, + "step": 12595 + }, + { + "epoch": 0.18, + "grad_norm": 0.6171875, + "learning_rate": 0.00019605543598401925, + "loss": 1.0166, + "step": 12600 + }, + { + "epoch": 0.18, + "grad_norm": 0.58984375, + "learning_rate": 0.0001960484705990127, + "loss": 0.8977, + "step": 12605 + }, + { + "epoch": 0.18, + "grad_norm": 0.61328125, + "learning_rate": 0.0001960414991935787, + "loss": 1.0628, + "step": 12610 + }, + { + "epoch": 0.18, + "grad_norm": 0.80859375, + "learning_rate": 0.00019603452176815406, + "loss": 1.0787, + "step": 12615 + }, + { + "epoch": 0.18, + "grad_norm": 0.62890625, + "learning_rate": 0.00019602753832317628, + "loss": 0.9722, + "step": 12620 + }, + { + "epoch": 0.18, + "grad_norm": 0.59375, + "learning_rate": 0.000196020548859083, + "loss": 0.9783, + "step": 12625 + }, + { + "epoch": 0.18, + "grad_norm": 0.62890625, + "learning_rate": 0.00019601355337631237, + "loss": 0.9863, + "step": 12630 + }, + { + "epoch": 0.18, + "grad_norm": 0.515625, + "learning_rate": 0.00019600655187530287, + "loss": 0.9108, + "step": 12635 + }, + { + "epoch": 0.18, + "grad_norm": 0.57421875, + "learning_rate": 0.00019599954435649337, + "loss": 0.925, + "step": 12640 + }, + { + "epoch": 0.18, + "grad_norm": 0.58984375, + "learning_rate": 0.00019599253082032308, + "loss": 0.9331, + "step": 12645 + }, + { + "epoch": 0.18, + "grad_norm": 0.66796875, + "learning_rate": 0.00019598551126723162, + "loss": 1.043, + "step": 12650 + }, + { + "epoch": 0.18, + "grad_norm": 0.57421875, + "learning_rate": 0.000195978485697659, + "loss": 0.8841, + "step": 12655 + }, + { + "epoch": 0.18, + "grad_norm": 0.58984375, + "learning_rate": 0.0001959714541120456, + "loss": 0.9505, + "step": 12660 + }, + { + "epoch": 0.18, + "grad_norm": 0.546875, + "learning_rate": 0.00019596441651083214, + "loss": 0.9653, + "step": 12665 + }, + { + "epoch": 0.18, + "grad_norm": 0.5625, + "learning_rate": 0.00019595737289445977, + "loss": 0.9831, + "step": 12670 + }, + { + "epoch": 0.18, + "grad_norm": 0.59765625, + "learning_rate": 0.00019595032326336998, + "loss": 1.0024, + "step": 12675 + }, + { + "epoch": 0.18, + "grad_norm": 0.58984375, + "learning_rate": 0.00019594326761800462, + "loss": 1.0317, + "step": 12680 + }, + { + "epoch": 0.18, + "grad_norm": 0.50390625, + "learning_rate": 0.00019593620595880602, + "loss": 0.9532, + "step": 12685 + }, + { + "epoch": 0.18, + "grad_norm": 0.62890625, + "learning_rate": 0.00019592913828621672, + "loss": 1.076, + "step": 12690 + }, + { + "epoch": 0.18, + "grad_norm": 0.58984375, + "learning_rate": 0.00019592206460067983, + "loss": 0.862, + "step": 12695 + }, + { + "epoch": 0.18, + "grad_norm": 0.578125, + "learning_rate": 0.00019591498490263866, + "loss": 0.9048, + "step": 12700 + }, + { + "epoch": 0.18, + "grad_norm": 0.640625, + "learning_rate": 0.000195907899192537, + "loss": 0.9484, + "step": 12705 + }, + { + "epoch": 0.18, + "grad_norm": 0.55078125, + "learning_rate": 0.000195900807470819, + "loss": 0.9225, + "step": 12710 + }, + { + "epoch": 0.18, + "grad_norm": 0.5703125, + "learning_rate": 0.00019589370973792914, + "loss": 1.0204, + "step": 12715 + }, + { + "epoch": 0.18, + "grad_norm": 0.6015625, + "learning_rate": 0.00019588660599431234, + "loss": 0.9455, + "step": 12720 + }, + { + "epoch": 0.18, + "grad_norm": 0.5625, + "learning_rate": 0.0001958794962404139, + "loss": 0.8872, + "step": 12725 + }, + { + "epoch": 0.18, + "grad_norm": 0.5859375, + "learning_rate": 0.00019587238047667942, + "loss": 0.9301, + "step": 12730 + }, + { + "epoch": 0.18, + "grad_norm": 0.546875, + "learning_rate": 0.00019586525870355495, + "loss": 1.0429, + "step": 12735 + }, + { + "epoch": 0.18, + "grad_norm": 0.625, + "learning_rate": 0.00019585813092148688, + "loss": 1.094, + "step": 12740 + }, + { + "epoch": 0.18, + "grad_norm": 0.671875, + "learning_rate": 0.00019585099713092198, + "loss": 0.9153, + "step": 12745 + }, + { + "epoch": 0.18, + "grad_norm": 0.59375, + "learning_rate": 0.00019584385733230743, + "loss": 0.9439, + "step": 12750 + }, + { + "epoch": 0.18, + "grad_norm": 0.65234375, + "learning_rate": 0.00019583671152609075, + "loss": 1.0327, + "step": 12755 + }, + { + "epoch": 0.18, + "grad_norm": 0.54296875, + "learning_rate": 0.00019582955971271981, + "loss": 1.1026, + "step": 12760 + }, + { + "epoch": 0.18, + "grad_norm": 0.6640625, + "learning_rate": 0.00019582240189264295, + "loss": 1.0614, + "step": 12765 + }, + { + "epoch": 0.18, + "grad_norm": 0.51953125, + "learning_rate": 0.0001958152380663088, + "loss": 0.9044, + "step": 12770 + }, + { + "epoch": 0.18, + "grad_norm": 0.5078125, + "learning_rate": 0.00019580806823416642, + "loss": 0.9334, + "step": 12775 + }, + { + "epoch": 0.18, + "grad_norm": 0.56640625, + "learning_rate": 0.00019580089239666518, + "loss": 0.9532, + "step": 12780 + }, + { + "epoch": 0.18, + "grad_norm": 0.640625, + "learning_rate": 0.00019579371055425493, + "loss": 1.0215, + "step": 12785 + }, + { + "epoch": 0.18, + "grad_norm": 0.55078125, + "learning_rate": 0.00019578652270738577, + "loss": 0.9541, + "step": 12790 + }, + { + "epoch": 0.18, + "grad_norm": 0.60546875, + "learning_rate": 0.0001957793288565083, + "loss": 0.9159, + "step": 12795 + }, + { + "epoch": 0.18, + "grad_norm": 0.58203125, + "learning_rate": 0.0001957721290020734, + "loss": 1.0008, + "step": 12800 + }, + { + "epoch": 0.18, + "grad_norm": 0.60546875, + "learning_rate": 0.00019576492314453238, + "loss": 0.9768, + "step": 12805 + }, + { + "epoch": 0.18, + "grad_norm": 0.53125, + "learning_rate": 0.00019575771128433692, + "loss": 0.8682, + "step": 12810 + }, + { + "epoch": 0.18, + "grad_norm": 0.609375, + "learning_rate": 0.00019575049342193906, + "loss": 0.926, + "step": 12815 + }, + { + "epoch": 0.18, + "grad_norm": 0.54296875, + "learning_rate": 0.00019574326955779122, + "loss": 1.1345, + "step": 12820 + }, + { + "epoch": 0.18, + "grad_norm": 0.58203125, + "learning_rate": 0.00019573603969234618, + "loss": 0.8593, + "step": 12825 + }, + { + "epoch": 0.18, + "grad_norm": 0.5, + "learning_rate": 0.00019572880382605714, + "loss": 1.0199, + "step": 12830 + }, + { + "epoch": 0.18, + "grad_norm": 0.5703125, + "learning_rate": 0.00019572156195937765, + "loss": 1.0294, + "step": 12835 + }, + { + "epoch": 0.18, + "grad_norm": 0.51953125, + "learning_rate": 0.00019571431409276166, + "loss": 0.9291, + "step": 12840 + }, + { + "epoch": 0.18, + "grad_norm": 0.7109375, + "learning_rate": 0.00019570706022666343, + "loss": 0.8817, + "step": 12845 + }, + { + "epoch": 0.18, + "grad_norm": 0.53125, + "learning_rate": 0.0001956998003615377, + "loss": 0.9355, + "step": 12850 + }, + { + "epoch": 0.18, + "grad_norm": 0.515625, + "learning_rate": 0.00019569253449783943, + "loss": 0.8385, + "step": 12855 + }, + { + "epoch": 0.18, + "grad_norm": 0.60546875, + "learning_rate": 0.00019568526263602417, + "loss": 1.161, + "step": 12860 + }, + { + "epoch": 0.18, + "grad_norm": 0.578125, + "learning_rate": 0.00019567798477654767, + "loss": 1.0588, + "step": 12865 + }, + { + "epoch": 0.18, + "grad_norm": 0.5234375, + "learning_rate": 0.00019567070091986607, + "loss": 0.8892, + "step": 12870 + }, + { + "epoch": 0.18, + "grad_norm": 0.58203125, + "learning_rate": 0.00019566341106643602, + "loss": 0.9518, + "step": 12875 + }, + { + "epoch": 0.18, + "grad_norm": 0.55078125, + "learning_rate": 0.0001956561152167144, + "loss": 0.9279, + "step": 12880 + }, + { + "epoch": 0.18, + "grad_norm": 0.546875, + "learning_rate": 0.00019564881337115852, + "loss": 0.9089, + "step": 12885 + }, + { + "epoch": 0.18, + "grad_norm": 0.5234375, + "learning_rate": 0.0001956415055302261, + "loss": 0.8285, + "step": 12890 + }, + { + "epoch": 0.18, + "grad_norm": 0.6484375, + "learning_rate": 0.00019563419169437517, + "loss": 1.0355, + "step": 12895 + }, + { + "epoch": 0.19, + "grad_norm": 0.51171875, + "learning_rate": 0.00019562687186406422, + "loss": 0.9374, + "step": 12900 + }, + { + "epoch": 0.19, + "grad_norm": 0.55078125, + "learning_rate": 0.000195619546039752, + "loss": 0.9838, + "step": 12905 + }, + { + "epoch": 0.19, + "grad_norm": 0.66015625, + "learning_rate": 0.00019561221422189775, + "loss": 1.0857, + "step": 12910 + }, + { + "epoch": 0.19, + "grad_norm": 0.5078125, + "learning_rate": 0.00019560487641096102, + "loss": 1.0689, + "step": 12915 + }, + { + "epoch": 0.19, + "grad_norm": 0.5859375, + "learning_rate": 0.00019559753260740172, + "loss": 0.9646, + "step": 12920 + }, + { + "epoch": 0.19, + "grad_norm": 0.5078125, + "learning_rate": 0.00019559018281168025, + "loss": 0.9258, + "step": 12925 + }, + { + "epoch": 0.19, + "grad_norm": 0.59375, + "learning_rate": 0.00019558282702425724, + "loss": 1.0102, + "step": 12930 + }, + { + "epoch": 0.19, + "grad_norm": 0.515625, + "learning_rate": 0.00019557546524559373, + "loss": 1.1096, + "step": 12935 + }, + { + "epoch": 0.19, + "grad_norm": 0.51171875, + "learning_rate": 0.00019556809747615125, + "loss": 0.8644, + "step": 12940 + }, + { + "epoch": 0.19, + "grad_norm": 0.53515625, + "learning_rate": 0.00019556072371639156, + "loss": 0.8821, + "step": 12945 + }, + { + "epoch": 0.19, + "grad_norm": 0.48046875, + "learning_rate": 0.00019555334396677688, + "loss": 0.9682, + "step": 12950 + }, + { + "epoch": 0.19, + "grad_norm": 0.640625, + "learning_rate": 0.00019554595822776977, + "loss": 1.192, + "step": 12955 + }, + { + "epoch": 0.19, + "grad_norm": 0.5234375, + "learning_rate": 0.00019553856649983317, + "loss": 1.0012, + "step": 12960 + }, + { + "epoch": 0.19, + "grad_norm": 0.51953125, + "learning_rate": 0.00019553116878343044, + "loss": 1.0151, + "step": 12965 + }, + { + "epoch": 0.19, + "grad_norm": 0.55078125, + "learning_rate": 0.00019552376507902524, + "loss": 0.904, + "step": 12970 + }, + { + "epoch": 0.19, + "grad_norm": 0.55859375, + "learning_rate": 0.00019551635538708162, + "loss": 1.1135, + "step": 12975 + }, + { + "epoch": 0.19, + "grad_norm": 0.55859375, + "learning_rate": 0.0001955089397080641, + "loss": 0.9702, + "step": 12980 + }, + { + "epoch": 0.19, + "grad_norm": 0.73828125, + "learning_rate": 0.00019550151804243745, + "loss": 1.0448, + "step": 12985 + }, + { + "epoch": 0.19, + "grad_norm": 0.53515625, + "learning_rate": 0.00019549409039066687, + "loss": 1.125, + "step": 12990 + }, + { + "epoch": 0.19, + "grad_norm": 0.5703125, + "learning_rate": 0.00019548665675321795, + "loss": 0.9953, + "step": 12995 + }, + { + "epoch": 0.19, + "grad_norm": 0.5546875, + "learning_rate": 0.00019547921713055665, + "loss": 1.0935, + "step": 13000 + }, + { + "epoch": 0.19, + "grad_norm": 0.51953125, + "learning_rate": 0.00019547177152314928, + "loss": 0.9381, + "step": 13005 + }, + { + "epoch": 0.19, + "grad_norm": 0.53515625, + "learning_rate": 0.00019546431993146252, + "loss": 1.1262, + "step": 13010 + }, + { + "epoch": 0.19, + "grad_norm": 0.625, + "learning_rate": 0.0001954568623559635, + "loss": 0.9953, + "step": 13015 + }, + { + "epoch": 0.19, + "grad_norm": 0.66015625, + "learning_rate": 0.0001954493987971196, + "loss": 1.2148, + "step": 13020 + }, + { + "epoch": 0.19, + "grad_norm": 0.54296875, + "learning_rate": 0.00019544192925539865, + "loss": 1.1218, + "step": 13025 + }, + { + "epoch": 0.19, + "grad_norm": 0.5625, + "learning_rate": 0.0001954344537312689, + "loss": 0.9282, + "step": 13030 + }, + { + "epoch": 0.19, + "grad_norm": 0.4921875, + "learning_rate": 0.0001954269722251989, + "loss": 1.0104, + "step": 13035 + }, + { + "epoch": 0.19, + "grad_norm": 0.56640625, + "learning_rate": 0.0001954194847376576, + "loss": 0.8732, + "step": 13040 + }, + { + "epoch": 0.19, + "grad_norm": 0.51953125, + "learning_rate": 0.00019541199126911433, + "loss": 1.0397, + "step": 13045 + }, + { + "epoch": 0.19, + "grad_norm": 0.58203125, + "learning_rate": 0.00019540449182003878, + "loss": 0.9709, + "step": 13050 + }, + { + "epoch": 0.19, + "grad_norm": 0.5703125, + "learning_rate": 0.000195396986390901, + "loss": 0.9555, + "step": 13055 + }, + { + "epoch": 0.19, + "grad_norm": 0.64453125, + "learning_rate": 0.0001953894749821715, + "loss": 0.9479, + "step": 13060 + }, + { + "epoch": 0.19, + "grad_norm": 0.515625, + "learning_rate": 0.00019538195759432106, + "loss": 0.88, + "step": 13065 + }, + { + "epoch": 0.19, + "grad_norm": 0.62109375, + "learning_rate": 0.00019537443422782089, + "loss": 0.985, + "step": 13070 + }, + { + "epoch": 0.19, + "grad_norm": 0.546875, + "learning_rate": 0.00019536690488314252, + "loss": 0.8963, + "step": 13075 + }, + { + "epoch": 0.19, + "grad_norm": 0.5703125, + "learning_rate": 0.000195359369560758, + "loss": 0.9127, + "step": 13080 + }, + { + "epoch": 0.19, + "grad_norm": 0.5625, + "learning_rate": 0.00019535182826113954, + "loss": 1.0403, + "step": 13085 + }, + { + "epoch": 0.19, + "grad_norm": 0.5859375, + "learning_rate": 0.00019534428098475992, + "loss": 0.8949, + "step": 13090 + }, + { + "epoch": 0.19, + "grad_norm": 0.5703125, + "learning_rate": 0.00019533672773209217, + "loss": 0.8789, + "step": 13095 + }, + { + "epoch": 0.19, + "grad_norm": 0.5546875, + "learning_rate": 0.00019532916850360975, + "loss": 1.138, + "step": 13100 + }, + { + "epoch": 0.19, + "grad_norm": 0.55859375, + "learning_rate": 0.0001953216032997865, + "loss": 0.9711, + "step": 13105 + }, + { + "epoch": 0.19, + "grad_norm": 0.53125, + "learning_rate": 0.00019531403212109653, + "loss": 1.0461, + "step": 13110 + }, + { + "epoch": 0.19, + "grad_norm": 0.55078125, + "learning_rate": 0.0001953064549680145, + "loss": 0.8697, + "step": 13115 + }, + { + "epoch": 0.19, + "grad_norm": 0.5390625, + "learning_rate": 0.00019529887184101537, + "loss": 0.8727, + "step": 13120 + }, + { + "epoch": 0.19, + "grad_norm": 0.4921875, + "learning_rate": 0.00019529128274057436, + "loss": 0.905, + "step": 13125 + }, + { + "epoch": 0.19, + "grad_norm": 0.59375, + "learning_rate": 0.00019528368766716725, + "loss": 1.0066, + "step": 13130 + }, + { + "epoch": 0.19, + "grad_norm": 0.55078125, + "learning_rate": 0.00019527608662127009, + "loss": 0.9105, + "step": 13135 + }, + { + "epoch": 0.19, + "grad_norm": 0.5859375, + "learning_rate": 0.00019526847960335927, + "loss": 0.8907, + "step": 13140 + }, + { + "epoch": 0.19, + "grad_norm": 0.54296875, + "learning_rate": 0.00019526086661391168, + "loss": 1.1167, + "step": 13145 + }, + { + "epoch": 0.19, + "grad_norm": 0.56640625, + "learning_rate": 0.00019525324765340445, + "loss": 0.9489, + "step": 13150 + }, + { + "epoch": 0.19, + "grad_norm": 0.546875, + "learning_rate": 0.00019524562272231518, + "loss": 1.1006, + "step": 13155 + }, + { + "epoch": 0.19, + "grad_norm": 0.6484375, + "learning_rate": 0.00019523799182112183, + "loss": 1.1097, + "step": 13160 + }, + { + "epoch": 0.19, + "grad_norm": 0.54296875, + "learning_rate": 0.00019523035495030267, + "loss": 0.9017, + "step": 13165 + }, + { + "epoch": 0.19, + "grad_norm": 0.546875, + "learning_rate": 0.0001952227121103364, + "loss": 0.8943, + "step": 13170 + }, + { + "epoch": 0.19, + "grad_norm": 0.5390625, + "learning_rate": 0.00019521506330170206, + "loss": 1.0787, + "step": 13175 + }, + { + "epoch": 0.19, + "grad_norm": 0.51953125, + "learning_rate": 0.00019520740852487916, + "loss": 0.9864, + "step": 13180 + }, + { + "epoch": 0.19, + "grad_norm": 0.53515625, + "learning_rate": 0.00019519974778034742, + "loss": 1.0124, + "step": 13185 + }, + { + "epoch": 0.19, + "grad_norm": 0.58984375, + "learning_rate": 0.00019519208106858708, + "loss": 1.1291, + "step": 13190 + }, + { + "epoch": 0.19, + "grad_norm": 0.72265625, + "learning_rate": 0.0001951844083900787, + "loss": 1.0683, + "step": 13195 + }, + { + "epoch": 0.19, + "grad_norm": 0.5234375, + "learning_rate": 0.0001951767297453032, + "loss": 0.9281, + "step": 13200 + }, + { + "epoch": 0.19, + "grad_norm": 0.59375, + "learning_rate": 0.00019516904513474187, + "loss": 1.0935, + "step": 13205 + }, + { + "epoch": 0.19, + "grad_norm": 0.55859375, + "learning_rate": 0.00019516135455887638, + "loss": 0.951, + "step": 13210 + }, + { + "epoch": 0.19, + "grad_norm": 0.546875, + "learning_rate": 0.00019515365801818884, + "loss": 0.9578, + "step": 13215 + }, + { + "epoch": 0.19, + "grad_norm": 0.55859375, + "learning_rate": 0.00019514595551316162, + "loss": 1.0304, + "step": 13220 + }, + { + "epoch": 0.19, + "grad_norm": 0.5703125, + "learning_rate": 0.00019513824704427756, + "loss": 0.8505, + "step": 13225 + }, + { + "epoch": 0.19, + "grad_norm": 0.50390625, + "learning_rate": 0.00019513053261201982, + "loss": 0.9522, + "step": 13230 + }, + { + "epoch": 0.19, + "grad_norm": 0.52734375, + "learning_rate": 0.00019512281221687193, + "loss": 0.9674, + "step": 13235 + }, + { + "epoch": 0.19, + "grad_norm": 0.56640625, + "learning_rate": 0.00019511508585931786, + "loss": 1.0524, + "step": 13240 + }, + { + "epoch": 0.19, + "grad_norm": 0.53125, + "learning_rate": 0.00019510735353984185, + "loss": 0.874, + "step": 13245 + }, + { + "epoch": 0.19, + "grad_norm": 0.546875, + "learning_rate": 0.00019509961525892867, + "loss": 1.0424, + "step": 13250 + }, + { + "epoch": 0.19, + "grad_norm": 0.51953125, + "learning_rate": 0.00019509187101706324, + "loss": 0.7539, + "step": 13255 + }, + { + "epoch": 0.19, + "grad_norm": 0.609375, + "learning_rate": 0.00019508412081473104, + "loss": 0.9879, + "step": 13260 + }, + { + "epoch": 0.19, + "grad_norm": 0.62109375, + "learning_rate": 0.00019507636465241785, + "loss": 1.2274, + "step": 13265 + }, + { + "epoch": 0.19, + "grad_norm": 0.56640625, + "learning_rate": 0.0001950686025306099, + "loss": 0.9844, + "step": 13270 + }, + { + "epoch": 0.19, + "grad_norm": 0.55859375, + "learning_rate": 0.0001950608344497936, + "loss": 1.0499, + "step": 13275 + }, + { + "epoch": 0.19, + "grad_norm": 0.58984375, + "learning_rate": 0.00019505306041045595, + "loss": 0.9718, + "step": 13280 + }, + { + "epoch": 0.19, + "grad_norm": 0.51953125, + "learning_rate": 0.00019504528041308422, + "loss": 1.0062, + "step": 13285 + }, + { + "epoch": 0.19, + "grad_norm": 0.55078125, + "learning_rate": 0.00019503749445816607, + "loss": 0.9244, + "step": 13290 + }, + { + "epoch": 0.19, + "grad_norm": 0.53125, + "learning_rate": 0.00019502970254618954, + "loss": 0.875, + "step": 13295 + }, + { + "epoch": 0.19, + "grad_norm": 0.625, + "learning_rate": 0.000195021904677643, + "loss": 0.9849, + "step": 13300 + }, + { + "epoch": 0.19, + "grad_norm": 0.58984375, + "learning_rate": 0.00019501410085301526, + "loss": 0.9765, + "step": 13305 + }, + { + "epoch": 0.19, + "grad_norm": 0.54296875, + "learning_rate": 0.0001950062910727955, + "loss": 0.8658, + "step": 13310 + }, + { + "epoch": 0.19, + "grad_norm": 0.515625, + "learning_rate": 0.00019499847533747315, + "loss": 0.8656, + "step": 13315 + }, + { + "epoch": 0.19, + "grad_norm": 0.60546875, + "learning_rate": 0.00019499065364753822, + "loss": 0.9422, + "step": 13320 + }, + { + "epoch": 0.19, + "grad_norm": 0.578125, + "learning_rate": 0.0001949828260034809, + "loss": 1.0623, + "step": 13325 + }, + { + "epoch": 0.19, + "grad_norm": 0.59375, + "learning_rate": 0.0001949749924057919, + "loss": 1.1, + "step": 13330 + }, + { + "epoch": 0.19, + "grad_norm": 0.58203125, + "learning_rate": 0.0001949671528549622, + "loss": 0.8578, + "step": 13335 + }, + { + "epoch": 0.19, + "grad_norm": 0.578125, + "learning_rate": 0.0001949593073514832, + "loss": 0.9376, + "step": 13340 + }, + { + "epoch": 0.19, + "grad_norm": 0.546875, + "learning_rate": 0.00019495145589584664, + "loss": 0.9657, + "step": 13345 + }, + { + "epoch": 0.19, + "grad_norm": 0.6171875, + "learning_rate": 0.00019494359848854474, + "loss": 1.0854, + "step": 13350 + }, + { + "epoch": 0.19, + "grad_norm": 0.6171875, + "learning_rate": 0.00019493573513006994, + "loss": 0.8838, + "step": 13355 + }, + { + "epoch": 0.19, + "grad_norm": 0.50390625, + "learning_rate": 0.00019492786582091508, + "loss": 0.9277, + "step": 13360 + }, + { + "epoch": 0.19, + "grad_norm": 0.53125, + "learning_rate": 0.00019491999056157352, + "loss": 0.9805, + "step": 13365 + }, + { + "epoch": 0.19, + "grad_norm": 0.52734375, + "learning_rate": 0.00019491210935253887, + "loss": 1.0989, + "step": 13370 + }, + { + "epoch": 0.19, + "grad_norm": 0.59375, + "learning_rate": 0.00019490422219430506, + "loss": 1.0407, + "step": 13375 + }, + { + "epoch": 0.19, + "grad_norm": 0.55859375, + "learning_rate": 0.00019489632908736658, + "loss": 0.9724, + "step": 13380 + }, + { + "epoch": 0.19, + "grad_norm": 0.60546875, + "learning_rate": 0.00019488843003221805, + "loss": 0.9384, + "step": 13385 + }, + { + "epoch": 0.19, + "grad_norm": 0.6015625, + "learning_rate": 0.00019488052502935469, + "loss": 1.0806, + "step": 13390 + }, + { + "epoch": 0.19, + "grad_norm": 0.625, + "learning_rate": 0.00019487261407927196, + "loss": 0.9685, + "step": 13395 + }, + { + "epoch": 0.19, + "grad_norm": 0.578125, + "learning_rate": 0.00019486469718246573, + "loss": 0.936, + "step": 13400 + }, + { + "epoch": 0.19, + "grad_norm": 0.55078125, + "learning_rate": 0.0001948567743394322, + "loss": 0.9386, + "step": 13405 + }, + { + "epoch": 0.19, + "grad_norm": 0.5234375, + "learning_rate": 0.00019484884555066807, + "loss": 0.9172, + "step": 13410 + }, + { + "epoch": 0.19, + "grad_norm": 0.6171875, + "learning_rate": 0.00019484091081667027, + "loss": 1.1164, + "step": 13415 + }, + { + "epoch": 0.19, + "grad_norm": 0.5546875, + "learning_rate": 0.00019483297013793617, + "loss": 1.0413, + "step": 13420 + }, + { + "epoch": 0.19, + "grad_norm": 0.60546875, + "learning_rate": 0.00019482502351496345, + "loss": 1.0837, + "step": 13425 + }, + { + "epoch": 0.19, + "grad_norm": 0.7109375, + "learning_rate": 0.0001948170709482503, + "loss": 0.9828, + "step": 13430 + }, + { + "epoch": 0.19, + "grad_norm": 0.58984375, + "learning_rate": 0.00019480911243829514, + "loss": 1.0261, + "step": 13435 + }, + { + "epoch": 0.19, + "grad_norm": 0.578125, + "learning_rate": 0.00019480114798559685, + "loss": 1.0415, + "step": 13440 + }, + { + "epoch": 0.19, + "grad_norm": 0.5625, + "learning_rate": 0.00019479317759065463, + "loss": 0.8573, + "step": 13445 + }, + { + "epoch": 0.19, + "grad_norm": 0.52734375, + "learning_rate": 0.00019478520125396805, + "loss": 0.9587, + "step": 13450 + }, + { + "epoch": 0.19, + "grad_norm": 0.53515625, + "learning_rate": 0.00019477721897603715, + "loss": 1.146, + "step": 13455 + }, + { + "epoch": 0.19, + "grad_norm": 0.54296875, + "learning_rate": 0.00019476923075736218, + "loss": 0.9267, + "step": 13460 + }, + { + "epoch": 0.19, + "grad_norm": 0.578125, + "learning_rate": 0.0001947612365984439, + "loss": 0.979, + "step": 13465 + }, + { + "epoch": 0.19, + "grad_norm": 0.5234375, + "learning_rate": 0.00019475323649978344, + "loss": 0.9616, + "step": 13470 + }, + { + "epoch": 0.19, + "grad_norm": 0.578125, + "learning_rate": 0.00019474523046188216, + "loss": 0.9822, + "step": 13475 + }, + { + "epoch": 0.19, + "grad_norm": 0.53515625, + "learning_rate": 0.00019473721848524195, + "loss": 0.897, + "step": 13480 + }, + { + "epoch": 0.19, + "grad_norm": 0.5859375, + "learning_rate": 0.00019472920057036493, + "loss": 0.9629, + "step": 13485 + }, + { + "epoch": 0.19, + "grad_norm": 0.6796875, + "learning_rate": 0.0001947211767177538, + "loss": 1.0021, + "step": 13490 + }, + { + "epoch": 0.19, + "grad_norm": 0.57421875, + "learning_rate": 0.00019471314692791138, + "loss": 1.0638, + "step": 13495 + }, + { + "epoch": 0.19, + "grad_norm": 0.5546875, + "learning_rate": 0.00019470511120134106, + "loss": 0.9434, + "step": 13500 + }, + { + "epoch": 0.19, + "grad_norm": 0.66796875, + "learning_rate": 0.00019469706953854652, + "loss": 1.125, + "step": 13505 + }, + { + "epoch": 0.19, + "grad_norm": 0.498046875, + "learning_rate": 0.0001946890219400318, + "loss": 0.7527, + "step": 13510 + }, + { + "epoch": 0.19, + "grad_norm": 0.66015625, + "learning_rate": 0.00019468096840630136, + "loss": 0.971, + "step": 13515 + }, + { + "epoch": 0.19, + "grad_norm": 0.5703125, + "learning_rate": 0.00019467290893785996, + "loss": 0.9305, + "step": 13520 + }, + { + "epoch": 0.19, + "grad_norm": 0.5625, + "learning_rate": 0.00019466484353521284, + "loss": 0.9969, + "step": 13525 + }, + { + "epoch": 0.19, + "grad_norm": 0.484375, + "learning_rate": 0.00019465677219886548, + "loss": 0.8256, + "step": 13530 + }, + { + "epoch": 0.19, + "grad_norm": 0.66015625, + "learning_rate": 0.00019464869492932384, + "loss": 1.0183, + "step": 13535 + }, + { + "epoch": 0.19, + "grad_norm": 0.61328125, + "learning_rate": 0.0001946406117270942, + "loss": 0.9527, + "step": 13540 + }, + { + "epoch": 0.19, + "grad_norm": 0.515625, + "learning_rate": 0.00019463252259268327, + "loss": 0.9422, + "step": 13545 + }, + { + "epoch": 0.19, + "grad_norm": 0.5078125, + "learning_rate": 0.000194624427526598, + "loss": 1.123, + "step": 13550 + }, + { + "epoch": 0.19, + "grad_norm": 0.55078125, + "learning_rate": 0.00019461632652934587, + "loss": 0.9995, + "step": 13555 + }, + { + "epoch": 0.19, + "grad_norm": 0.51171875, + "learning_rate": 0.00019460821960143462, + "loss": 0.9578, + "step": 13560 + }, + { + "epoch": 0.19, + "grad_norm": 0.66015625, + "learning_rate": 0.00019460010674337243, + "loss": 0.9716, + "step": 13565 + }, + { + "epoch": 0.19, + "grad_norm": 0.76171875, + "learning_rate": 0.00019459198795566783, + "loss": 1.0666, + "step": 13570 + }, + { + "epoch": 0.19, + "grad_norm": 0.6015625, + "learning_rate": 0.00019458386323882966, + "loss": 0.9313, + "step": 13575 + }, + { + "epoch": 0.19, + "grad_norm": 0.6015625, + "learning_rate": 0.0001945757325933672, + "loss": 0.8971, + "step": 13580 + }, + { + "epoch": 0.19, + "grad_norm": 0.5234375, + "learning_rate": 0.00019456759601979014, + "loss": 0.9537, + "step": 13585 + }, + { + "epoch": 0.19, + "grad_norm": 0.67578125, + "learning_rate": 0.00019455945351860848, + "loss": 0.9548, + "step": 13590 + }, + { + "epoch": 0.2, + "grad_norm": 0.5625, + "learning_rate": 0.00019455130509033253, + "loss": 1.0053, + "step": 13595 + }, + { + "epoch": 0.2, + "grad_norm": 0.59765625, + "learning_rate": 0.00019454315073547313, + "loss": 1.0101, + "step": 13600 + }, + { + "epoch": 0.2, + "grad_norm": 0.52734375, + "learning_rate": 0.00019453499045454136, + "loss": 0.9598, + "step": 13605 + }, + { + "epoch": 0.2, + "grad_norm": 0.50390625, + "learning_rate": 0.00019452682424804868, + "loss": 0.8437, + "step": 13610 + }, + { + "epoch": 0.2, + "grad_norm": 0.56640625, + "learning_rate": 0.00019451865211650703, + "loss": 0.8778, + "step": 13615 + }, + { + "epoch": 0.2, + "grad_norm": 0.5859375, + "learning_rate": 0.00019451047406042864, + "loss": 0.9147, + "step": 13620 + }, + { + "epoch": 0.2, + "grad_norm": 0.63671875, + "learning_rate": 0.00019450229008032607, + "loss": 1.0566, + "step": 13625 + }, + { + "epoch": 0.2, + "grad_norm": 0.58203125, + "learning_rate": 0.00019449410017671236, + "loss": 1.0564, + "step": 13630 + }, + { + "epoch": 0.2, + "grad_norm": 0.59765625, + "learning_rate": 0.0001944859043501008, + "loss": 1.0702, + "step": 13635 + }, + { + "epoch": 0.2, + "grad_norm": 0.52734375, + "learning_rate": 0.00019447770260100513, + "loss": 0.8292, + "step": 13640 + }, + { + "epoch": 0.2, + "grad_norm": 0.53515625, + "learning_rate": 0.0001944694949299395, + "loss": 0.9116, + "step": 13645 + }, + { + "epoch": 0.2, + "grad_norm": 0.5546875, + "learning_rate": 0.0001944612813374183, + "loss": 0.8663, + "step": 13650 + }, + { + "epoch": 0.2, + "grad_norm": 0.5625, + "learning_rate": 0.00019445306182395644, + "loss": 1.0415, + "step": 13655 + }, + { + "epoch": 0.2, + "grad_norm": 0.53515625, + "learning_rate": 0.00019444483639006904, + "loss": 0.8293, + "step": 13660 + }, + { + "epoch": 0.2, + "grad_norm": 0.52734375, + "learning_rate": 0.0001944366050362718, + "loss": 0.9584, + "step": 13665 + }, + { + "epoch": 0.2, + "grad_norm": 0.80859375, + "learning_rate": 0.00019442836776308056, + "loss": 1.0026, + "step": 13670 + }, + { + "epoch": 0.2, + "grad_norm": 0.5234375, + "learning_rate": 0.00019442012457101168, + "loss": 0.8867, + "step": 13675 + }, + { + "epoch": 0.2, + "grad_norm": 0.578125, + "learning_rate": 0.00019441187546058187, + "loss": 0.9783, + "step": 13680 + }, + { + "epoch": 0.2, + "grad_norm": 0.609375, + "learning_rate": 0.00019440362043230818, + "loss": 0.871, + "step": 13685 + }, + { + "epoch": 0.2, + "grad_norm": 0.56640625, + "learning_rate": 0.00019439535948670805, + "loss": 0.9344, + "step": 13690 + }, + { + "epoch": 0.2, + "grad_norm": 0.640625, + "learning_rate": 0.00019438709262429926, + "loss": 1.0194, + "step": 13695 + }, + { + "epoch": 0.2, + "grad_norm": 0.48046875, + "learning_rate": 0.00019437881984560003, + "loss": 0.9557, + "step": 13700 + }, + { + "epoch": 0.2, + "grad_norm": 0.55078125, + "learning_rate": 0.00019437054115112887, + "loss": 0.9423, + "step": 13705 + }, + { + "epoch": 0.2, + "grad_norm": 0.5859375, + "learning_rate": 0.00019436225654140472, + "loss": 1.0194, + "step": 13710 + }, + { + "epoch": 0.2, + "grad_norm": 0.609375, + "learning_rate": 0.00019435396601694686, + "loss": 1.0522, + "step": 13715 + }, + { + "epoch": 0.2, + "grad_norm": 0.59375, + "learning_rate": 0.00019434566957827492, + "loss": 0.9833, + "step": 13720 + }, + { + "epoch": 0.2, + "grad_norm": 0.55859375, + "learning_rate": 0.000194337367225909, + "loss": 0.8905, + "step": 13725 + }, + { + "epoch": 0.2, + "grad_norm": 0.5625, + "learning_rate": 0.00019432905896036944, + "loss": 1.0386, + "step": 13730 + }, + { + "epoch": 0.2, + "grad_norm": 0.69140625, + "learning_rate": 0.000194320744782177, + "loss": 1.0965, + "step": 13735 + }, + { + "epoch": 0.2, + "grad_norm": 0.57421875, + "learning_rate": 0.00019431242469185293, + "loss": 0.9553, + "step": 13740 + }, + { + "epoch": 0.2, + "grad_norm": 0.56640625, + "learning_rate": 0.00019430409868991864, + "loss": 0.9008, + "step": 13745 + }, + { + "epoch": 0.2, + "grad_norm": 0.5703125, + "learning_rate": 0.000194295766776896, + "loss": 0.9997, + "step": 13750 + }, + { + "epoch": 0.2, + "grad_norm": 0.5625, + "learning_rate": 0.00019428742895330733, + "loss": 0.9672, + "step": 13755 + }, + { + "epoch": 0.2, + "grad_norm": 0.60546875, + "learning_rate": 0.00019427908521967523, + "loss": 0.9669, + "step": 13760 + }, + { + "epoch": 0.2, + "grad_norm": 0.609375, + "learning_rate": 0.0001942707355765227, + "loss": 1.1381, + "step": 13765 + }, + { + "epoch": 0.2, + "grad_norm": 0.5625, + "learning_rate": 0.0001942623800243731, + "loss": 1.1075, + "step": 13770 + }, + { + "epoch": 0.2, + "grad_norm": 0.59765625, + "learning_rate": 0.00019425401856375016, + "loss": 1.0386, + "step": 13775 + }, + { + "epoch": 0.2, + "grad_norm": 0.5390625, + "learning_rate": 0.000194245651195178, + "loss": 0.9591, + "step": 13780 + }, + { + "epoch": 0.2, + "grad_norm": 0.63671875, + "learning_rate": 0.0001942372779191811, + "loss": 0.9495, + "step": 13785 + }, + { + "epoch": 0.2, + "grad_norm": 0.5546875, + "learning_rate": 0.00019422889873628424, + "loss": 0.8961, + "step": 13790 + }, + { + "epoch": 0.2, + "grad_norm": 0.65234375, + "learning_rate": 0.00019422051364701272, + "loss": 0.9591, + "step": 13795 + }, + { + "epoch": 0.2, + "grad_norm": 0.5703125, + "learning_rate": 0.0001942121226518921, + "loss": 0.887, + "step": 13800 + }, + { + "epoch": 0.2, + "grad_norm": 0.546875, + "learning_rate": 0.00019420372575144833, + "loss": 1.0454, + "step": 13805 + }, + { + "epoch": 0.2, + "grad_norm": 0.54296875, + "learning_rate": 0.00019419532294620773, + "loss": 0.9947, + "step": 13810 + }, + { + "epoch": 0.2, + "grad_norm": 0.58203125, + "learning_rate": 0.00019418691423669704, + "loss": 0.9633, + "step": 13815 + }, + { + "epoch": 0.2, + "grad_norm": 0.55859375, + "learning_rate": 0.00019417849962344327, + "loss": 1.0141, + "step": 13820 + }, + { + "epoch": 0.2, + "grad_norm": 0.5, + "learning_rate": 0.00019417007910697387, + "loss": 0.9378, + "step": 13825 + }, + { + "epoch": 0.2, + "grad_norm": 0.50390625, + "learning_rate": 0.00019416165268781668, + "loss": 0.9427, + "step": 13830 + }, + { + "epoch": 0.2, + "grad_norm": 0.53515625, + "learning_rate": 0.00019415322036649986, + "loss": 0.8609, + "step": 13835 + }, + { + "epoch": 0.2, + "grad_norm": 0.5859375, + "learning_rate": 0.00019414478214355193, + "loss": 0.8503, + "step": 13840 + }, + { + "epoch": 0.2, + "grad_norm": 0.59765625, + "learning_rate": 0.00019413633801950187, + "loss": 1.0469, + "step": 13845 + }, + { + "epoch": 0.2, + "grad_norm": 0.62890625, + "learning_rate": 0.0001941278879948789, + "loss": 1.0054, + "step": 13850 + }, + { + "epoch": 0.2, + "grad_norm": 0.5703125, + "learning_rate": 0.00019411943207021275, + "loss": 1.0992, + "step": 13855 + }, + { + "epoch": 0.2, + "grad_norm": 0.53515625, + "learning_rate": 0.0001941109702460334, + "loss": 1.0576, + "step": 13860 + }, + { + "epoch": 0.2, + "grad_norm": 0.50390625, + "learning_rate": 0.0001941025025228712, + "loss": 0.9899, + "step": 13865 + }, + { + "epoch": 0.2, + "grad_norm": 0.55859375, + "learning_rate": 0.00019409402890125697, + "loss": 1.023, + "step": 13870 + }, + { + "epoch": 0.2, + "grad_norm": 0.5546875, + "learning_rate": 0.0001940855493817219, + "loss": 1.0024, + "step": 13875 + }, + { + "epoch": 0.2, + "grad_norm": 0.5234375, + "learning_rate": 0.0001940770639647974, + "loss": 0.9097, + "step": 13880 + }, + { + "epoch": 0.2, + "grad_norm": 0.55078125, + "learning_rate": 0.0001940685726510154, + "loss": 0.9621, + "step": 13885 + }, + { + "epoch": 0.2, + "grad_norm": 0.53125, + "learning_rate": 0.00019406007544090813, + "loss": 0.9476, + "step": 13890 + }, + { + "epoch": 0.2, + "grad_norm": 0.6171875, + "learning_rate": 0.0001940515723350082, + "loss": 0.957, + "step": 13895 + }, + { + "epoch": 0.2, + "grad_norm": 0.5546875, + "learning_rate": 0.00019404306333384864, + "loss": 1.1225, + "step": 13900 + }, + { + "epoch": 0.2, + "grad_norm": 0.640625, + "learning_rate": 0.0001940345484379627, + "loss": 0.9353, + "step": 13905 + }, + { + "epoch": 0.2, + "grad_norm": 0.6328125, + "learning_rate": 0.0001940260276478842, + "loss": 0.9559, + "step": 13910 + }, + { + "epoch": 0.2, + "grad_norm": 0.5078125, + "learning_rate": 0.00019401750096414722, + "loss": 1.0461, + "step": 13915 + }, + { + "epoch": 0.2, + "grad_norm": 0.578125, + "learning_rate": 0.00019400896838728618, + "loss": 1.0111, + "step": 13920 + }, + { + "epoch": 0.2, + "grad_norm": 0.578125, + "learning_rate": 0.00019400042991783592, + "loss": 0.9265, + "step": 13925 + }, + { + "epoch": 0.2, + "grad_norm": 0.70703125, + "learning_rate": 0.00019399188555633168, + "loss": 1.1099, + "step": 13930 + }, + { + "epoch": 0.2, + "grad_norm": 0.53515625, + "learning_rate": 0.000193983335303309, + "loss": 1.0102, + "step": 13935 + }, + { + "epoch": 0.2, + "grad_norm": 0.6796875, + "learning_rate": 0.0001939747791593038, + "loss": 0.9348, + "step": 13940 + }, + { + "epoch": 0.2, + "grad_norm": 0.609375, + "learning_rate": 0.00019396621712485245, + "loss": 0.846, + "step": 13945 + }, + { + "epoch": 0.2, + "grad_norm": 0.59375, + "learning_rate": 0.00019395764920049157, + "loss": 0.994, + "step": 13950 + }, + { + "epoch": 0.2, + "grad_norm": 0.609375, + "learning_rate": 0.00019394907538675823, + "loss": 1.009, + "step": 13955 + }, + { + "epoch": 0.2, + "grad_norm": 0.54296875, + "learning_rate": 0.00019394049568418985, + "loss": 0.9882, + "step": 13960 + }, + { + "epoch": 0.2, + "grad_norm": 0.50390625, + "learning_rate": 0.0001939319100933242, + "loss": 0.983, + "step": 13965 + }, + { + "epoch": 0.2, + "grad_norm": 0.5390625, + "learning_rate": 0.0001939233186146995, + "loss": 0.9918, + "step": 13970 + }, + { + "epoch": 0.2, + "grad_norm": 0.5703125, + "learning_rate": 0.00019391472124885418, + "loss": 0.9375, + "step": 13975 + }, + { + "epoch": 0.2, + "grad_norm": 0.57421875, + "learning_rate": 0.00019390611799632716, + "loss": 0.9159, + "step": 13980 + }, + { + "epoch": 0.2, + "grad_norm": 0.6328125, + "learning_rate": 0.00019389750885765773, + "loss": 1.0269, + "step": 13985 + }, + { + "epoch": 0.2, + "grad_norm": 0.5859375, + "learning_rate": 0.00019388889383338548, + "loss": 0.9451, + "step": 13990 + }, + { + "epoch": 0.2, + "grad_norm": 0.60546875, + "learning_rate": 0.00019388027292405045, + "loss": 1.1171, + "step": 13995 + }, + { + "epoch": 0.2, + "grad_norm": 0.498046875, + "learning_rate": 0.000193871646130193, + "loss": 0.9076, + "step": 14000 + }, + { + "epoch": 0.2, + "grad_norm": 0.53125, + "learning_rate": 0.00019386301345235385, + "loss": 0.9822, + "step": 14005 + }, + { + "epoch": 0.2, + "grad_norm": 0.54296875, + "learning_rate": 0.0001938543748910741, + "loss": 1.1278, + "step": 14010 + }, + { + "epoch": 0.2, + "grad_norm": 0.73046875, + "learning_rate": 0.0001938457304468953, + "loss": 1.0708, + "step": 14015 + }, + { + "epoch": 0.2, + "grad_norm": 0.625, + "learning_rate": 0.0001938370801203592, + "loss": 0.9955, + "step": 14020 + }, + { + "epoch": 0.2, + "grad_norm": 0.51953125, + "learning_rate": 0.000193828423912008, + "loss": 0.8778, + "step": 14025 + }, + { + "epoch": 0.2, + "grad_norm": 0.61328125, + "learning_rate": 0.0001938197618223844, + "loss": 0.9365, + "step": 14030 + }, + { + "epoch": 0.2, + "grad_norm": 0.55078125, + "learning_rate": 0.00019381109385203124, + "loss": 0.9629, + "step": 14035 + }, + { + "epoch": 0.2, + "grad_norm": 0.640625, + "learning_rate": 0.0001938024200014919, + "loss": 0.9758, + "step": 14040 + }, + { + "epoch": 0.2, + "grad_norm": 0.48828125, + "learning_rate": 0.00019379374027131, + "loss": 0.9081, + "step": 14045 + }, + { + "epoch": 0.2, + "grad_norm": 0.64453125, + "learning_rate": 0.00019378505466202968, + "loss": 1.1199, + "step": 14050 + }, + { + "epoch": 0.2, + "grad_norm": 0.57421875, + "learning_rate": 0.0001937763631741953, + "loss": 0.9182, + "step": 14055 + }, + { + "epoch": 0.2, + "grad_norm": 0.55859375, + "learning_rate": 0.0001937676658083517, + "loss": 0.9741, + "step": 14060 + }, + { + "epoch": 0.2, + "grad_norm": 0.59765625, + "learning_rate": 0.00019375896256504399, + "loss": 0.9724, + "step": 14065 + }, + { + "epoch": 0.2, + "grad_norm": 0.51953125, + "learning_rate": 0.00019375025344481772, + "loss": 1.0913, + "step": 14070 + }, + { + "epoch": 0.2, + "grad_norm": 0.53515625, + "learning_rate": 0.0001937415384482188, + "loss": 1.0246, + "step": 14075 + }, + { + "epoch": 0.2, + "grad_norm": 0.61328125, + "learning_rate": 0.00019373281757579348, + "loss": 1.2072, + "step": 14080 + }, + { + "epoch": 0.2, + "grad_norm": 0.61328125, + "learning_rate": 0.0001937240908280884, + "loss": 1.1545, + "step": 14085 + }, + { + "epoch": 0.2, + "grad_norm": 0.63671875, + "learning_rate": 0.00019371535820565056, + "loss": 1.0993, + "step": 14090 + }, + { + "epoch": 0.2, + "grad_norm": 0.55078125, + "learning_rate": 0.00019370661970902737, + "loss": 1.0449, + "step": 14095 + }, + { + "epoch": 0.2, + "grad_norm": 0.62109375, + "learning_rate": 0.0001936978753387665, + "loss": 1.0995, + "step": 14100 + }, + { + "epoch": 0.2, + "grad_norm": 0.51953125, + "learning_rate": 0.0001936891250954161, + "loss": 0.9807, + "step": 14105 + }, + { + "epoch": 0.2, + "grad_norm": 0.609375, + "learning_rate": 0.00019368036897952464, + "loss": 1.0008, + "step": 14110 + }, + { + "epoch": 0.2, + "grad_norm": 0.59765625, + "learning_rate": 0.00019367160699164097, + "loss": 0.99, + "step": 14115 + }, + { + "epoch": 0.2, + "grad_norm": 0.59375, + "learning_rate": 0.00019366283913231427, + "loss": 1.0677, + "step": 14120 + }, + { + "epoch": 0.2, + "grad_norm": 0.80078125, + "learning_rate": 0.00019365406540209414, + "loss": 1.0974, + "step": 14125 + }, + { + "epoch": 0.2, + "grad_norm": 0.5859375, + "learning_rate": 0.00019364528580153055, + "loss": 0.7966, + "step": 14130 + }, + { + "epoch": 0.2, + "grad_norm": 0.59765625, + "learning_rate": 0.00019363650033117375, + "loss": 0.8748, + "step": 14135 + }, + { + "epoch": 0.2, + "grad_norm": 0.62109375, + "learning_rate": 0.00019362770899157452, + "loss": 0.9802, + "step": 14140 + }, + { + "epoch": 0.2, + "grad_norm": 0.5546875, + "learning_rate": 0.00019361891178328383, + "loss": 1.0155, + "step": 14145 + }, + { + "epoch": 0.2, + "grad_norm": 0.62890625, + "learning_rate": 0.00019361010870685313, + "loss": 1.0516, + "step": 14150 + }, + { + "epoch": 0.2, + "grad_norm": 0.53515625, + "learning_rate": 0.0001936012997628342, + "loss": 0.9472, + "step": 14155 + }, + { + "epoch": 0.2, + "grad_norm": 0.5859375, + "learning_rate": 0.0001935924849517792, + "loss": 0.8923, + "step": 14160 + }, + { + "epoch": 0.2, + "grad_norm": 0.55078125, + "learning_rate": 0.00019358366427424064, + "loss": 0.9306, + "step": 14165 + }, + { + "epoch": 0.2, + "grad_norm": 0.498046875, + "learning_rate": 0.00019357483773077143, + "loss": 0.9439, + "step": 14170 + }, + { + "epoch": 0.2, + "grad_norm": 0.6171875, + "learning_rate": 0.0001935660053219248, + "loss": 0.9135, + "step": 14175 + }, + { + "epoch": 0.2, + "grad_norm": 0.5, + "learning_rate": 0.00019355716704825442, + "loss": 0.9764, + "step": 14180 + }, + { + "epoch": 0.2, + "grad_norm": 0.55078125, + "learning_rate": 0.00019354832291031425, + "loss": 0.8676, + "step": 14185 + }, + { + "epoch": 0.2, + "grad_norm": 0.546875, + "learning_rate": 0.0001935394729086587, + "loss": 1.0203, + "step": 14190 + }, + { + "epoch": 0.2, + "grad_norm": 0.55078125, + "learning_rate": 0.00019353061704384237, + "loss": 0.8991, + "step": 14195 + }, + { + "epoch": 0.2, + "grad_norm": 0.57421875, + "learning_rate": 0.00019352175531642049, + "loss": 0.933, + "step": 14200 + }, + { + "epoch": 0.2, + "grad_norm": 0.5390625, + "learning_rate": 0.00019351288772694847, + "loss": 0.9262, + "step": 14205 + }, + { + "epoch": 0.2, + "grad_norm": 0.498046875, + "learning_rate": 0.00019350401427598214, + "loss": 0.9976, + "step": 14210 + }, + { + "epoch": 0.2, + "grad_norm": 0.5, + "learning_rate": 0.00019349513496407772, + "loss": 0.8633, + "step": 14215 + }, + { + "epoch": 0.2, + "grad_norm": 0.51953125, + "learning_rate": 0.00019348624979179173, + "loss": 1.0477, + "step": 14220 + }, + { + "epoch": 0.2, + "grad_norm": 0.6328125, + "learning_rate": 0.00019347735875968115, + "loss": 1.0083, + "step": 14225 + }, + { + "epoch": 0.2, + "grad_norm": 0.5078125, + "learning_rate": 0.00019346846186830326, + "loss": 0.9553, + "step": 14230 + }, + { + "epoch": 0.2, + "grad_norm": 0.59375, + "learning_rate": 0.00019345955911821572, + "loss": 0.9713, + "step": 14235 + }, + { + "epoch": 0.2, + "grad_norm": 0.51953125, + "learning_rate": 0.00019345065050997657, + "loss": 0.9801, + "step": 14240 + }, + { + "epoch": 0.2, + "grad_norm": 0.6953125, + "learning_rate": 0.0001934417360441442, + "loss": 1.0721, + "step": 14245 + }, + { + "epoch": 0.2, + "grad_norm": 0.55859375, + "learning_rate": 0.00019343281572127742, + "loss": 1.081, + "step": 14250 + }, + { + "epoch": 0.2, + "grad_norm": 0.56640625, + "learning_rate": 0.00019342388954193536, + "loss": 0.8543, + "step": 14255 + }, + { + "epoch": 0.2, + "grad_norm": 0.55859375, + "learning_rate": 0.00019341495750667748, + "loss": 0.878, + "step": 14260 + }, + { + "epoch": 0.2, + "grad_norm": 0.61328125, + "learning_rate": 0.00019340601961606365, + "loss": 1.0285, + "step": 14265 + }, + { + "epoch": 0.2, + "grad_norm": 0.5390625, + "learning_rate": 0.00019339707587065415, + "loss": 0.9378, + "step": 14270 + }, + { + "epoch": 0.2, + "grad_norm": 0.57421875, + "learning_rate": 0.00019338812627100958, + "loss": 1.1077, + "step": 14275 + }, + { + "epoch": 0.2, + "grad_norm": 0.51171875, + "learning_rate": 0.0001933791708176909, + "loss": 1.0227, + "step": 14280 + }, + { + "epoch": 0.2, + "grad_norm": 0.6171875, + "learning_rate": 0.00019337020951125942, + "loss": 1.2616, + "step": 14285 + }, + { + "epoch": 0.2, + "grad_norm": 0.56640625, + "learning_rate": 0.00019336124235227686, + "loss": 0.9189, + "step": 14290 + }, + { + "epoch": 0.21, + "grad_norm": 0.5, + "learning_rate": 0.0001933522693413053, + "loss": 0.9835, + "step": 14295 + }, + { + "epoch": 0.21, + "grad_norm": 0.59375, + "learning_rate": 0.0001933432904789072, + "loss": 1.0314, + "step": 14300 + }, + { + "epoch": 0.21, + "grad_norm": 0.53515625, + "learning_rate": 0.00019333430576564534, + "loss": 1.0991, + "step": 14305 + }, + { + "epoch": 0.21, + "grad_norm": 0.51953125, + "learning_rate": 0.00019332531520208286, + "loss": 1.0224, + "step": 14310 + }, + { + "epoch": 0.21, + "grad_norm": 0.50390625, + "learning_rate": 0.00019331631878878337, + "loss": 0.9544, + "step": 14315 + }, + { + "epoch": 0.21, + "grad_norm": 0.53125, + "learning_rate": 0.00019330731652631073, + "loss": 0.9824, + "step": 14320 + }, + { + "epoch": 0.21, + "grad_norm": 0.55859375, + "learning_rate": 0.00019329830841522923, + "loss": 1.0508, + "step": 14325 + }, + { + "epoch": 0.21, + "grad_norm": 0.60546875, + "learning_rate": 0.00019328929445610351, + "loss": 0.8179, + "step": 14330 + }, + { + "epoch": 0.21, + "grad_norm": 0.490234375, + "learning_rate": 0.00019328027464949853, + "loss": 1.1035, + "step": 14335 + }, + { + "epoch": 0.21, + "grad_norm": 0.58203125, + "learning_rate": 0.00019327124899597972, + "loss": 0.9759, + "step": 14340 + }, + { + "epoch": 0.21, + "grad_norm": 0.625, + "learning_rate": 0.00019326221749611282, + "loss": 1.1146, + "step": 14345 + }, + { + "epoch": 0.21, + "grad_norm": 0.52734375, + "learning_rate": 0.00019325318015046388, + "loss": 0.9521, + "step": 14350 + }, + { + "epoch": 0.21, + "grad_norm": 0.62109375, + "learning_rate": 0.0001932441369595994, + "loss": 0.9631, + "step": 14355 + }, + { + "epoch": 0.21, + "grad_norm": 0.5625, + "learning_rate": 0.0001932350879240862, + "loss": 0.879, + "step": 14360 + }, + { + "epoch": 0.21, + "grad_norm": 0.51171875, + "learning_rate": 0.00019322603304449155, + "loss": 0.8715, + "step": 14365 + }, + { + "epoch": 0.21, + "grad_norm": 1.2421875, + "learning_rate": 0.00019321697232138296, + "loss": 0.9968, + "step": 14370 + }, + { + "epoch": 0.21, + "grad_norm": 0.5546875, + "learning_rate": 0.00019320790575532836, + "loss": 0.8983, + "step": 14375 + }, + { + "epoch": 0.21, + "grad_norm": 0.5625, + "learning_rate": 0.0001931988333468961, + "loss": 0.9373, + "step": 14380 + }, + { + "epoch": 0.21, + "grad_norm": 0.5078125, + "learning_rate": 0.0001931897550966548, + "loss": 0.9609, + "step": 14385 + }, + { + "epoch": 0.21, + "grad_norm": 0.6328125, + "learning_rate": 0.0001931806710051735, + "loss": 0.8751, + "step": 14390 + }, + { + "epoch": 0.21, + "grad_norm": 0.56640625, + "learning_rate": 0.00019317158107302166, + "loss": 0.9652, + "step": 14395 + }, + { + "epoch": 0.21, + "grad_norm": 0.53515625, + "learning_rate": 0.000193162485300769, + "loss": 0.9237, + "step": 14400 + }, + { + "epoch": 0.21, + "grad_norm": 0.5078125, + "learning_rate": 0.00019315338368898564, + "loss": 0.932, + "step": 14405 + }, + { + "epoch": 0.21, + "grad_norm": 0.59375, + "learning_rate": 0.0001931442762382421, + "loss": 0.9976, + "step": 14410 + }, + { + "epoch": 0.21, + "grad_norm": 0.5859375, + "learning_rate": 0.00019313516294910924, + "loss": 0.9154, + "step": 14415 + }, + { + "epoch": 0.21, + "grad_norm": 0.55078125, + "learning_rate": 0.0001931260438221583, + "loss": 1.094, + "step": 14420 + }, + { + "epoch": 0.21, + "grad_norm": 0.70703125, + "learning_rate": 0.00019311691885796087, + "loss": 1.0531, + "step": 14425 + }, + { + "epoch": 0.21, + "grad_norm": 0.578125, + "learning_rate": 0.00019310778805708888, + "loss": 1.0205, + "step": 14430 + }, + { + "epoch": 0.21, + "grad_norm": 0.56640625, + "learning_rate": 0.00019309865142011474, + "loss": 1.0082, + "step": 14435 + }, + { + "epoch": 0.21, + "grad_norm": 0.6171875, + "learning_rate": 0.0001930895089476111, + "loss": 1.0266, + "step": 14440 + }, + { + "epoch": 0.21, + "grad_norm": 0.52734375, + "learning_rate": 0.000193080360640151, + "loss": 0.9064, + "step": 14445 + }, + { + "epoch": 0.21, + "grad_norm": 0.58203125, + "learning_rate": 0.0001930712064983079, + "loss": 0.8931, + "step": 14450 + }, + { + "epoch": 0.21, + "grad_norm": 0.5546875, + "learning_rate": 0.00019306204652265558, + "loss": 0.907, + "step": 14455 + }, + { + "epoch": 0.21, + "grad_norm": 0.58203125, + "learning_rate": 0.00019305288071376817, + "loss": 0.9252, + "step": 14460 + }, + { + "epoch": 0.21, + "grad_norm": 0.85546875, + "learning_rate": 0.0001930437090722202, + "loss": 0.946, + "step": 14465 + }, + { + "epoch": 0.21, + "grad_norm": 0.61328125, + "learning_rate": 0.00019303453159858665, + "loss": 1.1208, + "step": 14470 + }, + { + "epoch": 0.21, + "grad_norm": 0.58203125, + "learning_rate": 0.00019302534829344266, + "loss": 0.9683, + "step": 14475 + }, + { + "epoch": 0.21, + "grad_norm": 0.5703125, + "learning_rate": 0.0001930161591573639, + "loss": 0.8724, + "step": 14480 + }, + { + "epoch": 0.21, + "grad_norm": 0.5390625, + "learning_rate": 0.0001930069641909263, + "loss": 1.0984, + "step": 14485 + }, + { + "epoch": 0.21, + "grad_norm": 0.55859375, + "learning_rate": 0.00019299776339470632, + "loss": 0.9404, + "step": 14490 + }, + { + "epoch": 0.21, + "grad_norm": 0.59375, + "learning_rate": 0.0001929885567692806, + "loss": 0.9694, + "step": 14495 + }, + { + "epoch": 0.21, + "grad_norm": 0.51953125, + "learning_rate": 0.00019297934431522623, + "loss": 1.0963, + "step": 14500 + }, + { + "epoch": 0.21, + "grad_norm": 0.6015625, + "learning_rate": 0.00019297012603312067, + "loss": 0.9967, + "step": 14505 + }, + { + "epoch": 0.21, + "grad_norm": 0.578125, + "learning_rate": 0.0001929609019235417, + "loss": 0.8824, + "step": 14510 + }, + { + "epoch": 0.21, + "grad_norm": 0.58203125, + "learning_rate": 0.00019295167198706757, + "loss": 0.9929, + "step": 14515 + }, + { + "epoch": 0.21, + "grad_norm": 0.58203125, + "learning_rate": 0.00019294243622427674, + "loss": 1.0092, + "step": 14520 + }, + { + "epoch": 0.21, + "grad_norm": 0.546875, + "learning_rate": 0.00019293319463574817, + "loss": 1.0737, + "step": 14525 + }, + { + "epoch": 0.21, + "grad_norm": 0.546875, + "learning_rate": 0.0001929239472220611, + "loss": 0.932, + "step": 14530 + }, + { + "epoch": 0.21, + "grad_norm": 0.61328125, + "learning_rate": 0.00019291469398379524, + "loss": 0.906, + "step": 14535 + }, + { + "epoch": 0.21, + "grad_norm": 1.046875, + "learning_rate": 0.00019290543492153045, + "loss": 1.0696, + "step": 14540 + }, + { + "epoch": 0.21, + "grad_norm": 0.5703125, + "learning_rate": 0.0001928961700358473, + "loss": 0.8614, + "step": 14545 + }, + { + "epoch": 0.21, + "grad_norm": 0.5625, + "learning_rate": 0.00019288689932732634, + "loss": 1.0545, + "step": 14550 + }, + { + "epoch": 0.21, + "grad_norm": 0.640625, + "learning_rate": 0.00019287762279654872, + "loss": 0.9849, + "step": 14555 + }, + { + "epoch": 0.21, + "grad_norm": 0.55078125, + "learning_rate": 0.00019286834044409597, + "loss": 0.9447, + "step": 14560 + }, + { + "epoch": 0.21, + "grad_norm": 0.56640625, + "learning_rate": 0.00019285905227054985, + "loss": 0.9369, + "step": 14565 + }, + { + "epoch": 0.21, + "grad_norm": 0.578125, + "learning_rate": 0.00019284975827649258, + "loss": 0.9404, + "step": 14570 + }, + { + "epoch": 0.21, + "grad_norm": 0.58203125, + "learning_rate": 0.00019284045846250673, + "loss": 0.899, + "step": 14575 + }, + { + "epoch": 0.21, + "grad_norm": 0.58203125, + "learning_rate": 0.0001928311528291752, + "loss": 1.0397, + "step": 14580 + }, + { + "epoch": 0.21, + "grad_norm": 0.66796875, + "learning_rate": 0.0001928218413770813, + "loss": 1.0044, + "step": 14585 + }, + { + "epoch": 0.21, + "grad_norm": 0.51171875, + "learning_rate": 0.00019281252410680862, + "loss": 0.9415, + "step": 14590 + }, + { + "epoch": 0.21, + "grad_norm": 0.6171875, + "learning_rate": 0.00019280320101894128, + "loss": 0.9335, + "step": 14595 + }, + { + "epoch": 0.21, + "grad_norm": 0.54296875, + "learning_rate": 0.00019279387211406358, + "loss": 0.8984, + "step": 14600 + }, + { + "epoch": 0.21, + "grad_norm": 0.53125, + "learning_rate": 0.00019278453739276027, + "loss": 1.1, + "step": 14605 + }, + { + "epoch": 0.21, + "grad_norm": 0.58984375, + "learning_rate": 0.0001927751968556165, + "loss": 1.0613, + "step": 14610 + }, + { + "epoch": 0.21, + "grad_norm": 0.61328125, + "learning_rate": 0.00019276585050321775, + "loss": 1.1709, + "step": 14615 + }, + { + "epoch": 0.21, + "grad_norm": 0.71875, + "learning_rate": 0.00019275649833614983, + "loss": 0.8661, + "step": 14620 + }, + { + "epoch": 0.21, + "grad_norm": 0.61328125, + "learning_rate": 0.00019274714035499895, + "loss": 0.8943, + "step": 14625 + }, + { + "epoch": 0.21, + "grad_norm": 0.58203125, + "learning_rate": 0.00019273777656035168, + "loss": 0.9973, + "step": 14630 + }, + { + "epoch": 0.21, + "grad_norm": 0.625, + "learning_rate": 0.00019272840695279495, + "loss": 0.9958, + "step": 14635 + }, + { + "epoch": 0.21, + "grad_norm": 0.65234375, + "learning_rate": 0.00019271903153291606, + "loss": 0.953, + "step": 14640 + }, + { + "epoch": 0.21, + "grad_norm": 1.1484375, + "learning_rate": 0.0001927096503013027, + "loss": 0.8613, + "step": 14645 + }, + { + "epoch": 0.21, + "grad_norm": 0.5234375, + "learning_rate": 0.00019270026325854284, + "loss": 0.8887, + "step": 14650 + }, + { + "epoch": 0.21, + "grad_norm": 0.60546875, + "learning_rate": 0.00019269087040522495, + "loss": 0.9941, + "step": 14655 + }, + { + "epoch": 0.21, + "grad_norm": 0.50390625, + "learning_rate": 0.00019268147174193766, + "loss": 0.8934, + "step": 14660 + }, + { + "epoch": 0.21, + "grad_norm": 0.5859375, + "learning_rate": 0.00019267206726927025, + "loss": 0.959, + "step": 14665 + }, + { + "epoch": 0.21, + "grad_norm": 0.54296875, + "learning_rate": 0.00019266265698781205, + "loss": 0.9928, + "step": 14670 + }, + { + "epoch": 0.21, + "grad_norm": 0.58203125, + "learning_rate": 0.00019265324089815303, + "loss": 0.8441, + "step": 14675 + }, + { + "epoch": 0.21, + "grad_norm": 0.6640625, + "learning_rate": 0.00019264381900088333, + "loss": 0.9877, + "step": 14680 + }, + { + "epoch": 0.21, + "grad_norm": 0.5390625, + "learning_rate": 0.0001926343912965935, + "loss": 0.8847, + "step": 14685 + }, + { + "epoch": 0.21, + "grad_norm": 0.609375, + "learning_rate": 0.00019262495778587458, + "loss": 1.1266, + "step": 14690 + }, + { + "epoch": 0.21, + "grad_norm": 0.53515625, + "learning_rate": 0.00019261551846931778, + "loss": 1.1062, + "step": 14695 + }, + { + "epoch": 0.21, + "grad_norm": 0.5390625, + "learning_rate": 0.00019260607334751483, + "loss": 1.1259, + "step": 14700 + }, + { + "epoch": 0.21, + "grad_norm": 0.62890625, + "learning_rate": 0.0001925966224210577, + "loss": 0.9801, + "step": 14705 + }, + { + "epoch": 0.21, + "grad_norm": 0.55859375, + "learning_rate": 0.00019258716569053885, + "loss": 0.9701, + "step": 14710 + }, + { + "epoch": 0.21, + "grad_norm": 0.625, + "learning_rate": 0.00019257770315655098, + "loss": 1.0589, + "step": 14715 + }, + { + "epoch": 0.21, + "grad_norm": 0.5546875, + "learning_rate": 0.00019256823481968727, + "loss": 0.9431, + "step": 14720 + }, + { + "epoch": 0.21, + "grad_norm": 0.59765625, + "learning_rate": 0.00019255876068054116, + "loss": 0.8557, + "step": 14725 + }, + { + "epoch": 0.21, + "grad_norm": 0.640625, + "learning_rate": 0.0001925492807397065, + "loss": 1.1344, + "step": 14730 + }, + { + "epoch": 0.21, + "grad_norm": 0.58203125, + "learning_rate": 0.00019253979499777755, + "loss": 0.9122, + "step": 14735 + }, + { + "epoch": 0.21, + "grad_norm": 0.58984375, + "learning_rate": 0.00019253030345534882, + "loss": 1.0495, + "step": 14740 + }, + { + "epoch": 0.21, + "grad_norm": 0.59375, + "learning_rate": 0.0001925208061130153, + "loss": 0.97, + "step": 14745 + }, + { + "epoch": 0.21, + "grad_norm": 0.5703125, + "learning_rate": 0.00019251130297137227, + "loss": 0.9526, + "step": 14750 + }, + { + "epoch": 0.21, + "grad_norm": 0.56640625, + "learning_rate": 0.00019250179403101542, + "loss": 1.0612, + "step": 14755 + }, + { + "epoch": 0.21, + "grad_norm": 0.5546875, + "learning_rate": 0.00019249227929254078, + "loss": 0.9857, + "step": 14760 + }, + { + "epoch": 0.21, + "grad_norm": 0.5390625, + "learning_rate": 0.00019248275875654474, + "loss": 0.9431, + "step": 14765 + }, + { + "epoch": 0.21, + "grad_norm": 0.46875, + "learning_rate": 0.00019247323242362402, + "loss": 0.9341, + "step": 14770 + }, + { + "epoch": 0.21, + "grad_norm": 0.52734375, + "learning_rate": 0.0001924637002943758, + "loss": 0.9805, + "step": 14775 + }, + { + "epoch": 0.21, + "grad_norm": 0.5625, + "learning_rate": 0.00019245416236939752, + "loss": 1.0725, + "step": 14780 + }, + { + "epoch": 0.21, + "grad_norm": 0.5859375, + "learning_rate": 0.00019244461864928707, + "loss": 1.0164, + "step": 14785 + }, + { + "epoch": 0.21, + "grad_norm": 0.62890625, + "learning_rate": 0.00019243506913464261, + "loss": 1.2681, + "step": 14790 + }, + { + "epoch": 0.21, + "grad_norm": 0.61328125, + "learning_rate": 0.00019242551382606275, + "loss": 1.1246, + "step": 14795 + }, + { + "epoch": 0.21, + "grad_norm": 0.609375, + "learning_rate": 0.00019241595272414643, + "loss": 1.091, + "step": 14800 + }, + { + "epoch": 0.21, + "grad_norm": 0.515625, + "learning_rate": 0.00019240638582949292, + "loss": 0.8939, + "step": 14805 + }, + { + "epoch": 0.21, + "grad_norm": 0.765625, + "learning_rate": 0.00019239681314270194, + "loss": 1.2496, + "step": 14810 + }, + { + "epoch": 0.21, + "grad_norm": 0.53125, + "learning_rate": 0.00019238723466437346, + "loss": 0.9049, + "step": 14815 + }, + { + "epoch": 0.21, + "grad_norm": 0.6015625, + "learning_rate": 0.0001923776503951079, + "loss": 0.8829, + "step": 14820 + }, + { + "epoch": 0.21, + "grad_norm": 0.53515625, + "learning_rate": 0.00019236806033550599, + "loss": 0.901, + "step": 14825 + }, + { + "epoch": 0.21, + "grad_norm": 0.55859375, + "learning_rate": 0.00019235846448616885, + "loss": 1.0361, + "step": 14830 + }, + { + "epoch": 0.21, + "grad_norm": 0.62109375, + "learning_rate": 0.00019234886284769798, + "loss": 0.9696, + "step": 14835 + }, + { + "epoch": 0.21, + "grad_norm": 0.5859375, + "learning_rate": 0.00019233925542069523, + "loss": 0.9323, + "step": 14840 + }, + { + "epoch": 0.21, + "grad_norm": 0.6015625, + "learning_rate": 0.00019232964220576275, + "loss": 0.9367, + "step": 14845 + }, + { + "epoch": 0.21, + "grad_norm": 0.6171875, + "learning_rate": 0.00019232002320350317, + "loss": 1.1198, + "step": 14850 + }, + { + "epoch": 0.21, + "grad_norm": 0.5546875, + "learning_rate": 0.00019231039841451937, + "loss": 0.9123, + "step": 14855 + }, + { + "epoch": 0.21, + "grad_norm": 0.58203125, + "learning_rate": 0.00019230076783941468, + "loss": 0.8982, + "step": 14860 + }, + { + "epoch": 0.21, + "grad_norm": 0.47265625, + "learning_rate": 0.00019229113147879273, + "loss": 0.9017, + "step": 14865 + }, + { + "epoch": 0.21, + "grad_norm": 0.57421875, + "learning_rate": 0.00019228148933325755, + "loss": 0.9551, + "step": 14870 + }, + { + "epoch": 0.21, + "grad_norm": 0.59765625, + "learning_rate": 0.0001922718414034135, + "loss": 0.8955, + "step": 14875 + }, + { + "epoch": 0.21, + "grad_norm": 0.44921875, + "learning_rate": 0.0001922621876898654, + "loss": 0.9203, + "step": 14880 + }, + { + "epoch": 0.21, + "grad_norm": 0.54296875, + "learning_rate": 0.00019225252819321826, + "loss": 0.9607, + "step": 14885 + }, + { + "epoch": 0.21, + "grad_norm": 0.57421875, + "learning_rate": 0.0001922428629140776, + "loss": 0.8713, + "step": 14890 + }, + { + "epoch": 0.21, + "grad_norm": 0.62890625, + "learning_rate": 0.0001922331918530492, + "loss": 0.9595, + "step": 14895 + }, + { + "epoch": 0.21, + "grad_norm": 0.5546875, + "learning_rate": 0.00019222351501073934, + "loss": 0.9485, + "step": 14900 + }, + { + "epoch": 0.21, + "grad_norm": 0.5625, + "learning_rate": 0.00019221383238775448, + "loss": 0.9874, + "step": 14905 + }, + { + "epoch": 0.21, + "grad_norm": 0.55859375, + "learning_rate": 0.00019220414398470162, + "loss": 0.9396, + "step": 14910 + }, + { + "epoch": 0.21, + "grad_norm": 0.515625, + "learning_rate": 0.00019219444980218798, + "loss": 0.9336, + "step": 14915 + }, + { + "epoch": 0.21, + "grad_norm": 0.703125, + "learning_rate": 0.00019218474984082122, + "loss": 0.9949, + "step": 14920 + }, + { + "epoch": 0.21, + "grad_norm": 0.55859375, + "learning_rate": 0.00019217504410120936, + "loss": 1.0587, + "step": 14925 + }, + { + "epoch": 0.21, + "grad_norm": 0.57421875, + "learning_rate": 0.00019216533258396078, + "loss": 1.0473, + "step": 14930 + }, + { + "epoch": 0.21, + "grad_norm": 0.5390625, + "learning_rate": 0.00019215561528968414, + "loss": 0.9306, + "step": 14935 + }, + { + "epoch": 0.21, + "grad_norm": 0.65625, + "learning_rate": 0.00019214589221898862, + "loss": 0.8904, + "step": 14940 + }, + { + "epoch": 0.21, + "grad_norm": 0.55859375, + "learning_rate": 0.00019213616337248362, + "loss": 0.9354, + "step": 14945 + }, + { + "epoch": 0.21, + "grad_norm": 0.62109375, + "learning_rate": 0.00019212642875077895, + "loss": 0.9908, + "step": 14950 + }, + { + "epoch": 0.21, + "grad_norm": 0.453125, + "learning_rate": 0.0001921166883544848, + "loss": 1.0877, + "step": 14955 + }, + { + "epoch": 0.21, + "grad_norm": 0.625, + "learning_rate": 0.00019210694218421175, + "loss": 1.0058, + "step": 14960 + }, + { + "epoch": 0.21, + "grad_norm": 0.55078125, + "learning_rate": 0.00019209719024057063, + "loss": 0.8071, + "step": 14965 + }, + { + "epoch": 0.21, + "grad_norm": 0.51953125, + "learning_rate": 0.00019208743252417275, + "loss": 0.9298, + "step": 14970 + }, + { + "epoch": 0.21, + "grad_norm": 0.53125, + "learning_rate": 0.0001920776690356297, + "loss": 0.853, + "step": 14975 + }, + { + "epoch": 0.21, + "grad_norm": 0.5625, + "learning_rate": 0.0001920678997755535, + "loss": 1.0874, + "step": 14980 + }, + { + "epoch": 0.21, + "grad_norm": 0.6015625, + "learning_rate": 0.00019205812474455648, + "loss": 1.062, + "step": 14985 + }, + { + "epoch": 0.22, + "grad_norm": 0.58203125, + "learning_rate": 0.00019204834394325135, + "loss": 1.0068, + "step": 14990 + }, + { + "epoch": 0.22, + "grad_norm": 0.57421875, + "learning_rate": 0.00019203855737225122, + "loss": 0.8639, + "step": 14995 + }, + { + "epoch": 0.22, + "grad_norm": 0.671875, + "learning_rate": 0.00019202876503216946, + "loss": 1.007, + "step": 15000 + }, + { + "epoch": 0.22, + "grad_norm": 0.58203125, + "learning_rate": 0.0001920189669236199, + "loss": 1.0242, + "step": 15005 + }, + { + "epoch": 0.22, + "grad_norm": 0.5234375, + "learning_rate": 0.00019200916304721672, + "loss": 1.0116, + "step": 15010 + }, + { + "epoch": 0.22, + "grad_norm": 0.58203125, + "learning_rate": 0.00019199935340357438, + "loss": 1.0486, + "step": 15015 + }, + { + "epoch": 0.22, + "grad_norm": 0.66015625, + "learning_rate": 0.0001919895379933078, + "loss": 1.1622, + "step": 15020 + }, + { + "epoch": 0.22, + "grad_norm": 0.52734375, + "learning_rate": 0.0001919797168170322, + "loss": 1.0576, + "step": 15025 + }, + { + "epoch": 0.22, + "grad_norm": 0.5390625, + "learning_rate": 0.0001919698898753632, + "loss": 0.9873, + "step": 15030 + }, + { + "epoch": 0.22, + "grad_norm": 0.6015625, + "learning_rate": 0.00019196005716891676, + "loss": 1.0213, + "step": 15035 + }, + { + "epoch": 0.22, + "grad_norm": 0.51953125, + "learning_rate": 0.00019195021869830922, + "loss": 1.0241, + "step": 15040 + }, + { + "epoch": 0.22, + "grad_norm": 0.62890625, + "learning_rate": 0.00019194037446415723, + "loss": 0.8688, + "step": 15045 + }, + { + "epoch": 0.22, + "grad_norm": 0.55859375, + "learning_rate": 0.00019193052446707785, + "loss": 0.8734, + "step": 15050 + }, + { + "epoch": 0.22, + "grad_norm": 0.5546875, + "learning_rate": 0.00019192066870768853, + "loss": 0.8426, + "step": 15055 + }, + { + "epoch": 0.22, + "grad_norm": 0.55078125, + "learning_rate": 0.00019191080718660697, + "loss": 0.949, + "step": 15060 + }, + { + "epoch": 0.22, + "grad_norm": 0.5390625, + "learning_rate": 0.00019190093990445134, + "loss": 0.9425, + "step": 15065 + }, + { + "epoch": 0.22, + "grad_norm": 0.5703125, + "learning_rate": 0.00019189106686184014, + "loss": 0.8756, + "step": 15070 + }, + { + "epoch": 0.22, + "grad_norm": 0.5859375, + "learning_rate": 0.00019188118805939222, + "loss": 0.9335, + "step": 15075 + }, + { + "epoch": 0.22, + "grad_norm": 0.546875, + "learning_rate": 0.0001918713034977268, + "loss": 0.9527, + "step": 15080 + }, + { + "epoch": 0.22, + "grad_norm": 0.66796875, + "learning_rate": 0.00019186141317746342, + "loss": 0.9751, + "step": 15085 + }, + { + "epoch": 0.22, + "grad_norm": 0.5390625, + "learning_rate": 0.00019185151709922205, + "loss": 0.9198, + "step": 15090 + }, + { + "epoch": 0.22, + "grad_norm": 0.5703125, + "learning_rate": 0.00019184161526362298, + "loss": 0.8982, + "step": 15095 + }, + { + "epoch": 0.22, + "grad_norm": 0.435546875, + "learning_rate": 0.00019183170767128686, + "loss": 0.9553, + "step": 15100 + }, + { + "epoch": 0.22, + "grad_norm": 0.671875, + "learning_rate": 0.00019182179432283473, + "loss": 0.9782, + "step": 15105 + }, + { + "epoch": 0.22, + "grad_norm": 0.5078125, + "learning_rate": 0.00019181187521888796, + "loss": 0.8864, + "step": 15110 + }, + { + "epoch": 0.22, + "grad_norm": 0.515625, + "learning_rate": 0.00019180195036006825, + "loss": 0.9765, + "step": 15115 + }, + { + "epoch": 0.22, + "grad_norm": 0.61328125, + "learning_rate": 0.00019179201974699775, + "loss": 0.9345, + "step": 15120 + }, + { + "epoch": 0.22, + "grad_norm": 0.5546875, + "learning_rate": 0.00019178208338029894, + "loss": 1.0356, + "step": 15125 + }, + { + "epoch": 0.22, + "grad_norm": 0.625, + "learning_rate": 0.00019177214126059458, + "loss": 1.0129, + "step": 15130 + }, + { + "epoch": 0.22, + "grad_norm": 0.57421875, + "learning_rate": 0.0001917621933885079, + "loss": 0.9623, + "step": 15135 + }, + { + "epoch": 0.22, + "grad_norm": 0.53125, + "learning_rate": 0.00019175223976466242, + "loss": 0.8772, + "step": 15140 + }, + { + "epoch": 0.22, + "grad_norm": 0.53515625, + "learning_rate": 0.00019174228038968205, + "loss": 1.0285, + "step": 15145 + }, + { + "epoch": 0.22, + "grad_norm": 0.4296875, + "learning_rate": 0.0001917323152641911, + "loss": 1.0615, + "step": 15150 + }, + { + "epoch": 0.22, + "grad_norm": 0.5546875, + "learning_rate": 0.00019172234438881412, + "loss": 0.952, + "step": 15155 + }, + { + "epoch": 0.22, + "grad_norm": 0.68359375, + "learning_rate": 0.00019171236776417612, + "loss": 1.0457, + "step": 15160 + }, + { + "epoch": 0.22, + "grad_norm": 0.49609375, + "learning_rate": 0.0001917023853909025, + "loss": 1.1488, + "step": 15165 + }, + { + "epoch": 0.22, + "grad_norm": 0.5625, + "learning_rate": 0.00019169239726961887, + "loss": 0.9128, + "step": 15170 + }, + { + "epoch": 0.22, + "grad_norm": 0.63671875, + "learning_rate": 0.0001916824034009514, + "loss": 1.0244, + "step": 15175 + }, + { + "epoch": 0.22, + "grad_norm": 0.6328125, + "learning_rate": 0.00019167240378552644, + "loss": 0.9686, + "step": 15180 + }, + { + "epoch": 0.22, + "grad_norm": 0.5546875, + "learning_rate": 0.0001916623984239708, + "loss": 0.9202, + "step": 15185 + }, + { + "epoch": 0.22, + "grad_norm": 0.51953125, + "learning_rate": 0.00019165238731691164, + "loss": 0.8916, + "step": 15190 + }, + { + "epoch": 0.22, + "grad_norm": 0.578125, + "learning_rate": 0.00019164237046497646, + "loss": 0.8949, + "step": 15195 + }, + { + "epoch": 0.22, + "grad_norm": 0.53515625, + "learning_rate": 0.00019163234786879314, + "loss": 0.962, + "step": 15200 + }, + { + "epoch": 0.22, + "grad_norm": 0.51953125, + "learning_rate": 0.00019162231952898987, + "loss": 0.85, + "step": 15205 + }, + { + "epoch": 0.22, + "grad_norm": 0.65625, + "learning_rate": 0.0001916122854461953, + "loss": 0.9933, + "step": 15210 + }, + { + "epoch": 0.22, + "grad_norm": 0.59375, + "learning_rate": 0.00019160224562103832, + "loss": 0.9851, + "step": 15215 + }, + { + "epoch": 0.22, + "grad_norm": 0.58984375, + "learning_rate": 0.00019159220005414825, + "loss": 0.9937, + "step": 15220 + }, + { + "epoch": 0.22, + "grad_norm": 0.52734375, + "learning_rate": 0.00019158214874615475, + "loss": 0.9975, + "step": 15225 + }, + { + "epoch": 0.22, + "grad_norm": 0.69140625, + "learning_rate": 0.0001915720916976879, + "loss": 1.0316, + "step": 15230 + }, + { + "epoch": 0.22, + "grad_norm": 0.9140625, + "learning_rate": 0.00019156202890937803, + "loss": 1.1671, + "step": 15235 + }, + { + "epoch": 0.22, + "grad_norm": 0.63671875, + "learning_rate": 0.00019155196038185592, + "loss": 1.0737, + "step": 15240 + }, + { + "epoch": 0.22, + "grad_norm": 0.6171875, + "learning_rate": 0.00019154188611575265, + "loss": 1.0276, + "step": 15245 + }, + { + "epoch": 0.22, + "grad_norm": 0.5546875, + "learning_rate": 0.0001915318061116997, + "loss": 0.9565, + "step": 15250 + }, + { + "epoch": 0.22, + "grad_norm": 0.625, + "learning_rate": 0.00019152172037032892, + "loss": 1.0703, + "step": 15255 + }, + { + "epoch": 0.22, + "grad_norm": 0.58203125, + "learning_rate": 0.00019151162889227246, + "loss": 0.8682, + "step": 15260 + }, + { + "epoch": 0.22, + "grad_norm": 0.59375, + "learning_rate": 0.00019150153167816288, + "loss": 1.1064, + "step": 15265 + }, + { + "epoch": 0.22, + "grad_norm": 0.51171875, + "learning_rate": 0.0001914914287286331, + "loss": 0.9682, + "step": 15270 + }, + { + "epoch": 0.22, + "grad_norm": 0.671875, + "learning_rate": 0.00019148132004431632, + "loss": 0.9411, + "step": 15275 + }, + { + "epoch": 0.22, + "grad_norm": 0.6171875, + "learning_rate": 0.00019147120562584624, + "loss": 0.8581, + "step": 15280 + }, + { + "epoch": 0.22, + "grad_norm": 0.609375, + "learning_rate": 0.0001914610854738568, + "loss": 0.9584, + "step": 15285 + }, + { + "epoch": 0.22, + "grad_norm": 0.56640625, + "learning_rate": 0.00019145095958898235, + "loss": 0.8806, + "step": 15290 + }, + { + "epoch": 0.22, + "grad_norm": 0.5625, + "learning_rate": 0.00019144082797185763, + "loss": 1.026, + "step": 15295 + }, + { + "epoch": 0.22, + "grad_norm": 0.578125, + "learning_rate": 0.00019143069062311767, + "loss": 0.9719, + "step": 15300 + }, + { + "epoch": 0.22, + "grad_norm": 0.60546875, + "learning_rate": 0.0001914205475433979, + "loss": 0.891, + "step": 15305 + }, + { + "epoch": 0.22, + "grad_norm": 0.482421875, + "learning_rate": 0.00019141039873333404, + "loss": 0.9281, + "step": 15310 + }, + { + "epoch": 0.22, + "grad_norm": 0.52734375, + "learning_rate": 0.00019140024419356233, + "loss": 1.0256, + "step": 15315 + }, + { + "epoch": 0.22, + "grad_norm": 0.546875, + "learning_rate": 0.00019139008392471917, + "loss": 1.1613, + "step": 15320 + }, + { + "epoch": 0.22, + "grad_norm": 0.5, + "learning_rate": 0.0001913799179274415, + "loss": 0.969, + "step": 15325 + }, + { + "epoch": 0.22, + "grad_norm": 0.65234375, + "learning_rate": 0.00019136974620236652, + "loss": 0.8954, + "step": 15330 + }, + { + "epoch": 0.22, + "grad_norm": 0.68359375, + "learning_rate": 0.00019135956875013176, + "loss": 1.04, + "step": 15335 + }, + { + "epoch": 0.22, + "grad_norm": 0.58203125, + "learning_rate": 0.00019134938557137515, + "loss": 1.0477, + "step": 15340 + }, + { + "epoch": 0.22, + "grad_norm": 0.55859375, + "learning_rate": 0.00019133919666673507, + "loss": 0.9627, + "step": 15345 + }, + { + "epoch": 0.22, + "grad_norm": 0.6171875, + "learning_rate": 0.00019132900203685006, + "loss": 0.8964, + "step": 15350 + }, + { + "epoch": 0.22, + "grad_norm": 0.5234375, + "learning_rate": 0.00019131880168235922, + "loss": 0.9475, + "step": 15355 + }, + { + "epoch": 0.22, + "grad_norm": 0.58203125, + "learning_rate": 0.0001913085956039019, + "loss": 1.0046, + "step": 15360 + }, + { + "epoch": 0.22, + "grad_norm": 0.54296875, + "learning_rate": 0.0001912983838021178, + "loss": 0.8238, + "step": 15365 + }, + { + "epoch": 0.22, + "grad_norm": 0.546875, + "learning_rate": 0.00019128816627764702, + "loss": 0.8787, + "step": 15370 + }, + { + "epoch": 0.22, + "grad_norm": 0.625, + "learning_rate": 0.00019127794303113002, + "loss": 0.9763, + "step": 15375 + }, + { + "epoch": 0.22, + "grad_norm": 0.5234375, + "learning_rate": 0.00019126771406320758, + "loss": 0.8963, + "step": 15380 + }, + { + "epoch": 0.22, + "grad_norm": 0.59375, + "learning_rate": 0.0001912574793745209, + "loss": 0.9862, + "step": 15385 + }, + { + "epoch": 0.22, + "grad_norm": 0.55078125, + "learning_rate": 0.00019124723896571147, + "loss": 1.0727, + "step": 15390 + }, + { + "epoch": 0.22, + "grad_norm": 0.56640625, + "learning_rate": 0.00019123699283742117, + "loss": 0.9808, + "step": 15395 + }, + { + "epoch": 0.22, + "grad_norm": 0.671875, + "learning_rate": 0.00019122674099029225, + "loss": 1.028, + "step": 15400 + }, + { + "epoch": 0.22, + "grad_norm": 0.56640625, + "learning_rate": 0.00019121648342496731, + "loss": 0.9697, + "step": 15405 + }, + { + "epoch": 0.22, + "grad_norm": 0.51953125, + "learning_rate": 0.00019120622014208932, + "loss": 0.8534, + "step": 15410 + }, + { + "epoch": 0.22, + "grad_norm": 0.7890625, + "learning_rate": 0.00019119595114230154, + "loss": 1.0241, + "step": 15415 + }, + { + "epoch": 0.22, + "grad_norm": 0.734375, + "learning_rate": 0.0001911856764262477, + "loss": 1.088, + "step": 15420 + }, + { + "epoch": 0.22, + "grad_norm": 0.65625, + "learning_rate": 0.00019117539599457182, + "loss": 1.0458, + "step": 15425 + }, + { + "epoch": 0.22, + "grad_norm": 0.609375, + "learning_rate": 0.00019116510984791825, + "loss": 0.9081, + "step": 15430 + }, + { + "epoch": 0.22, + "grad_norm": 0.56640625, + "learning_rate": 0.0001911548179869318, + "loss": 0.9618, + "step": 15435 + }, + { + "epoch": 0.22, + "grad_norm": 0.58984375, + "learning_rate": 0.0001911445204122575, + "loss": 0.9968, + "step": 15440 + }, + { + "epoch": 0.22, + "grad_norm": 0.49609375, + "learning_rate": 0.0001911342171245409, + "loss": 0.9958, + "step": 15445 + }, + { + "epoch": 0.22, + "grad_norm": 0.5703125, + "learning_rate": 0.00019112390812442773, + "loss": 0.984, + "step": 15450 + }, + { + "epoch": 0.22, + "grad_norm": 0.6015625, + "learning_rate": 0.00019111359341256426, + "loss": 1.0099, + "step": 15455 + }, + { + "epoch": 0.22, + "grad_norm": 0.62890625, + "learning_rate": 0.00019110327298959697, + "loss": 0.879, + "step": 15460 + }, + { + "epoch": 0.22, + "grad_norm": 0.5234375, + "learning_rate": 0.00019109294685617275, + "loss": 1.2221, + "step": 15465 + }, + { + "epoch": 0.22, + "grad_norm": 0.53125, + "learning_rate": 0.0001910826150129389, + "loss": 0.8186, + "step": 15470 + }, + { + "epoch": 0.22, + "grad_norm": 0.58203125, + "learning_rate": 0.00019107227746054302, + "loss": 0.8354, + "step": 15475 + }, + { + "epoch": 0.22, + "grad_norm": 0.484375, + "learning_rate": 0.00019106193419963304, + "loss": 1.0459, + "step": 15480 + }, + { + "epoch": 0.22, + "grad_norm": 0.62890625, + "learning_rate": 0.00019105158523085734, + "loss": 1.156, + "step": 15485 + }, + { + "epoch": 0.22, + "grad_norm": 0.54296875, + "learning_rate": 0.00019104123055486454, + "loss": 0.9457, + "step": 15490 + }, + { + "epoch": 0.22, + "grad_norm": 0.625, + "learning_rate": 0.00019103087017230375, + "loss": 0.8631, + "step": 15495 + }, + { + "epoch": 0.22, + "grad_norm": 0.58984375, + "learning_rate": 0.00019102050408382432, + "loss": 1.0076, + "step": 15500 + }, + { + "epoch": 0.22, + "grad_norm": 0.51953125, + "learning_rate": 0.00019101013229007606, + "loss": 0.8997, + "step": 15505 + }, + { + "epoch": 0.22, + "grad_norm": 0.5859375, + "learning_rate": 0.00019099975479170903, + "loss": 1.0633, + "step": 15510 + }, + { + "epoch": 0.22, + "grad_norm": 0.51953125, + "learning_rate": 0.00019098937158937374, + "loss": 0.8956, + "step": 15515 + }, + { + "epoch": 0.22, + "grad_norm": 0.62109375, + "learning_rate": 0.000190978982683721, + "loss": 1.0573, + "step": 15520 + }, + { + "epoch": 0.22, + "grad_norm": 0.55859375, + "learning_rate": 0.000190968588075402, + "loss": 1.0133, + "step": 15525 + }, + { + "epoch": 0.22, + "grad_norm": 0.546875, + "learning_rate": 0.00019095818776506836, + "loss": 0.9309, + "step": 15530 + }, + { + "epoch": 0.22, + "grad_norm": 0.6484375, + "learning_rate": 0.00019094778175337185, + "loss": 1.1443, + "step": 15535 + }, + { + "epoch": 0.22, + "grad_norm": 0.65625, + "learning_rate": 0.00019093737004096485, + "loss": 1.0728, + "step": 15540 + }, + { + "epoch": 0.22, + "grad_norm": 0.625, + "learning_rate": 0.00019092695262849988, + "loss": 0.9081, + "step": 15545 + }, + { + "epoch": 0.22, + "grad_norm": 0.71484375, + "learning_rate": 0.00019091652951662997, + "loss": 0.9224, + "step": 15550 + }, + { + "epoch": 0.22, + "grad_norm": 0.62109375, + "learning_rate": 0.00019090610070600844, + "loss": 0.9895, + "step": 15555 + }, + { + "epoch": 0.22, + "grad_norm": 0.5390625, + "learning_rate": 0.00019089566619728902, + "loss": 0.8279, + "step": 15560 + }, + { + "epoch": 0.22, + "grad_norm": 0.58984375, + "learning_rate": 0.0001908852259911257, + "loss": 0.8762, + "step": 15565 + }, + { + "epoch": 0.22, + "grad_norm": 0.53125, + "learning_rate": 0.0001908747800881729, + "loss": 0.9472, + "step": 15570 + }, + { + "epoch": 0.22, + "grad_norm": 0.5078125, + "learning_rate": 0.0001908643284890854, + "loss": 0.9305, + "step": 15575 + }, + { + "epoch": 0.22, + "grad_norm": 0.59765625, + "learning_rate": 0.0001908538711945183, + "loss": 0.9279, + "step": 15580 + }, + { + "epoch": 0.22, + "grad_norm": 0.609375, + "learning_rate": 0.00019084340820512706, + "loss": 1.154, + "step": 15585 + }, + { + "epoch": 0.22, + "grad_norm": 0.56640625, + "learning_rate": 0.00019083293952156755, + "loss": 1.019, + "step": 15590 + }, + { + "epoch": 0.22, + "grad_norm": 0.59765625, + "learning_rate": 0.00019082246514449594, + "loss": 1.0037, + "step": 15595 + }, + { + "epoch": 0.22, + "grad_norm": 0.671875, + "learning_rate": 0.00019081198507456878, + "loss": 0.8323, + "step": 15600 + }, + { + "epoch": 0.22, + "grad_norm": 0.55078125, + "learning_rate": 0.00019080149931244297, + "loss": 0.9139, + "step": 15605 + }, + { + "epoch": 0.22, + "grad_norm": 0.5546875, + "learning_rate": 0.00019079100785877577, + "loss": 0.7942, + "step": 15610 + }, + { + "epoch": 0.22, + "grad_norm": 0.51953125, + "learning_rate": 0.00019078051071422478, + "loss": 0.9099, + "step": 15615 + }, + { + "epoch": 0.22, + "grad_norm": 0.55859375, + "learning_rate": 0.000190770007879448, + "loss": 1.0347, + "step": 15620 + }, + { + "epoch": 0.22, + "grad_norm": 0.5234375, + "learning_rate": 0.00019075949935510374, + "loss": 0.9075, + "step": 15625 + }, + { + "epoch": 0.22, + "grad_norm": 0.451171875, + "learning_rate": 0.00019074898514185072, + "loss": 0.8973, + "step": 15630 + }, + { + "epoch": 0.22, + "grad_norm": 0.6640625, + "learning_rate": 0.00019073846524034793, + "loss": 1.0093, + "step": 15635 + }, + { + "epoch": 0.22, + "grad_norm": 0.6171875, + "learning_rate": 0.0001907279396512548, + "loss": 1.025, + "step": 15640 + }, + { + "epoch": 0.22, + "grad_norm": 0.609375, + "learning_rate": 0.0001907174083752311, + "loss": 1.1046, + "step": 15645 + }, + { + "epoch": 0.22, + "grad_norm": 0.52734375, + "learning_rate": 0.00019070687141293689, + "loss": 0.807, + "step": 15650 + }, + { + "epoch": 0.22, + "grad_norm": 0.5859375, + "learning_rate": 0.00019069632876503269, + "loss": 0.9024, + "step": 15655 + }, + { + "epoch": 0.22, + "grad_norm": 0.5390625, + "learning_rate": 0.00019068578043217934, + "loss": 0.8765, + "step": 15660 + }, + { + "epoch": 0.22, + "grad_norm": 0.66796875, + "learning_rate": 0.00019067522641503794, + "loss": 0.976, + "step": 15665 + }, + { + "epoch": 0.22, + "grad_norm": 0.6328125, + "learning_rate": 0.0001906646667142701, + "loss": 1.0139, + "step": 15670 + }, + { + "epoch": 0.22, + "grad_norm": 0.498046875, + "learning_rate": 0.00019065410133053766, + "loss": 0.979, + "step": 15675 + }, + { + "epoch": 0.22, + "grad_norm": 0.625, + "learning_rate": 0.00019064353026450296, + "loss": 1.1048, + "step": 15680 + }, + { + "epoch": 0.22, + "grad_norm": 0.55859375, + "learning_rate": 0.00019063295351682852, + "loss": 0.975, + "step": 15685 + }, + { + "epoch": 0.23, + "grad_norm": 0.5, + "learning_rate": 0.00019062237108817732, + "loss": 0.9718, + "step": 15690 + }, + { + "epoch": 0.23, + "grad_norm": 0.43359375, + "learning_rate": 0.00019061178297921272, + "loss": 0.962, + "step": 15695 + }, + { + "epoch": 0.23, + "grad_norm": 0.5390625, + "learning_rate": 0.00019060118919059834, + "loss": 1.074, + "step": 15700 + }, + { + "epoch": 0.23, + "grad_norm": 0.62890625, + "learning_rate": 0.00019059058972299825, + "loss": 1.0041, + "step": 15705 + }, + { + "epoch": 0.23, + "grad_norm": 0.59375, + "learning_rate": 0.00019057998457707682, + "loss": 0.9185, + "step": 15710 + }, + { + "epoch": 0.23, + "grad_norm": 0.60546875, + "learning_rate": 0.00019056937375349877, + "loss": 0.9434, + "step": 15715 + }, + { + "epoch": 0.23, + "grad_norm": 0.8203125, + "learning_rate": 0.00019055875725292927, + "loss": 0.7916, + "step": 15720 + }, + { + "epoch": 0.23, + "grad_norm": 0.5625, + "learning_rate": 0.00019054813507603368, + "loss": 0.9599, + "step": 15725 + }, + { + "epoch": 0.23, + "grad_norm": 0.53125, + "learning_rate": 0.0001905375072234779, + "loss": 0.8582, + "step": 15730 + }, + { + "epoch": 0.23, + "grad_norm": 0.55078125, + "learning_rate": 0.00019052687369592802, + "loss": 0.8423, + "step": 15735 + }, + { + "epoch": 0.23, + "grad_norm": 0.466796875, + "learning_rate": 0.00019051623449405062, + "loss": 0.9381, + "step": 15740 + }, + { + "epoch": 0.23, + "grad_norm": 0.5703125, + "learning_rate": 0.00019050558961851254, + "loss": 0.8004, + "step": 15745 + }, + { + "epoch": 0.23, + "grad_norm": 0.59375, + "learning_rate": 0.00019049493906998102, + "loss": 0.9623, + "step": 15750 + }, + { + "epoch": 0.23, + "grad_norm": 0.578125, + "learning_rate": 0.00019048428284912364, + "loss": 1.1378, + "step": 15755 + }, + { + "epoch": 0.23, + "grad_norm": 0.5703125, + "learning_rate": 0.0001904736209566084, + "loss": 1.0624, + "step": 15760 + }, + { + "epoch": 0.23, + "grad_norm": 0.4921875, + "learning_rate": 0.00019046295339310353, + "loss": 0.8383, + "step": 15765 + }, + { + "epoch": 0.23, + "grad_norm": 0.546875, + "learning_rate": 0.00019045228015927772, + "loss": 0.9157, + "step": 15770 + }, + { + "epoch": 0.23, + "grad_norm": 0.5546875, + "learning_rate": 0.00019044160125579994, + "loss": 0.9132, + "step": 15775 + }, + { + "epoch": 0.23, + "grad_norm": 0.59375, + "learning_rate": 0.00019043091668333965, + "loss": 1.1574, + "step": 15780 + }, + { + "epoch": 0.23, + "grad_norm": 0.59765625, + "learning_rate": 0.00019042022644256648, + "loss": 1.047, + "step": 15785 + }, + { + "epoch": 0.23, + "grad_norm": 0.73046875, + "learning_rate": 0.00019040953053415055, + "loss": 0.919, + "step": 15790 + }, + { + "epoch": 0.23, + "grad_norm": 0.6171875, + "learning_rate": 0.00019039882895876224, + "loss": 0.9386, + "step": 15795 + }, + { + "epoch": 0.23, + "grad_norm": 0.58203125, + "learning_rate": 0.00019038812171707242, + "loss": 0.9384, + "step": 15800 + }, + { + "epoch": 0.23, + "grad_norm": 0.59765625, + "learning_rate": 0.00019037740880975217, + "loss": 1.1885, + "step": 15805 + }, + { + "epoch": 0.23, + "grad_norm": 0.59375, + "learning_rate": 0.000190366690237473, + "loss": 0.9901, + "step": 15810 + }, + { + "epoch": 0.23, + "grad_norm": 0.49609375, + "learning_rate": 0.00019035596600090675, + "loss": 0.9708, + "step": 15815 + }, + { + "epoch": 0.23, + "grad_norm": 0.5390625, + "learning_rate": 0.0001903452361007257, + "loss": 1.0002, + "step": 15820 + }, + { + "epoch": 0.23, + "grad_norm": 0.55859375, + "learning_rate": 0.0001903345005376023, + "loss": 1.0308, + "step": 15825 + }, + { + "epoch": 0.23, + "grad_norm": 0.578125, + "learning_rate": 0.00019032375931220954, + "loss": 0.9574, + "step": 15830 + }, + { + "epoch": 0.23, + "grad_norm": 0.52734375, + "learning_rate": 0.0001903130124252207, + "loss": 0.972, + "step": 15835 + }, + { + "epoch": 0.23, + "grad_norm": 0.58203125, + "learning_rate": 0.00019030225987730934, + "loss": 1.0213, + "step": 15840 + }, + { + "epoch": 0.23, + "grad_norm": 0.55078125, + "learning_rate": 0.0001902915016691495, + "loss": 1.0051, + "step": 15845 + }, + { + "epoch": 0.23, + "grad_norm": 0.51171875, + "learning_rate": 0.00019028073780141553, + "loss": 1.1086, + "step": 15850 + }, + { + "epoch": 0.23, + "grad_norm": 0.54296875, + "learning_rate": 0.00019026996827478207, + "loss": 0.8926, + "step": 15855 + }, + { + "epoch": 0.23, + "grad_norm": 0.6328125, + "learning_rate": 0.0001902591930899242, + "loss": 1.1078, + "step": 15860 + }, + { + "epoch": 0.23, + "grad_norm": 0.60546875, + "learning_rate": 0.00019024841224751728, + "loss": 0.9868, + "step": 15865 + }, + { + "epoch": 0.23, + "grad_norm": 0.71875, + "learning_rate": 0.00019023762574823714, + "loss": 1.1652, + "step": 15870 + }, + { + "epoch": 0.23, + "grad_norm": 0.56640625, + "learning_rate": 0.0001902268335927598, + "loss": 0.849, + "step": 15875 + }, + { + "epoch": 0.23, + "grad_norm": 0.6015625, + "learning_rate": 0.00019021603578176183, + "loss": 1.0021, + "step": 15880 + }, + { + "epoch": 0.23, + "grad_norm": 0.546875, + "learning_rate": 0.00019020523231591993, + "loss": 0.8994, + "step": 15885 + }, + { + "epoch": 0.23, + "grad_norm": 0.578125, + "learning_rate": 0.00019019442319591137, + "loss": 1.0758, + "step": 15890 + }, + { + "epoch": 0.23, + "grad_norm": 0.6015625, + "learning_rate": 0.00019018360842241362, + "loss": 0.9965, + "step": 15895 + }, + { + "epoch": 0.23, + "grad_norm": 0.58203125, + "learning_rate": 0.0001901727879961046, + "loss": 0.8621, + "step": 15900 + }, + { + "epoch": 0.23, + "grad_norm": 0.65234375, + "learning_rate": 0.00019016196191766255, + "loss": 1.0158, + "step": 15905 + }, + { + "epoch": 0.23, + "grad_norm": 0.455078125, + "learning_rate": 0.000190151130187766, + "loss": 1.0215, + "step": 15910 + }, + { + "epoch": 0.23, + "grad_norm": 0.51171875, + "learning_rate": 0.00019014029280709397, + "loss": 0.9089, + "step": 15915 + }, + { + "epoch": 0.23, + "grad_norm": 0.65234375, + "learning_rate": 0.0001901294497763257, + "loss": 0.8874, + "step": 15920 + }, + { + "epoch": 0.23, + "grad_norm": 0.49609375, + "learning_rate": 0.00019011860109614088, + "loss": 0.908, + "step": 15925 + }, + { + "epoch": 0.23, + "grad_norm": 0.54296875, + "learning_rate": 0.00019010774676721947, + "loss": 0.9505, + "step": 15930 + }, + { + "epoch": 0.23, + "grad_norm": 0.5859375, + "learning_rate": 0.0001900968867902419, + "loss": 0.8472, + "step": 15935 + }, + { + "epoch": 0.23, + "grad_norm": 0.59375, + "learning_rate": 0.0001900860211658889, + "loss": 0.9841, + "step": 15940 + }, + { + "epoch": 0.23, + "grad_norm": 0.69140625, + "learning_rate": 0.00019007514989484144, + "loss": 1.1091, + "step": 15945 + }, + { + "epoch": 0.23, + "grad_norm": 0.486328125, + "learning_rate": 0.000190064272977781, + "loss": 0.9715, + "step": 15950 + }, + { + "epoch": 0.23, + "grad_norm": 0.6796875, + "learning_rate": 0.00019005339041538937, + "loss": 1.041, + "step": 15955 + }, + { + "epoch": 0.23, + "grad_norm": 0.5546875, + "learning_rate": 0.00019004250220834866, + "loss": 0.941, + "step": 15960 + }, + { + "epoch": 0.23, + "grad_norm": 0.62890625, + "learning_rate": 0.00019003160835734135, + "loss": 1.0787, + "step": 15965 + }, + { + "epoch": 0.23, + "grad_norm": 0.53125, + "learning_rate": 0.0001900207088630503, + "loss": 0.8941, + "step": 15970 + }, + { + "epoch": 0.23, + "grad_norm": 0.64453125, + "learning_rate": 0.0001900098037261587, + "loss": 1.229, + "step": 15975 + }, + { + "epoch": 0.23, + "grad_norm": 0.55078125, + "learning_rate": 0.0001899988929473501, + "loss": 1.0056, + "step": 15980 + }, + { + "epoch": 0.23, + "grad_norm": 0.50390625, + "learning_rate": 0.00018998797652730837, + "loss": 0.9708, + "step": 15985 + }, + { + "epoch": 0.23, + "grad_norm": 0.54296875, + "learning_rate": 0.00018997705446671778, + "loss": 0.8722, + "step": 15990 + }, + { + "epoch": 0.23, + "grad_norm": 0.6484375, + "learning_rate": 0.00018996612676626295, + "loss": 1.0668, + "step": 15995 + }, + { + "epoch": 0.23, + "grad_norm": 0.486328125, + "learning_rate": 0.00018995519342662883, + "loss": 1.0248, + "step": 16000 + }, + { + "epoch": 0.23, + "grad_norm": 0.5078125, + "learning_rate": 0.00018994425444850076, + "loss": 0.9662, + "step": 16005 + }, + { + "epoch": 0.23, + "grad_norm": 0.578125, + "learning_rate": 0.00018993330983256435, + "loss": 0.9792, + "step": 16010 + }, + { + "epoch": 0.23, + "grad_norm": 0.6484375, + "learning_rate": 0.00018992235957950566, + "loss": 1.0946, + "step": 16015 + }, + { + "epoch": 0.23, + "grad_norm": 0.55078125, + "learning_rate": 0.00018991140369001107, + "loss": 0.8866, + "step": 16020 + }, + { + "epoch": 0.23, + "grad_norm": 0.54296875, + "learning_rate": 0.0001899004421647673, + "loss": 0.9569, + "step": 16025 + }, + { + "epoch": 0.23, + "grad_norm": 0.60546875, + "learning_rate": 0.00018988947500446138, + "loss": 1.0165, + "step": 16030 + }, + { + "epoch": 0.23, + "grad_norm": 0.515625, + "learning_rate": 0.00018987850220978086, + "loss": 0.9958, + "step": 16035 + }, + { + "epoch": 0.23, + "grad_norm": 0.62890625, + "learning_rate": 0.00018986752378141343, + "loss": 1.1076, + "step": 16040 + }, + { + "epoch": 0.23, + "grad_norm": 0.5546875, + "learning_rate": 0.0001898565397200472, + "loss": 0.9612, + "step": 16045 + }, + { + "epoch": 0.23, + "grad_norm": 0.5546875, + "learning_rate": 0.0001898455500263708, + "loss": 0.9617, + "step": 16050 + }, + { + "epoch": 0.23, + "grad_norm": 0.58203125, + "learning_rate": 0.000189834554701073, + "loss": 1.0497, + "step": 16055 + }, + { + "epoch": 0.23, + "grad_norm": 0.56640625, + "learning_rate": 0.00018982355374484296, + "loss": 0.9259, + "step": 16060 + }, + { + "epoch": 0.23, + "grad_norm": 0.66796875, + "learning_rate": 0.0001898125471583703, + "loss": 0.955, + "step": 16065 + }, + { + "epoch": 0.23, + "grad_norm": 0.5859375, + "learning_rate": 0.00018980153494234486, + "loss": 0.8977, + "step": 16070 + }, + { + "epoch": 0.23, + "grad_norm": 0.515625, + "learning_rate": 0.000189790517097457, + "loss": 0.8006, + "step": 16075 + }, + { + "epoch": 0.23, + "grad_norm": 0.5625, + "learning_rate": 0.0001897794936243972, + "loss": 1.0832, + "step": 16080 + }, + { + "epoch": 0.23, + "grad_norm": 0.6875, + "learning_rate": 0.00018976846452385652, + "loss": 1.0089, + "step": 16085 + }, + { + "epoch": 0.23, + "grad_norm": 0.64453125, + "learning_rate": 0.00018975742979652623, + "loss": 0.9711, + "step": 16090 + }, + { + "epoch": 0.23, + "grad_norm": 0.59765625, + "learning_rate": 0.00018974638944309802, + "loss": 0.8898, + "step": 16095 + }, + { + "epoch": 0.23, + "grad_norm": 0.5703125, + "learning_rate": 0.0001897353434642639, + "loss": 0.8701, + "step": 16100 + }, + { + "epoch": 0.23, + "grad_norm": 0.5390625, + "learning_rate": 0.0001897242918607163, + "loss": 0.9969, + "step": 16105 + }, + { + "epoch": 0.23, + "grad_norm": 0.546875, + "learning_rate": 0.00018971323463314784, + "loss": 0.9325, + "step": 16110 + }, + { + "epoch": 0.23, + "grad_norm": 0.57421875, + "learning_rate": 0.00018970217178225168, + "loss": 0.9804, + "step": 16115 + }, + { + "epoch": 0.23, + "grad_norm": 0.546875, + "learning_rate": 0.0001896911033087212, + "loss": 0.8709, + "step": 16120 + }, + { + "epoch": 0.23, + "grad_norm": 0.52734375, + "learning_rate": 0.00018968002921325027, + "loss": 1.0715, + "step": 16125 + }, + { + "epoch": 0.23, + "grad_norm": 0.609375, + "learning_rate": 0.00018966894949653293, + "loss": 1.1704, + "step": 16130 + }, + { + "epoch": 0.23, + "grad_norm": 0.50390625, + "learning_rate": 0.0001896578641592637, + "loss": 0.8445, + "step": 16135 + }, + { + "epoch": 0.23, + "grad_norm": 0.56640625, + "learning_rate": 0.00018964677320213748, + "loss": 0.8603, + "step": 16140 + }, + { + "epoch": 0.23, + "grad_norm": 0.6015625, + "learning_rate": 0.0001896356766258494, + "loss": 0.9586, + "step": 16145 + }, + { + "epoch": 0.23, + "grad_norm": 0.5859375, + "learning_rate": 0.00018962457443109503, + "loss": 0.9594, + "step": 16150 + }, + { + "epoch": 0.23, + "grad_norm": 0.54296875, + "learning_rate": 0.00018961346661857021, + "loss": 0.9027, + "step": 16155 + }, + { + "epoch": 0.23, + "grad_norm": 0.7265625, + "learning_rate": 0.00018960235318897132, + "loss": 0.9021, + "step": 16160 + }, + { + "epoch": 0.23, + "grad_norm": 0.83203125, + "learning_rate": 0.00018959123414299484, + "loss": 1.0627, + "step": 16165 + }, + { + "epoch": 0.23, + "grad_norm": 0.6171875, + "learning_rate": 0.00018958010948133777, + "loss": 1.0311, + "step": 16170 + }, + { + "epoch": 0.23, + "grad_norm": 0.59765625, + "learning_rate": 0.00018956897920469743, + "loss": 0.9209, + "step": 16175 + }, + { + "epoch": 0.23, + "grad_norm": 0.61328125, + "learning_rate": 0.0001895578433137714, + "loss": 0.9624, + "step": 16180 + }, + { + "epoch": 0.23, + "grad_norm": 0.6171875, + "learning_rate": 0.00018954670180925783, + "loss": 1.0426, + "step": 16185 + }, + { + "epoch": 0.23, + "grad_norm": 0.640625, + "learning_rate": 0.000189535554691855, + "loss": 0.9737, + "step": 16190 + }, + { + "epoch": 0.23, + "grad_norm": 0.59375, + "learning_rate": 0.0001895244019622616, + "loss": 1.0148, + "step": 16195 + }, + { + "epoch": 0.23, + "grad_norm": 0.5703125, + "learning_rate": 0.00018951324362117674, + "loss": 0.9849, + "step": 16200 + }, + { + "epoch": 0.23, + "grad_norm": 0.5625, + "learning_rate": 0.0001895020796692998, + "loss": 0.985, + "step": 16205 + }, + { + "epoch": 0.23, + "grad_norm": 0.57421875, + "learning_rate": 0.0001894909101073306, + "loss": 0.7954, + "step": 16210 + }, + { + "epoch": 0.23, + "grad_norm": 0.60546875, + "learning_rate": 0.0001894797349359692, + "loss": 0.9957, + "step": 16215 + }, + { + "epoch": 0.23, + "grad_norm": 0.63671875, + "learning_rate": 0.00018946855415591615, + "loss": 1.074, + "step": 16220 + }, + { + "epoch": 0.23, + "grad_norm": 0.5234375, + "learning_rate": 0.0001894573677678722, + "loss": 0.9898, + "step": 16225 + }, + { + "epoch": 0.23, + "grad_norm": 0.5625, + "learning_rate": 0.00018944617577253855, + "loss": 0.8775, + "step": 16230 + }, + { + "epoch": 0.23, + "grad_norm": 0.640625, + "learning_rate": 0.00018943497817061676, + "loss": 1.0057, + "step": 16235 + }, + { + "epoch": 0.23, + "grad_norm": 0.68359375, + "learning_rate": 0.00018942377496280867, + "loss": 0.8942, + "step": 16240 + }, + { + "epoch": 0.23, + "grad_norm": 0.61328125, + "learning_rate": 0.0001894125661498165, + "loss": 1.0112, + "step": 16245 + }, + { + "epoch": 0.23, + "grad_norm": 0.478515625, + "learning_rate": 0.00018940135173234286, + "loss": 0.7383, + "step": 16250 + }, + { + "epoch": 0.23, + "grad_norm": 0.63671875, + "learning_rate": 0.00018939013171109068, + "loss": 0.965, + "step": 16255 + }, + { + "epoch": 0.23, + "grad_norm": 0.59765625, + "learning_rate": 0.00018937890608676324, + "loss": 0.8931, + "step": 16260 + }, + { + "epoch": 0.23, + "grad_norm": 0.5, + "learning_rate": 0.00018936767486006417, + "loss": 0.9322, + "step": 16265 + }, + { + "epoch": 0.23, + "grad_norm": 0.60546875, + "learning_rate": 0.00018935643803169746, + "loss": 1.1366, + "step": 16270 + }, + { + "epoch": 0.23, + "grad_norm": 0.57421875, + "learning_rate": 0.00018934519560236744, + "loss": 0.8809, + "step": 16275 + }, + { + "epoch": 0.23, + "grad_norm": 0.484375, + "learning_rate": 0.0001893339475727788, + "loss": 0.9132, + "step": 16280 + }, + { + "epoch": 0.23, + "grad_norm": 0.5703125, + "learning_rate": 0.0001893226939436366, + "loss": 0.8929, + "step": 16285 + }, + { + "epoch": 0.23, + "grad_norm": 0.54296875, + "learning_rate": 0.00018931143471564622, + "loss": 0.919, + "step": 16290 + }, + { + "epoch": 0.23, + "grad_norm": 0.52734375, + "learning_rate": 0.00018930016988951334, + "loss": 0.8347, + "step": 16295 + }, + { + "epoch": 0.23, + "grad_norm": 0.51953125, + "learning_rate": 0.00018928889946594416, + "loss": 0.8911, + "step": 16300 + }, + { + "epoch": 0.23, + "grad_norm": 0.55859375, + "learning_rate": 0.00018927762344564503, + "loss": 1.0486, + "step": 16305 + }, + { + "epoch": 0.23, + "grad_norm": 0.59375, + "learning_rate": 0.00018926634182932282, + "loss": 0.9989, + "step": 16310 + }, + { + "epoch": 0.23, + "grad_norm": 0.65234375, + "learning_rate": 0.00018925505461768466, + "loss": 0.949, + "step": 16315 + }, + { + "epoch": 0.23, + "grad_norm": 0.6328125, + "learning_rate": 0.00018924376181143798, + "loss": 1.0124, + "step": 16320 + }, + { + "epoch": 0.23, + "grad_norm": 0.61328125, + "learning_rate": 0.00018923246341129066, + "loss": 0.9844, + "step": 16325 + }, + { + "epoch": 0.23, + "grad_norm": 0.54296875, + "learning_rate": 0.00018922115941795092, + "loss": 0.9543, + "step": 16330 + }, + { + "epoch": 0.23, + "grad_norm": 0.55078125, + "learning_rate": 0.0001892098498321273, + "loss": 1.0143, + "step": 16335 + }, + { + "epoch": 0.23, + "grad_norm": 0.4921875, + "learning_rate": 0.0001891985346545287, + "loss": 0.997, + "step": 16340 + }, + { + "epoch": 0.23, + "grad_norm": 0.55859375, + "learning_rate": 0.00018918721388586435, + "loss": 1.0011, + "step": 16345 + }, + { + "epoch": 0.23, + "grad_norm": 0.5859375, + "learning_rate": 0.00018917588752684385, + "loss": 0.9518, + "step": 16350 + }, + { + "epoch": 0.23, + "grad_norm": 0.5078125, + "learning_rate": 0.00018916455557817718, + "loss": 0.9175, + "step": 16355 + }, + { + "epoch": 0.23, + "grad_norm": 0.58984375, + "learning_rate": 0.0001891532180405746, + "loss": 0.9749, + "step": 16360 + }, + { + "epoch": 0.23, + "grad_norm": 0.4921875, + "learning_rate": 0.00018914187491474678, + "loss": 0.9813, + "step": 16365 + }, + { + "epoch": 0.23, + "grad_norm": 0.5078125, + "learning_rate": 0.0001891305262014047, + "loss": 0.9044, + "step": 16370 + }, + { + "epoch": 0.23, + "grad_norm": 0.56640625, + "learning_rate": 0.00018911917190125976, + "loss": 1.0183, + "step": 16375 + }, + { + "epoch": 0.23, + "grad_norm": 0.578125, + "learning_rate": 0.0001891078120150236, + "loss": 1.0059, + "step": 16380 + }, + { + "epoch": 0.24, + "grad_norm": 0.6484375, + "learning_rate": 0.0001890964465434083, + "loss": 0.9395, + "step": 16385 + }, + { + "epoch": 0.24, + "grad_norm": 0.486328125, + "learning_rate": 0.00018908507548712626, + "loss": 0.9566, + "step": 16390 + }, + { + "epoch": 0.24, + "grad_norm": 0.5703125, + "learning_rate": 0.00018907369884689024, + "loss": 0.9328, + "step": 16395 + }, + { + "epoch": 0.24, + "grad_norm": 0.7578125, + "learning_rate": 0.00018906231662341332, + "loss": 1.021, + "step": 16400 + }, + { + "epoch": 0.24, + "grad_norm": 0.5625, + "learning_rate": 0.00018905092881740899, + "loss": 0.9493, + "step": 16405 + }, + { + "epoch": 0.24, + "grad_norm": 0.6171875, + "learning_rate": 0.00018903953542959097, + "loss": 0.9246, + "step": 16410 + }, + { + "epoch": 0.24, + "grad_norm": 0.6328125, + "learning_rate": 0.0001890281364606735, + "loss": 1.0011, + "step": 16415 + }, + { + "epoch": 0.24, + "grad_norm": 0.58984375, + "learning_rate": 0.00018901673191137102, + "loss": 0.9221, + "step": 16420 + }, + { + "epoch": 0.24, + "grad_norm": 0.62890625, + "learning_rate": 0.0001890053217823984, + "loss": 0.9183, + "step": 16425 + }, + { + "epoch": 0.24, + "grad_norm": 0.55078125, + "learning_rate": 0.00018899390607447086, + "loss": 1.0647, + "step": 16430 + }, + { + "epoch": 0.24, + "grad_norm": 0.54296875, + "learning_rate": 0.0001889824847883039, + "loss": 1.0372, + "step": 16435 + }, + { + "epoch": 0.24, + "grad_norm": 0.578125, + "learning_rate": 0.0001889710579246135, + "loss": 0.9747, + "step": 16440 + }, + { + "epoch": 0.24, + "grad_norm": 0.625, + "learning_rate": 0.00018895962548411583, + "loss": 0.9561, + "step": 16445 + }, + { + "epoch": 0.24, + "grad_norm": 0.6171875, + "learning_rate": 0.0001889481874675275, + "loss": 0.9192, + "step": 16450 + }, + { + "epoch": 0.24, + "grad_norm": 0.5859375, + "learning_rate": 0.0001889367438755655, + "loss": 0.9859, + "step": 16455 + }, + { + "epoch": 0.24, + "grad_norm": 0.703125, + "learning_rate": 0.00018892529470894713, + "loss": 1.0249, + "step": 16460 + }, + { + "epoch": 0.24, + "grad_norm": 0.5859375, + "learning_rate": 0.00018891383996838998, + "loss": 0.9751, + "step": 16465 + }, + { + "epoch": 0.24, + "grad_norm": 0.5390625, + "learning_rate": 0.00018890237965461207, + "loss": 0.8279, + "step": 16470 + }, + { + "epoch": 0.24, + "grad_norm": 0.5546875, + "learning_rate": 0.00018889091376833177, + "loss": 1.1476, + "step": 16475 + }, + { + "epoch": 0.24, + "grad_norm": 0.5234375, + "learning_rate": 0.00018887944231026774, + "loss": 0.9666, + "step": 16480 + }, + { + "epoch": 0.24, + "grad_norm": 0.50390625, + "learning_rate": 0.00018886796528113907, + "loss": 0.9175, + "step": 16485 + }, + { + "epoch": 0.24, + "grad_norm": 0.55078125, + "learning_rate": 0.00018885648268166509, + "loss": 0.8682, + "step": 16490 + }, + { + "epoch": 0.24, + "grad_norm": 0.5078125, + "learning_rate": 0.00018884499451256556, + "loss": 0.8868, + "step": 16495 + }, + { + "epoch": 0.24, + "grad_norm": 0.66015625, + "learning_rate": 0.00018883350077456064, + "loss": 0.9363, + "step": 16500 + }, + { + "epoch": 0.24, + "grad_norm": 0.51171875, + "learning_rate": 0.00018882200146837067, + "loss": 0.9751, + "step": 16505 + }, + { + "epoch": 0.24, + "grad_norm": 0.5078125, + "learning_rate": 0.00018881049659471652, + "loss": 1.0708, + "step": 16510 + }, + { + "epoch": 0.24, + "grad_norm": 0.57421875, + "learning_rate": 0.00018879898615431931, + "loss": 1.0396, + "step": 16515 + }, + { + "epoch": 0.24, + "grad_norm": 0.53125, + "learning_rate": 0.00018878747014790048, + "loss": 0.7451, + "step": 16520 + }, + { + "epoch": 0.24, + "grad_norm": 0.5390625, + "learning_rate": 0.00018877594857618193, + "loss": 0.9338, + "step": 16525 + }, + { + "epoch": 0.24, + "grad_norm": 0.63671875, + "learning_rate": 0.0001887644214398858, + "loss": 1.0115, + "step": 16530 + }, + { + "epoch": 0.24, + "grad_norm": 0.478515625, + "learning_rate": 0.00018875288873973465, + "loss": 0.8998, + "step": 16535 + }, + { + "epoch": 0.24, + "grad_norm": 0.65234375, + "learning_rate": 0.00018874135047645133, + "loss": 1.0624, + "step": 16540 + }, + { + "epoch": 0.24, + "grad_norm": 0.546875, + "learning_rate": 0.0001887298066507591, + "loss": 0.9733, + "step": 16545 + }, + { + "epoch": 0.24, + "grad_norm": 0.59765625, + "learning_rate": 0.00018871825726338157, + "loss": 0.9463, + "step": 16550 + }, + { + "epoch": 0.24, + "grad_norm": 0.59765625, + "learning_rate": 0.00018870670231504257, + "loss": 1.0434, + "step": 16555 + }, + { + "epoch": 0.24, + "grad_norm": 0.59765625, + "learning_rate": 0.0001886951418064665, + "loss": 0.9086, + "step": 16560 + }, + { + "epoch": 0.24, + "grad_norm": 0.48046875, + "learning_rate": 0.00018868357573837788, + "loss": 1.0508, + "step": 16565 + }, + { + "epoch": 0.24, + "grad_norm": 0.55078125, + "learning_rate": 0.00018867200411150177, + "loss": 1.0226, + "step": 16570 + }, + { + "epoch": 0.24, + "grad_norm": 0.53515625, + "learning_rate": 0.00018866042692656344, + "loss": 0.8664, + "step": 16575 + }, + { + "epoch": 0.24, + "grad_norm": 0.6171875, + "learning_rate": 0.00018864884418428855, + "loss": 0.908, + "step": 16580 + }, + { + "epoch": 0.24, + "grad_norm": 0.68359375, + "learning_rate": 0.00018863725588540316, + "loss": 0.9478, + "step": 16585 + }, + { + "epoch": 0.24, + "grad_norm": 0.59375, + "learning_rate": 0.00018862566203063365, + "loss": 1.0568, + "step": 16590 + }, + { + "epoch": 0.24, + "grad_norm": 0.609375, + "learning_rate": 0.0001886140626207067, + "loss": 1.0956, + "step": 16595 + }, + { + "epoch": 0.24, + "grad_norm": 0.58203125, + "learning_rate": 0.00018860245765634935, + "loss": 0.9001, + "step": 16600 + }, + { + "epoch": 0.24, + "grad_norm": 0.54296875, + "learning_rate": 0.00018859084713828908, + "loss": 1.122, + "step": 16605 + }, + { + "epoch": 0.24, + "grad_norm": 0.56640625, + "learning_rate": 0.00018857923106725357, + "loss": 0.9613, + "step": 16610 + }, + { + "epoch": 0.24, + "grad_norm": 0.5078125, + "learning_rate": 0.000188567609443971, + "loss": 1.0178, + "step": 16615 + }, + { + "epoch": 0.24, + "grad_norm": 0.69921875, + "learning_rate": 0.0001885559822691698, + "loss": 1.1274, + "step": 16620 + }, + { + "epoch": 0.24, + "grad_norm": 0.61328125, + "learning_rate": 0.00018854434954357875, + "loss": 0.9865, + "step": 16625 + }, + { + "epoch": 0.24, + "grad_norm": 0.546875, + "learning_rate": 0.00018853271126792706, + "loss": 0.9709, + "step": 16630 + }, + { + "epoch": 0.24, + "grad_norm": 0.578125, + "learning_rate": 0.0001885210674429442, + "loss": 0.9059, + "step": 16635 + }, + { + "epoch": 0.24, + "grad_norm": 0.58984375, + "learning_rate": 0.00018850941806936004, + "loss": 0.7779, + "step": 16640 + }, + { + "epoch": 0.24, + "grad_norm": 0.5625, + "learning_rate": 0.0001884977631479047, + "loss": 0.9693, + "step": 16645 + }, + { + "epoch": 0.24, + "grad_norm": 0.6484375, + "learning_rate": 0.00018848610267930877, + "loss": 0.9123, + "step": 16650 + }, + { + "epoch": 0.24, + "grad_norm": 0.5859375, + "learning_rate": 0.00018847443666430316, + "loss": 1.0178, + "step": 16655 + }, + { + "epoch": 0.24, + "grad_norm": 0.5703125, + "learning_rate": 0.0001884627651036191, + "loss": 0.8877, + "step": 16660 + }, + { + "epoch": 0.24, + "grad_norm": 0.5859375, + "learning_rate": 0.0001884510879979882, + "loss": 1.0474, + "step": 16665 + }, + { + "epoch": 0.24, + "grad_norm": 0.53125, + "learning_rate": 0.00018843940534814233, + "loss": 0.9245, + "step": 16670 + }, + { + "epoch": 0.24, + "grad_norm": 0.59765625, + "learning_rate": 0.00018842771715481382, + "loss": 0.9171, + "step": 16675 + }, + { + "epoch": 0.24, + "grad_norm": 0.494140625, + "learning_rate": 0.00018841602341873527, + "loss": 0.8864, + "step": 16680 + }, + { + "epoch": 0.24, + "grad_norm": 0.6328125, + "learning_rate": 0.00018840432414063974, + "loss": 0.922, + "step": 16685 + }, + { + "epoch": 0.24, + "grad_norm": 0.52734375, + "learning_rate": 0.00018839261932126044, + "loss": 0.8619, + "step": 16690 + }, + { + "epoch": 0.24, + "grad_norm": 0.54296875, + "learning_rate": 0.00018838090896133107, + "loss": 0.9576, + "step": 16695 + }, + { + "epoch": 0.24, + "grad_norm": 0.6328125, + "learning_rate": 0.00018836919306158568, + "loss": 1.0071, + "step": 16700 + }, + { + "epoch": 0.24, + "grad_norm": 0.67578125, + "learning_rate": 0.00018835747162275864, + "loss": 1.0803, + "step": 16705 + }, + { + "epoch": 0.24, + "grad_norm": 0.5390625, + "learning_rate": 0.00018834574464558464, + "loss": 0.8814, + "step": 16710 + }, + { + "epoch": 0.24, + "grad_norm": 0.5546875, + "learning_rate": 0.00018833401213079877, + "loss": 0.938, + "step": 16715 + }, + { + "epoch": 0.24, + "grad_norm": 0.5859375, + "learning_rate": 0.00018832227407913638, + "loss": 1.0113, + "step": 16720 + }, + { + "epoch": 0.24, + "grad_norm": 0.59765625, + "learning_rate": 0.00018831053049133328, + "loss": 0.9646, + "step": 16725 + }, + { + "epoch": 0.24, + "grad_norm": 0.58203125, + "learning_rate": 0.00018829878136812558, + "loss": 1.1012, + "step": 16730 + }, + { + "epoch": 0.24, + "grad_norm": 0.57421875, + "learning_rate": 0.00018828702671024966, + "loss": 1.0641, + "step": 16735 + }, + { + "epoch": 0.24, + "grad_norm": 0.6328125, + "learning_rate": 0.00018827526651844236, + "loss": 0.8878, + "step": 16740 + }, + { + "epoch": 0.24, + "grad_norm": 0.58984375, + "learning_rate": 0.00018826350079344085, + "loss": 0.9392, + "step": 16745 + }, + { + "epoch": 0.24, + "grad_norm": 0.5625, + "learning_rate": 0.00018825172953598252, + "loss": 0.9466, + "step": 16750 + }, + { + "epoch": 0.24, + "grad_norm": 0.5078125, + "learning_rate": 0.00018823995274680532, + "loss": 0.9176, + "step": 16755 + }, + { + "epoch": 0.24, + "grad_norm": 0.6328125, + "learning_rate": 0.0001882281704266474, + "loss": 0.8922, + "step": 16760 + }, + { + "epoch": 0.24, + "grad_norm": 0.54296875, + "learning_rate": 0.00018821638257624724, + "loss": 0.95, + "step": 16765 + }, + { + "epoch": 0.24, + "grad_norm": 0.62109375, + "learning_rate": 0.00018820458919634376, + "loss": 0.9833, + "step": 16770 + }, + { + "epoch": 0.24, + "grad_norm": 0.5625, + "learning_rate": 0.00018819279028767618, + "loss": 0.9822, + "step": 16775 + }, + { + "epoch": 0.24, + "grad_norm": 0.609375, + "learning_rate": 0.00018818098585098405, + "loss": 1.0689, + "step": 16780 + }, + { + "epoch": 0.24, + "grad_norm": 0.58984375, + "learning_rate": 0.00018816917588700732, + "loss": 0.8965, + "step": 16785 + }, + { + "epoch": 0.24, + "grad_norm": 0.56640625, + "learning_rate": 0.0001881573603964862, + "loss": 0.9886, + "step": 16790 + }, + { + "epoch": 0.24, + "grad_norm": 0.625, + "learning_rate": 0.00018814553938016135, + "loss": 1.0189, + "step": 16795 + }, + { + "epoch": 0.24, + "grad_norm": 0.60546875, + "learning_rate": 0.00018813371283877368, + "loss": 1.0355, + "step": 16800 + }, + { + "epoch": 0.24, + "grad_norm": 0.53125, + "learning_rate": 0.0001881218807730645, + "loss": 0.9595, + "step": 16805 + }, + { + "epoch": 0.24, + "grad_norm": 0.59375, + "learning_rate": 0.00018811004318377552, + "loss": 0.9699, + "step": 16810 + }, + { + "epoch": 0.24, + "grad_norm": 0.5625, + "learning_rate": 0.00018809820007164863, + "loss": 0.9936, + "step": 16815 + }, + { + "epoch": 0.24, + "grad_norm": 0.5234375, + "learning_rate": 0.00018808635143742627, + "loss": 0.9886, + "step": 16820 + }, + { + "epoch": 0.24, + "grad_norm": 0.59375, + "learning_rate": 0.00018807449728185108, + "loss": 0.9061, + "step": 16825 + }, + { + "epoch": 0.24, + "grad_norm": 0.6640625, + "learning_rate": 0.00018806263760566606, + "loss": 0.9264, + "step": 16830 + }, + { + "epoch": 0.24, + "grad_norm": 0.57421875, + "learning_rate": 0.00018805077240961464, + "loss": 1.0603, + "step": 16835 + }, + { + "epoch": 0.24, + "grad_norm": 0.51171875, + "learning_rate": 0.0001880389016944405, + "loss": 0.8612, + "step": 16840 + }, + { + "epoch": 0.24, + "grad_norm": 0.5390625, + "learning_rate": 0.0001880270254608878, + "loss": 0.983, + "step": 16845 + }, + { + "epoch": 0.24, + "grad_norm": 0.640625, + "learning_rate": 0.0001880151437097008, + "loss": 0.9251, + "step": 16850 + }, + { + "epoch": 0.24, + "grad_norm": 0.58203125, + "learning_rate": 0.00018800325644162443, + "loss": 1.1613, + "step": 16855 + }, + { + "epoch": 0.24, + "grad_norm": 0.57421875, + "learning_rate": 0.00018799136365740368, + "loss": 0.9665, + "step": 16860 + }, + { + "epoch": 0.24, + "grad_norm": 0.5703125, + "learning_rate": 0.00018797946535778403, + "loss": 1.066, + "step": 16865 + }, + { + "epoch": 0.24, + "grad_norm": 0.5390625, + "learning_rate": 0.00018796756154351133, + "loss": 0.9028, + "step": 16870 + }, + { + "epoch": 0.24, + "grad_norm": 0.5234375, + "learning_rate": 0.00018795565221533167, + "loss": 0.9527, + "step": 16875 + }, + { + "epoch": 0.24, + "grad_norm": 0.6328125, + "learning_rate": 0.00018794373737399152, + "loss": 1.0282, + "step": 16880 + }, + { + "epoch": 0.24, + "grad_norm": 0.51953125, + "learning_rate": 0.0001879318170202378, + "loss": 0.8708, + "step": 16885 + }, + { + "epoch": 0.24, + "grad_norm": 0.54296875, + "learning_rate": 0.00018791989115481762, + "loss": 0.9773, + "step": 16890 + }, + { + "epoch": 0.24, + "grad_norm": 0.51171875, + "learning_rate": 0.00018790795977847852, + "loss": 0.9057, + "step": 16895 + }, + { + "epoch": 0.24, + "grad_norm": 0.56640625, + "learning_rate": 0.00018789602289196838, + "loss": 1.0618, + "step": 16900 + }, + { + "epoch": 0.24, + "grad_norm": 0.5703125, + "learning_rate": 0.0001878840804960355, + "loss": 0.8619, + "step": 16905 + }, + { + "epoch": 0.24, + "grad_norm": 0.55859375, + "learning_rate": 0.00018787213259142827, + "loss": 0.9153, + "step": 16910 + }, + { + "epoch": 0.24, + "grad_norm": 0.578125, + "learning_rate": 0.0001878601791788957, + "loss": 1.0297, + "step": 16915 + }, + { + "epoch": 0.24, + "grad_norm": 0.578125, + "learning_rate": 0.00018784822025918706, + "loss": 0.9308, + "step": 16920 + }, + { + "epoch": 0.24, + "grad_norm": 0.6796875, + "learning_rate": 0.00018783625583305194, + "loss": 1.0167, + "step": 16925 + }, + { + "epoch": 0.24, + "grad_norm": 0.66796875, + "learning_rate": 0.00018782428590124025, + "loss": 0.984, + "step": 16930 + }, + { + "epoch": 0.24, + "grad_norm": 0.61328125, + "learning_rate": 0.0001878123104645023, + "loss": 0.9254, + "step": 16935 + }, + { + "epoch": 0.24, + "grad_norm": 0.66796875, + "learning_rate": 0.0001878003295235887, + "loss": 1.0324, + "step": 16940 + }, + { + "epoch": 0.24, + "grad_norm": 0.5625, + "learning_rate": 0.00018778834307925046, + "loss": 1.0301, + "step": 16945 + }, + { + "epoch": 0.24, + "grad_norm": 0.490234375, + "learning_rate": 0.00018777635113223888, + "loss": 0.8193, + "step": 16950 + }, + { + "epoch": 0.24, + "grad_norm": 0.65234375, + "learning_rate": 0.00018776435368330567, + "loss": 1.0801, + "step": 16955 + }, + { + "epoch": 0.24, + "grad_norm": 0.515625, + "learning_rate": 0.0001877523507332028, + "loss": 0.9553, + "step": 16960 + }, + { + "epoch": 0.24, + "grad_norm": 0.5625, + "learning_rate": 0.00018774034228268265, + "loss": 0.8491, + "step": 16965 + }, + { + "epoch": 0.24, + "grad_norm": 0.5859375, + "learning_rate": 0.00018772832833249792, + "loss": 0.9802, + "step": 16970 + }, + { + "epoch": 0.24, + "grad_norm": 0.6328125, + "learning_rate": 0.00018771630888340165, + "loss": 0.9014, + "step": 16975 + }, + { + "epoch": 0.24, + "grad_norm": 0.5546875, + "learning_rate": 0.00018770428393614724, + "loss": 0.9266, + "step": 16980 + }, + { + "epoch": 0.24, + "grad_norm": 0.59375, + "learning_rate": 0.00018769225349148842, + "loss": 1.0798, + "step": 16985 + }, + { + "epoch": 0.24, + "grad_norm": 0.54296875, + "learning_rate": 0.00018768021755017928, + "loss": 0.9488, + "step": 16990 + }, + { + "epoch": 0.24, + "grad_norm": 0.5859375, + "learning_rate": 0.00018766817611297424, + "loss": 0.9043, + "step": 16995 + }, + { + "epoch": 0.24, + "grad_norm": 0.66015625, + "learning_rate": 0.0001876561291806281, + "loss": 1.104, + "step": 17000 + }, + { + "epoch": 0.24, + "grad_norm": 0.60546875, + "learning_rate": 0.00018764407675389593, + "loss": 1.1461, + "step": 17005 + }, + { + "epoch": 0.24, + "grad_norm": 0.56640625, + "learning_rate": 0.00018763201883353323, + "loss": 1.035, + "step": 17010 + }, + { + "epoch": 0.24, + "grad_norm": 0.53515625, + "learning_rate": 0.00018761995542029576, + "loss": 0.8472, + "step": 17015 + }, + { + "epoch": 0.24, + "grad_norm": 0.57421875, + "learning_rate": 0.00018760788651493973, + "loss": 0.9361, + "step": 17020 + }, + { + "epoch": 0.24, + "grad_norm": 0.5546875, + "learning_rate": 0.00018759581211822156, + "loss": 1.0741, + "step": 17025 + }, + { + "epoch": 0.24, + "grad_norm": 0.59765625, + "learning_rate": 0.0001875837322308981, + "loss": 1.028, + "step": 17030 + }, + { + "epoch": 0.24, + "grad_norm": 0.515625, + "learning_rate": 0.00018757164685372663, + "loss": 1.028, + "step": 17035 + }, + { + "epoch": 0.24, + "grad_norm": 0.57421875, + "learning_rate": 0.00018755955598746455, + "loss": 1.055, + "step": 17040 + }, + { + "epoch": 0.24, + "grad_norm": 0.52734375, + "learning_rate": 0.00018754745963286978, + "loss": 0.8644, + "step": 17045 + }, + { + "epoch": 0.24, + "grad_norm": 0.609375, + "learning_rate": 0.00018753535779070056, + "loss": 0.9841, + "step": 17050 + }, + { + "epoch": 0.24, + "grad_norm": 0.5625, + "learning_rate": 0.00018752325046171537, + "loss": 0.908, + "step": 17055 + }, + { + "epoch": 0.24, + "grad_norm": 0.6015625, + "learning_rate": 0.0001875111376466732, + "loss": 1.0285, + "step": 17060 + }, + { + "epoch": 0.24, + "grad_norm": 0.55859375, + "learning_rate": 0.00018749901934633325, + "loss": 0.9899, + "step": 17065 + }, + { + "epoch": 0.24, + "grad_norm": 0.625, + "learning_rate": 0.0001874868955614551, + "loss": 1.1264, + "step": 17070 + }, + { + "epoch": 0.24, + "grad_norm": 0.59375, + "learning_rate": 0.0001874747662927987, + "loss": 1.0565, + "step": 17075 + }, + { + "epoch": 0.25, + "grad_norm": 0.6015625, + "learning_rate": 0.00018746263154112433, + "loss": 0.9554, + "step": 17080 + }, + { + "epoch": 0.25, + "grad_norm": 0.54296875, + "learning_rate": 0.0001874504913071926, + "loss": 1.0431, + "step": 17085 + }, + { + "epoch": 0.25, + "grad_norm": 0.578125, + "learning_rate": 0.0001874383455917645, + "loss": 0.9707, + "step": 17090 + }, + { + "epoch": 0.25, + "grad_norm": 0.60546875, + "learning_rate": 0.00018742619439560126, + "loss": 1.0394, + "step": 17095 + }, + { + "epoch": 0.25, + "grad_norm": 0.515625, + "learning_rate": 0.00018741403771946463, + "loss": 0.9724, + "step": 17100 + }, + { + "epoch": 0.25, + "grad_norm": 0.5546875, + "learning_rate": 0.00018740187556411653, + "loss": 1.0729, + "step": 17105 + }, + { + "epoch": 0.25, + "grad_norm": 0.56640625, + "learning_rate": 0.0001873897079303194, + "loss": 0.9667, + "step": 17110 + }, + { + "epoch": 0.25, + "grad_norm": 0.5703125, + "learning_rate": 0.00018737753481883575, + "loss": 1.1016, + "step": 17115 + }, + { + "epoch": 0.25, + "grad_norm": 0.6015625, + "learning_rate": 0.00018736535623042877, + "loss": 0.9476, + "step": 17120 + }, + { + "epoch": 0.25, + "grad_norm": 0.55859375, + "learning_rate": 0.00018735317216586172, + "loss": 1.0506, + "step": 17125 + }, + { + "epoch": 0.25, + "grad_norm": 0.640625, + "learning_rate": 0.00018734098262589835, + "loss": 0.9753, + "step": 17130 + }, + { + "epoch": 0.25, + "grad_norm": 0.609375, + "learning_rate": 0.00018732878761130274, + "loss": 0.9667, + "step": 17135 + }, + { + "epoch": 0.25, + "grad_norm": 0.494140625, + "learning_rate": 0.00018731658712283929, + "loss": 0.9733, + "step": 17140 + }, + { + "epoch": 0.25, + "grad_norm": 0.6015625, + "learning_rate": 0.00018730438116127266, + "loss": 0.9274, + "step": 17145 + }, + { + "epoch": 0.25, + "grad_norm": 0.56640625, + "learning_rate": 0.000187292169727368, + "loss": 1.0296, + "step": 17150 + }, + { + "epoch": 0.25, + "grad_norm": 0.55859375, + "learning_rate": 0.00018727995282189074, + "loss": 0.7776, + "step": 17155 + }, + { + "epoch": 0.25, + "grad_norm": 0.48046875, + "learning_rate": 0.00018726773044560664, + "loss": 0.8341, + "step": 17160 + }, + { + "epoch": 0.25, + "grad_norm": 0.58984375, + "learning_rate": 0.0001872555025992818, + "loss": 0.7679, + "step": 17165 + }, + { + "epoch": 0.25, + "grad_norm": 0.53515625, + "learning_rate": 0.00018724326928368267, + "loss": 0.9864, + "step": 17170 + }, + { + "epoch": 0.25, + "grad_norm": 0.62890625, + "learning_rate": 0.00018723103049957606, + "loss": 1.0016, + "step": 17175 + }, + { + "epoch": 0.25, + "grad_norm": 0.625, + "learning_rate": 0.00018721878624772912, + "loss": 0.958, + "step": 17180 + }, + { + "epoch": 0.25, + "grad_norm": 0.55078125, + "learning_rate": 0.00018720653652890934, + "loss": 0.8523, + "step": 17185 + }, + { + "epoch": 0.25, + "grad_norm": 0.57421875, + "learning_rate": 0.0001871942813438845, + "loss": 0.8542, + "step": 17190 + }, + { + "epoch": 0.25, + "grad_norm": 0.54296875, + "learning_rate": 0.00018718202069342282, + "loss": 0.8759, + "step": 17195 + }, + { + "epoch": 0.25, + "grad_norm": 0.546875, + "learning_rate": 0.00018716975457829275, + "loss": 0.9566, + "step": 17200 + }, + { + "epoch": 0.25, + "grad_norm": 0.65625, + "learning_rate": 0.00018715748299926322, + "loss": 1.0393, + "step": 17205 + }, + { + "epoch": 0.25, + "grad_norm": 0.59375, + "learning_rate": 0.0001871452059571034, + "loss": 1.0312, + "step": 17210 + }, + { + "epoch": 0.25, + "grad_norm": 0.48046875, + "learning_rate": 0.0001871329234525828, + "loss": 1.0642, + "step": 17215 + }, + { + "epoch": 0.25, + "grad_norm": 0.69921875, + "learning_rate": 0.00018712063548647132, + "loss": 1.0684, + "step": 17220 + }, + { + "epoch": 0.25, + "grad_norm": 0.61328125, + "learning_rate": 0.0001871083420595392, + "loss": 0.9215, + "step": 17225 + }, + { + "epoch": 0.25, + "grad_norm": 0.62109375, + "learning_rate": 0.00018709604317255699, + "loss": 0.9932, + "step": 17230 + }, + { + "epoch": 0.25, + "grad_norm": 0.5234375, + "learning_rate": 0.0001870837388262956, + "loss": 0.8693, + "step": 17235 + }, + { + "epoch": 0.25, + "grad_norm": 0.58203125, + "learning_rate": 0.00018707142902152626, + "loss": 0.9005, + "step": 17240 + }, + { + "epoch": 0.25, + "grad_norm": 0.65234375, + "learning_rate": 0.00018705911375902062, + "loss": 1.029, + "step": 17245 + }, + { + "epoch": 0.25, + "grad_norm": 0.5546875, + "learning_rate": 0.00018704679303955054, + "loss": 1.0987, + "step": 17250 + }, + { + "epoch": 0.25, + "grad_norm": 0.52734375, + "learning_rate": 0.00018703446686388838, + "loss": 0.9246, + "step": 17255 + }, + { + "epoch": 0.25, + "grad_norm": 0.8203125, + "learning_rate": 0.00018702213523280668, + "loss": 0.9038, + "step": 17260 + }, + { + "epoch": 0.25, + "grad_norm": 0.546875, + "learning_rate": 0.00018700979814707843, + "loss": 1.0012, + "step": 17265 + }, + { + "epoch": 0.25, + "grad_norm": 0.498046875, + "learning_rate": 0.00018699745560747696, + "loss": 0.8978, + "step": 17270 + }, + { + "epoch": 0.25, + "grad_norm": 0.6328125, + "learning_rate": 0.00018698510761477587, + "loss": 0.9811, + "step": 17275 + }, + { + "epoch": 0.25, + "grad_norm": 0.640625, + "learning_rate": 0.00018697275416974915, + "loss": 0.9977, + "step": 17280 + }, + { + "epoch": 0.25, + "grad_norm": 0.609375, + "learning_rate": 0.00018696039527317117, + "loss": 0.9776, + "step": 17285 + }, + { + "epoch": 0.25, + "grad_norm": 0.703125, + "learning_rate": 0.00018694803092581655, + "loss": 1.0117, + "step": 17290 + }, + { + "epoch": 0.25, + "grad_norm": 0.578125, + "learning_rate": 0.00018693566112846038, + "loss": 0.9689, + "step": 17295 + }, + { + "epoch": 0.25, + "grad_norm": 0.5234375, + "learning_rate": 0.0001869232858818779, + "loss": 0.8599, + "step": 17300 + }, + { + "epoch": 0.25, + "grad_norm": 0.62890625, + "learning_rate": 0.0001869109051868449, + "loss": 0.9914, + "step": 17305 + }, + { + "epoch": 0.25, + "grad_norm": 0.51953125, + "learning_rate": 0.00018689851904413738, + "loss": 1.0214, + "step": 17310 + }, + { + "epoch": 0.25, + "grad_norm": 0.5703125, + "learning_rate": 0.00018688612745453172, + "loss": 0.9126, + "step": 17315 + }, + { + "epoch": 0.25, + "grad_norm": 0.55859375, + "learning_rate": 0.0001868737304188046, + "loss": 0.972, + "step": 17320 + }, + { + "epoch": 0.25, + "grad_norm": 0.51171875, + "learning_rate": 0.00018686132793773315, + "loss": 0.9786, + "step": 17325 + }, + { + "epoch": 0.25, + "grad_norm": 0.5234375, + "learning_rate": 0.00018684892001209473, + "loss": 0.8262, + "step": 17330 + }, + { + "epoch": 0.25, + "grad_norm": 0.578125, + "learning_rate": 0.00018683650664266707, + "loss": 0.9764, + "step": 17335 + }, + { + "epoch": 0.25, + "grad_norm": 0.578125, + "learning_rate": 0.00018682408783022832, + "loss": 1.0312, + "step": 17340 + }, + { + "epoch": 0.25, + "grad_norm": 0.62109375, + "learning_rate": 0.0001868116635755568, + "loss": 0.8887, + "step": 17345 + }, + { + "epoch": 0.25, + "grad_norm": 0.5234375, + "learning_rate": 0.0001867992338794314, + "loss": 0.9327, + "step": 17350 + }, + { + "epoch": 0.25, + "grad_norm": 0.6640625, + "learning_rate": 0.00018678679874263113, + "loss": 1.0066, + "step": 17355 + }, + { + "epoch": 0.25, + "grad_norm": 0.5390625, + "learning_rate": 0.00018677435816593548, + "loss": 1.0451, + "step": 17360 + }, + { + "epoch": 0.25, + "grad_norm": 0.48828125, + "learning_rate": 0.00018676191215012423, + "loss": 0.8877, + "step": 17365 + }, + { + "epoch": 0.25, + "grad_norm": 0.5234375, + "learning_rate": 0.0001867494606959775, + "loss": 0.951, + "step": 17370 + }, + { + "epoch": 0.25, + "grad_norm": 0.55078125, + "learning_rate": 0.00018673700380427582, + "loss": 1.0618, + "step": 17375 + }, + { + "epoch": 0.25, + "grad_norm": 0.5703125, + "learning_rate": 0.00018672454147579992, + "loss": 1.0022, + "step": 17380 + }, + { + "epoch": 0.25, + "grad_norm": 0.55859375, + "learning_rate": 0.00018671207371133097, + "loss": 0.85, + "step": 17385 + }, + { + "epoch": 0.25, + "grad_norm": 0.50390625, + "learning_rate": 0.00018669960051165052, + "loss": 0.8242, + "step": 17390 + }, + { + "epoch": 0.25, + "grad_norm": 0.53515625, + "learning_rate": 0.00018668712187754034, + "loss": 0.9412, + "step": 17395 + }, + { + "epoch": 0.25, + "grad_norm": 0.5859375, + "learning_rate": 0.00018667463780978265, + "loss": 0.9668, + "step": 17400 + }, + { + "epoch": 0.25, + "grad_norm": 0.4765625, + "learning_rate": 0.00018666214830915997, + "loss": 0.9269, + "step": 17405 + }, + { + "epoch": 0.25, + "grad_norm": 0.71875, + "learning_rate": 0.0001866496533764551, + "loss": 1.0226, + "step": 17410 + }, + { + "epoch": 0.25, + "grad_norm": 0.546875, + "learning_rate": 0.00018663715301245128, + "loss": 0.9235, + "step": 17415 + }, + { + "epoch": 0.25, + "grad_norm": 0.5546875, + "learning_rate": 0.00018662464721793203, + "loss": 1.1844, + "step": 17420 + }, + { + "epoch": 0.25, + "grad_norm": 0.65234375, + "learning_rate": 0.00018661213599368125, + "loss": 1.0601, + "step": 17425 + }, + { + "epoch": 0.25, + "grad_norm": 0.578125, + "learning_rate": 0.00018659961934048313, + "loss": 1.0451, + "step": 17430 + }, + { + "epoch": 0.25, + "grad_norm": 0.62890625, + "learning_rate": 0.00018658709725912225, + "loss": 0.9832, + "step": 17435 + }, + { + "epoch": 0.25, + "grad_norm": 0.56640625, + "learning_rate": 0.0001865745697503835, + "loss": 1.0443, + "step": 17440 + }, + { + "epoch": 0.25, + "grad_norm": 0.66796875, + "learning_rate": 0.0001865620368150521, + "loss": 0.8875, + "step": 17445 + }, + { + "epoch": 0.25, + "grad_norm": 0.58203125, + "learning_rate": 0.00018654949845391366, + "loss": 0.8692, + "step": 17450 + }, + { + "epoch": 0.25, + "grad_norm": 0.59765625, + "learning_rate": 0.0001865369546677541, + "loss": 1.0209, + "step": 17455 + }, + { + "epoch": 0.25, + "grad_norm": 0.416015625, + "learning_rate": 0.00018652440545735964, + "loss": 0.7376, + "step": 17460 + }, + { + "epoch": 0.25, + "grad_norm": 0.494140625, + "learning_rate": 0.0001865118508235169, + "loss": 0.8804, + "step": 17465 + }, + { + "epoch": 0.25, + "grad_norm": 0.62109375, + "learning_rate": 0.00018649929076701283, + "loss": 1.0329, + "step": 17470 + }, + { + "epoch": 0.25, + "grad_norm": 0.66796875, + "learning_rate": 0.0001864867252886347, + "loss": 0.9949, + "step": 17475 + }, + { + "epoch": 0.25, + "grad_norm": 0.51953125, + "learning_rate": 0.00018647415438917015, + "loss": 0.9137, + "step": 17480 + }, + { + "epoch": 0.25, + "grad_norm": 0.578125, + "learning_rate": 0.00018646157806940708, + "loss": 0.9676, + "step": 17485 + }, + { + "epoch": 0.25, + "grad_norm": 0.546875, + "learning_rate": 0.00018644899633013388, + "loss": 1.0912, + "step": 17490 + }, + { + "epoch": 0.25, + "grad_norm": 0.5625, + "learning_rate": 0.00018643640917213907, + "loss": 0.9975, + "step": 17495 + }, + { + "epoch": 0.25, + "grad_norm": 0.55078125, + "learning_rate": 0.00018642381659621173, + "loss": 0.932, + "step": 17500 + }, + { + "epoch": 0.25, + "grad_norm": 0.57421875, + "learning_rate": 0.00018641121860314114, + "loss": 1.0452, + "step": 17505 + }, + { + "epoch": 0.25, + "grad_norm": 0.5, + "learning_rate": 0.00018639861519371693, + "loss": 0.9322, + "step": 17510 + }, + { + "epoch": 0.25, + "grad_norm": 0.5859375, + "learning_rate": 0.00018638600636872914, + "loss": 0.8564, + "step": 17515 + }, + { + "epoch": 0.25, + "grad_norm": 0.5234375, + "learning_rate": 0.00018637339212896806, + "loss": 0.9579, + "step": 17520 + }, + { + "epoch": 0.25, + "grad_norm": 0.578125, + "learning_rate": 0.0001863607724752244, + "loss": 0.9324, + "step": 17525 + }, + { + "epoch": 0.25, + "grad_norm": 0.49609375, + "learning_rate": 0.0001863481474082892, + "loss": 0.9017, + "step": 17530 + }, + { + "epoch": 0.25, + "grad_norm": 0.6796875, + "learning_rate": 0.00018633551692895373, + "loss": 0.9417, + "step": 17535 + }, + { + "epoch": 0.25, + "grad_norm": 0.8203125, + "learning_rate": 0.00018632288103800974, + "loss": 0.8638, + "step": 17540 + }, + { + "epoch": 0.25, + "grad_norm": 0.5, + "learning_rate": 0.00018631023973624927, + "loss": 0.9747, + "step": 17545 + }, + { + "epoch": 0.25, + "grad_norm": 0.63671875, + "learning_rate": 0.00018629759302446467, + "loss": 0.8661, + "step": 17550 + }, + { + "epoch": 0.25, + "grad_norm": 0.6171875, + "learning_rate": 0.00018628494090344863, + "loss": 1.106, + "step": 17555 + }, + { + "epoch": 0.25, + "grad_norm": 0.67578125, + "learning_rate": 0.00018627228337399427, + "loss": 1.0067, + "step": 17560 + }, + { + "epoch": 0.25, + "grad_norm": 0.609375, + "learning_rate": 0.0001862596204368949, + "loss": 1.1062, + "step": 17565 + }, + { + "epoch": 0.25, + "grad_norm": 0.58203125, + "learning_rate": 0.00018624695209294427, + "loss": 0.8607, + "step": 17570 + }, + { + "epoch": 0.25, + "grad_norm": 0.53515625, + "learning_rate": 0.0001862342783429365, + "loss": 1.0601, + "step": 17575 + }, + { + "epoch": 0.25, + "grad_norm": 0.54296875, + "learning_rate": 0.0001862215991876659, + "loss": 0.8794, + "step": 17580 + }, + { + "epoch": 0.25, + "grad_norm": 0.5859375, + "learning_rate": 0.00018620891462792729, + "loss": 0.9462, + "step": 17585 + }, + { + "epoch": 0.25, + "grad_norm": 0.5859375, + "learning_rate": 0.00018619622466451574, + "loss": 1.0127, + "step": 17590 + }, + { + "epoch": 0.25, + "grad_norm": 0.498046875, + "learning_rate": 0.00018618352929822662, + "loss": 1.0018, + "step": 17595 + }, + { + "epoch": 0.25, + "grad_norm": 0.54296875, + "learning_rate": 0.00018617082852985577, + "loss": 1.0369, + "step": 17600 + }, + { + "epoch": 0.25, + "grad_norm": 0.609375, + "learning_rate": 0.00018615812236019922, + "loss": 0.9432, + "step": 17605 + }, + { + "epoch": 0.25, + "grad_norm": 0.5390625, + "learning_rate": 0.00018614541079005345, + "loss": 0.8503, + "step": 17610 + }, + { + "epoch": 0.25, + "grad_norm": 0.51953125, + "learning_rate": 0.00018613269382021522, + "loss": 0.9858, + "step": 17615 + }, + { + "epoch": 0.25, + "grad_norm": 0.578125, + "learning_rate": 0.00018611997145148165, + "loss": 0.9059, + "step": 17620 + }, + { + "epoch": 0.25, + "grad_norm": 0.609375, + "learning_rate": 0.00018610724368465015, + "loss": 1.0486, + "step": 17625 + }, + { + "epoch": 0.25, + "grad_norm": 0.60546875, + "learning_rate": 0.00018609451052051858, + "loss": 0.9702, + "step": 17630 + }, + { + "epoch": 0.25, + "grad_norm": 0.70703125, + "learning_rate": 0.00018608177195988504, + "loss": 1.2176, + "step": 17635 + }, + { + "epoch": 0.25, + "grad_norm": 0.55859375, + "learning_rate": 0.00018606902800354796, + "loss": 1.0093, + "step": 17640 + }, + { + "epoch": 0.25, + "grad_norm": 0.6171875, + "learning_rate": 0.0001860562786523062, + "loss": 0.9306, + "step": 17645 + }, + { + "epoch": 0.25, + "grad_norm": 0.6953125, + "learning_rate": 0.0001860435239069589, + "loss": 1.1033, + "step": 17650 + }, + { + "epoch": 0.25, + "grad_norm": 0.64453125, + "learning_rate": 0.00018603076376830555, + "loss": 0.9792, + "step": 17655 + }, + { + "epoch": 0.25, + "grad_norm": 0.6953125, + "learning_rate": 0.0001860179982371459, + "loss": 0.9309, + "step": 17660 + }, + { + "epoch": 0.25, + "grad_norm": 0.65625, + "learning_rate": 0.00018600522731428016, + "loss": 1.0316, + "step": 17665 + }, + { + "epoch": 0.25, + "grad_norm": 0.5859375, + "learning_rate": 0.00018599245100050883, + "loss": 1.0418, + "step": 17670 + }, + { + "epoch": 0.25, + "grad_norm": 0.57421875, + "learning_rate": 0.00018597966929663273, + "loss": 1.02, + "step": 17675 + }, + { + "epoch": 0.25, + "grad_norm": 0.546875, + "learning_rate": 0.00018596688220345303, + "loss": 0.9424, + "step": 17680 + }, + { + "epoch": 0.25, + "grad_norm": 0.59765625, + "learning_rate": 0.00018595408972177127, + "loss": 1.0338, + "step": 17685 + }, + { + "epoch": 0.25, + "grad_norm": 0.61328125, + "learning_rate": 0.00018594129185238925, + "loss": 0.9645, + "step": 17690 + }, + { + "epoch": 0.25, + "grad_norm": 0.578125, + "learning_rate": 0.00018592848859610918, + "loss": 0.9934, + "step": 17695 + }, + { + "epoch": 0.25, + "grad_norm": 0.59375, + "learning_rate": 0.0001859156799537336, + "loss": 1.063, + "step": 17700 + }, + { + "epoch": 0.25, + "grad_norm": 0.5, + "learning_rate": 0.0001859028659260653, + "loss": 0.9458, + "step": 17705 + }, + { + "epoch": 0.25, + "grad_norm": 0.578125, + "learning_rate": 0.00018589004651390758, + "loss": 0.9579, + "step": 17710 + }, + { + "epoch": 0.25, + "grad_norm": 0.71875, + "learning_rate": 0.0001858772217180639, + "loss": 1.0698, + "step": 17715 + }, + { + "epoch": 0.25, + "grad_norm": 0.59765625, + "learning_rate": 0.0001858643915393382, + "loss": 0.9331, + "step": 17720 + }, + { + "epoch": 0.25, + "grad_norm": 0.55078125, + "learning_rate": 0.0001858515559785346, + "loss": 0.8905, + "step": 17725 + }, + { + "epoch": 0.25, + "grad_norm": 0.58203125, + "learning_rate": 0.0001858387150364577, + "loss": 0.9107, + "step": 17730 + }, + { + "epoch": 0.25, + "grad_norm": 0.5546875, + "learning_rate": 0.00018582586871391236, + "loss": 1.063, + "step": 17735 + }, + { + "epoch": 0.25, + "grad_norm": 0.5625, + "learning_rate": 0.00018581301701170387, + "loss": 1.0574, + "step": 17740 + }, + { + "epoch": 0.25, + "grad_norm": 0.6171875, + "learning_rate": 0.0001858001599306377, + "loss": 1.01, + "step": 17745 + }, + { + "epoch": 0.25, + "grad_norm": 0.6015625, + "learning_rate": 0.0001857872974715198, + "loss": 0.9275, + "step": 17750 + }, + { + "epoch": 0.25, + "grad_norm": 0.5234375, + "learning_rate": 0.0001857744296351564, + "loss": 0.9735, + "step": 17755 + }, + { + "epoch": 0.25, + "grad_norm": 0.52734375, + "learning_rate": 0.00018576155642235407, + "loss": 1.0272, + "step": 17760 + }, + { + "epoch": 0.25, + "grad_norm": 0.58203125, + "learning_rate": 0.0001857486778339197, + "loss": 1.0457, + "step": 17765 + }, + { + "epoch": 0.25, + "grad_norm": 0.609375, + "learning_rate": 0.00018573579387066053, + "loss": 0.9561, + "step": 17770 + }, + { + "epoch": 0.25, + "grad_norm": 0.59375, + "learning_rate": 0.00018572290453338416, + "loss": 0.9612, + "step": 17775 + }, + { + "epoch": 0.26, + "grad_norm": 0.57421875, + "learning_rate": 0.0001857100098228985, + "loss": 0.8423, + "step": 17780 + }, + { + "epoch": 0.26, + "grad_norm": 0.515625, + "learning_rate": 0.00018569710974001183, + "loss": 1.0384, + "step": 17785 + }, + { + "epoch": 0.26, + "grad_norm": 0.54296875, + "learning_rate": 0.00018568420428553272, + "loss": 1.1032, + "step": 17790 + }, + { + "epoch": 0.26, + "grad_norm": 0.5078125, + "learning_rate": 0.00018567129346027007, + "loss": 1.1377, + "step": 17795 + }, + { + "epoch": 0.26, + "grad_norm": 0.58203125, + "learning_rate": 0.00018565837726503318, + "loss": 0.9317, + "step": 17800 + }, + { + "epoch": 0.26, + "grad_norm": 0.53515625, + "learning_rate": 0.00018564545570063168, + "loss": 0.9791, + "step": 17805 + }, + { + "epoch": 0.26, + "grad_norm": 0.6484375, + "learning_rate": 0.00018563252876787546, + "loss": 0.9285, + "step": 17810 + }, + { + "epoch": 0.26, + "grad_norm": 0.53125, + "learning_rate": 0.00018561959646757478, + "loss": 0.8951, + "step": 17815 + }, + { + "epoch": 0.26, + "grad_norm": 0.57421875, + "learning_rate": 0.00018560665880054032, + "loss": 0.9693, + "step": 17820 + }, + { + "epoch": 0.26, + "grad_norm": 0.54296875, + "learning_rate": 0.00018559371576758296, + "loss": 0.8104, + "step": 17825 + }, + { + "epoch": 0.26, + "grad_norm": 0.5703125, + "learning_rate": 0.00018558076736951404, + "loss": 0.9264, + "step": 17830 + }, + { + "epoch": 0.26, + "grad_norm": 0.490234375, + "learning_rate": 0.00018556781360714512, + "loss": 1.1434, + "step": 17835 + }, + { + "epoch": 0.26, + "grad_norm": 0.53125, + "learning_rate": 0.0001855548544812882, + "loss": 0.9131, + "step": 17840 + }, + { + "epoch": 0.26, + "grad_norm": 0.59765625, + "learning_rate": 0.0001855418899927556, + "loss": 1.0054, + "step": 17845 + }, + { + "epoch": 0.26, + "grad_norm": 0.6171875, + "learning_rate": 0.00018552892014235986, + "loss": 0.9281, + "step": 17850 + }, + { + "epoch": 0.26, + "grad_norm": 0.625, + "learning_rate": 0.000185515944930914, + "loss": 1.033, + "step": 17855 + }, + { + "epoch": 0.26, + "grad_norm": 0.5859375, + "learning_rate": 0.00018550296435923135, + "loss": 0.9832, + "step": 17860 + }, + { + "epoch": 0.26, + "grad_norm": 0.69921875, + "learning_rate": 0.00018548997842812552, + "loss": 1.0676, + "step": 17865 + }, + { + "epoch": 0.26, + "grad_norm": 0.55859375, + "learning_rate": 0.00018547698713841047, + "loss": 0.847, + "step": 17870 + }, + { + "epoch": 0.26, + "grad_norm": 0.5859375, + "learning_rate": 0.00018546399049090048, + "loss": 0.8618, + "step": 17875 + }, + { + "epoch": 0.26, + "grad_norm": 0.59765625, + "learning_rate": 0.00018545098848641025, + "loss": 0.9026, + "step": 17880 + }, + { + "epoch": 0.26, + "grad_norm": 0.578125, + "learning_rate": 0.00018543798112575474, + "loss": 1.1252, + "step": 17885 + }, + { + "epoch": 0.26, + "grad_norm": 0.55078125, + "learning_rate": 0.0001854249684097493, + "loss": 0.8721, + "step": 17890 + }, + { + "epoch": 0.26, + "grad_norm": 0.5546875, + "learning_rate": 0.0001854119503392095, + "loss": 0.9944, + "step": 17895 + }, + { + "epoch": 0.26, + "grad_norm": 0.56640625, + "learning_rate": 0.0001853989269149514, + "loss": 0.9431, + "step": 17900 + }, + { + "epoch": 0.26, + "grad_norm": 0.62890625, + "learning_rate": 0.0001853858981377913, + "loss": 1.072, + "step": 17905 + }, + { + "epoch": 0.26, + "grad_norm": 0.62109375, + "learning_rate": 0.00018537286400854583, + "loss": 0.9626, + "step": 17910 + }, + { + "epoch": 0.26, + "grad_norm": 0.55859375, + "learning_rate": 0.00018535982452803204, + "loss": 0.9894, + "step": 17915 + }, + { + "epoch": 0.26, + "grad_norm": 0.62109375, + "learning_rate": 0.00018534677969706724, + "loss": 0.9762, + "step": 17920 + }, + { + "epoch": 0.26, + "grad_norm": 0.53515625, + "learning_rate": 0.00018533372951646908, + "loss": 0.9588, + "step": 17925 + }, + { + "epoch": 0.26, + "grad_norm": 0.52734375, + "learning_rate": 0.00018532067398705556, + "loss": 0.828, + "step": 17930 + }, + { + "epoch": 0.26, + "grad_norm": 0.625, + "learning_rate": 0.00018530761310964504, + "loss": 1.1494, + "step": 17935 + }, + { + "epoch": 0.26, + "grad_norm": 0.6875, + "learning_rate": 0.00018529454688505614, + "loss": 1.1224, + "step": 17940 + }, + { + "epoch": 0.26, + "grad_norm": 0.63671875, + "learning_rate": 0.00018528147531410793, + "loss": 0.931, + "step": 17945 + }, + { + "epoch": 0.26, + "grad_norm": 0.625, + "learning_rate": 0.00018526839839761968, + "loss": 0.9902, + "step": 17950 + }, + { + "epoch": 0.26, + "grad_norm": 0.58984375, + "learning_rate": 0.00018525531613641113, + "loss": 0.9354, + "step": 17955 + }, + { + "epoch": 0.26, + "grad_norm": 0.5859375, + "learning_rate": 0.0001852422285313023, + "loss": 0.9686, + "step": 17960 + }, + { + "epoch": 0.26, + "grad_norm": 0.6953125, + "learning_rate": 0.00018522913558311345, + "loss": 0.9746, + "step": 17965 + }, + { + "epoch": 0.26, + "grad_norm": 0.61328125, + "learning_rate": 0.00018521603729266536, + "loss": 0.9197, + "step": 17970 + }, + { + "epoch": 0.26, + "grad_norm": 0.58203125, + "learning_rate": 0.00018520293366077896, + "loss": 1.0712, + "step": 17975 + }, + { + "epoch": 0.26, + "grad_norm": 0.56640625, + "learning_rate": 0.00018518982468827567, + "loss": 0.9947, + "step": 17980 + }, + { + "epoch": 0.26, + "grad_norm": 0.5625, + "learning_rate": 0.00018517671037597712, + "loss": 0.8395, + "step": 17985 + }, + { + "epoch": 0.26, + "grad_norm": 0.6171875, + "learning_rate": 0.00018516359072470536, + "loss": 0.9635, + "step": 17990 + }, + { + "epoch": 0.26, + "grad_norm": 0.578125, + "learning_rate": 0.00018515046573528275, + "loss": 0.9441, + "step": 17995 + }, + { + "epoch": 0.26, + "grad_norm": 0.5859375, + "learning_rate": 0.00018513733540853196, + "loss": 0.9905, + "step": 18000 + }, + { + "epoch": 0.26, + "grad_norm": 0.58203125, + "learning_rate": 0.00018512419974527604, + "loss": 0.9265, + "step": 18005 + }, + { + "epoch": 0.26, + "grad_norm": 0.625, + "learning_rate": 0.00018511105874633832, + "loss": 0.9096, + "step": 18010 + }, + { + "epoch": 0.26, + "grad_norm": 0.53515625, + "learning_rate": 0.00018509791241254253, + "loss": 0.9061, + "step": 18015 + }, + { + "epoch": 0.26, + "grad_norm": 0.703125, + "learning_rate": 0.00018508476074471263, + "loss": 1.0156, + "step": 18020 + }, + { + "epoch": 0.26, + "grad_norm": 0.609375, + "learning_rate": 0.00018507160374367306, + "loss": 1.0067, + "step": 18025 + }, + { + "epoch": 0.26, + "grad_norm": 0.5390625, + "learning_rate": 0.00018505844141024843, + "loss": 0.9401, + "step": 18030 + }, + { + "epoch": 0.26, + "grad_norm": 0.6328125, + "learning_rate": 0.00018504527374526387, + "loss": 1.0436, + "step": 18035 + }, + { + "epoch": 0.26, + "grad_norm": 0.58203125, + "learning_rate": 0.00018503210074954468, + "loss": 1.0336, + "step": 18040 + }, + { + "epoch": 0.26, + "grad_norm": 0.625, + "learning_rate": 0.00018501892242391653, + "loss": 0.9606, + "step": 18045 + }, + { + "epoch": 0.26, + "grad_norm": 0.5703125, + "learning_rate": 0.00018500573876920555, + "loss": 0.9777, + "step": 18050 + }, + { + "epoch": 0.26, + "grad_norm": 0.5390625, + "learning_rate": 0.000184992549786238, + "loss": 0.9084, + "step": 18055 + }, + { + "epoch": 0.26, + "grad_norm": 0.59375, + "learning_rate": 0.00018497935547584068, + "loss": 1.0103, + "step": 18060 + }, + { + "epoch": 0.26, + "grad_norm": 0.56640625, + "learning_rate": 0.00018496615583884056, + "loss": 0.9744, + "step": 18065 + }, + { + "epoch": 0.26, + "grad_norm": 0.54296875, + "learning_rate": 0.000184952950876065, + "loss": 0.9016, + "step": 18070 + }, + { + "epoch": 0.26, + "grad_norm": 0.54296875, + "learning_rate": 0.00018493974058834178, + "loss": 0.8987, + "step": 18075 + }, + { + "epoch": 0.26, + "grad_norm": 0.55078125, + "learning_rate": 0.00018492652497649882, + "loss": 0.8471, + "step": 18080 + }, + { + "epoch": 0.26, + "grad_norm": 0.734375, + "learning_rate": 0.00018491330404136458, + "loss": 1.0032, + "step": 18085 + }, + { + "epoch": 0.26, + "grad_norm": 0.54296875, + "learning_rate": 0.00018490007778376776, + "loss": 0.8814, + "step": 18090 + }, + { + "epoch": 0.26, + "grad_norm": 0.59765625, + "learning_rate": 0.00018488684620453731, + "loss": 0.9529, + "step": 18095 + }, + { + "epoch": 0.26, + "grad_norm": 0.52734375, + "learning_rate": 0.00018487360930450272, + "loss": 1.0356, + "step": 18100 + }, + { + "epoch": 0.26, + "grad_norm": 0.62890625, + "learning_rate": 0.0001848603670844936, + "loss": 1.0452, + "step": 18105 + }, + { + "epoch": 0.26, + "grad_norm": 0.427734375, + "learning_rate": 0.00018484711954534002, + "loss": 0.723, + "step": 18110 + }, + { + "epoch": 0.26, + "grad_norm": 0.54296875, + "learning_rate": 0.0001848338666878724, + "loss": 1.0693, + "step": 18115 + }, + { + "epoch": 0.26, + "grad_norm": 0.5234375, + "learning_rate": 0.00018482060851292132, + "loss": 0.9527, + "step": 18120 + }, + { + "epoch": 0.26, + "grad_norm": 0.6015625, + "learning_rate": 0.00018480734502131796, + "loss": 0.9394, + "step": 18125 + }, + { + "epoch": 0.26, + "grad_norm": 0.50390625, + "learning_rate": 0.00018479407621389363, + "loss": 0.8132, + "step": 18130 + }, + { + "epoch": 0.26, + "grad_norm": 0.55859375, + "learning_rate": 0.00018478080209148, + "loss": 1.0694, + "step": 18135 + }, + { + "epoch": 0.26, + "grad_norm": 0.6015625, + "learning_rate": 0.00018476752265490914, + "loss": 0.9333, + "step": 18140 + }, + { + "epoch": 0.26, + "grad_norm": 0.6796875, + "learning_rate": 0.00018475423790501342, + "loss": 1.0483, + "step": 18145 + }, + { + "epoch": 0.26, + "grad_norm": 0.55078125, + "learning_rate": 0.00018474094784262554, + "loss": 0.9722, + "step": 18150 + }, + { + "epoch": 0.26, + "grad_norm": 0.625, + "learning_rate": 0.00018472765246857855, + "loss": 1.1281, + "step": 18155 + }, + { + "epoch": 0.26, + "grad_norm": 0.6875, + "learning_rate": 0.0001847143517837058, + "loss": 1.0055, + "step": 18160 + }, + { + "epoch": 0.26, + "grad_norm": 0.5546875, + "learning_rate": 0.00018470104578884099, + "loss": 0.8147, + "step": 18165 + }, + { + "epoch": 0.26, + "grad_norm": 0.59375, + "learning_rate": 0.00018468773448481818, + "loss": 1.1144, + "step": 18170 + }, + { + "epoch": 0.26, + "grad_norm": 0.59765625, + "learning_rate": 0.0001846744178724717, + "loss": 0.9699, + "step": 18175 + }, + { + "epoch": 0.26, + "grad_norm": 0.53125, + "learning_rate": 0.0001846610959526363, + "loss": 1.0632, + "step": 18180 + }, + { + "epoch": 0.26, + "grad_norm": 0.5625, + "learning_rate": 0.00018464776872614697, + "loss": 0.8508, + "step": 18185 + }, + { + "epoch": 0.26, + "grad_norm": 0.66015625, + "learning_rate": 0.0001846344361938391, + "loss": 1.0169, + "step": 18190 + }, + { + "epoch": 0.26, + "grad_norm": 0.62109375, + "learning_rate": 0.00018462109835654838, + "loss": 1.0482, + "step": 18195 + }, + { + "epoch": 0.26, + "grad_norm": 0.57421875, + "learning_rate": 0.00018460775521511082, + "loss": 0.9038, + "step": 18200 + }, + { + "epoch": 0.26, + "grad_norm": 0.65625, + "learning_rate": 0.0001845944067703628, + "loss": 0.9428, + "step": 18205 + }, + { + "epoch": 0.26, + "grad_norm": 0.5546875, + "learning_rate": 0.00018458105302314104, + "loss": 0.8876, + "step": 18210 + }, + { + "epoch": 0.26, + "grad_norm": 0.53125, + "learning_rate": 0.00018456769397428254, + "loss": 0.8324, + "step": 18215 + }, + { + "epoch": 0.26, + "grad_norm": 0.6640625, + "learning_rate": 0.00018455432962462466, + "loss": 1.1117, + "step": 18220 + }, + { + "epoch": 0.26, + "grad_norm": 0.546875, + "learning_rate": 0.0001845409599750051, + "loss": 0.87, + "step": 18225 + }, + { + "epoch": 0.26, + "grad_norm": 0.6484375, + "learning_rate": 0.0001845275850262619, + "loss": 0.8693, + "step": 18230 + }, + { + "epoch": 0.26, + "grad_norm": 0.64453125, + "learning_rate": 0.00018451420477923338, + "loss": 1.0202, + "step": 18235 + }, + { + "epoch": 0.26, + "grad_norm": 0.5, + "learning_rate": 0.00018450081923475828, + "loss": 1.0089, + "step": 18240 + }, + { + "epoch": 0.26, + "grad_norm": 0.609375, + "learning_rate": 0.00018448742839367557, + "loss": 1.0084, + "step": 18245 + }, + { + "epoch": 0.26, + "grad_norm": 0.58984375, + "learning_rate": 0.00018447403225682464, + "loss": 0.797, + "step": 18250 + }, + { + "epoch": 0.26, + "grad_norm": 0.56640625, + "learning_rate": 0.00018446063082504512, + "loss": 1.0342, + "step": 18255 + }, + { + "epoch": 0.26, + "grad_norm": 0.578125, + "learning_rate": 0.0001844472240991771, + "loss": 1.0519, + "step": 18260 + }, + { + "epoch": 0.26, + "grad_norm": 0.5859375, + "learning_rate": 0.0001844338120800609, + "loss": 0.9145, + "step": 18265 + }, + { + "epoch": 0.26, + "grad_norm": 0.515625, + "learning_rate": 0.0001844203947685372, + "loss": 0.8796, + "step": 18270 + }, + { + "epoch": 0.26, + "grad_norm": 0.5546875, + "learning_rate": 0.000184406972165447, + "loss": 0.8601, + "step": 18275 + }, + { + "epoch": 0.26, + "grad_norm": 0.58984375, + "learning_rate": 0.00018439354427163162, + "loss": 0.9921, + "step": 18280 + }, + { + "epoch": 0.26, + "grad_norm": 0.70703125, + "learning_rate": 0.00018438011108793282, + "loss": 0.91, + "step": 18285 + }, + { + "epoch": 0.26, + "grad_norm": 0.5703125, + "learning_rate": 0.00018436667261519254, + "loss": 1.024, + "step": 18290 + }, + { + "epoch": 0.26, + "grad_norm": 0.59765625, + "learning_rate": 0.00018435322885425312, + "loss": 0.9442, + "step": 18295 + }, + { + "epoch": 0.26, + "grad_norm": 0.69921875, + "learning_rate": 0.00018433977980595727, + "loss": 0.9701, + "step": 18300 + }, + { + "epoch": 0.26, + "grad_norm": 0.54296875, + "learning_rate": 0.00018432632547114795, + "loss": 1.0441, + "step": 18305 + }, + { + "epoch": 0.26, + "grad_norm": 0.609375, + "learning_rate": 0.00018431286585066851, + "loss": 0.8904, + "step": 18310 + }, + { + "epoch": 0.26, + "grad_norm": 0.53515625, + "learning_rate": 0.0001842994009453626, + "loss": 0.913, + "step": 18315 + }, + { + "epoch": 0.26, + "grad_norm": 0.53515625, + "learning_rate": 0.00018428593075607425, + "loss": 0.8808, + "step": 18320 + }, + { + "epoch": 0.26, + "grad_norm": 0.5234375, + "learning_rate": 0.00018427245528364778, + "loss": 0.9081, + "step": 18325 + }, + { + "epoch": 0.26, + "grad_norm": 0.55078125, + "learning_rate": 0.00018425897452892782, + "loss": 0.9171, + "step": 18330 + }, + { + "epoch": 0.26, + "grad_norm": 0.57421875, + "learning_rate": 0.00018424548849275935, + "loss": 0.8542, + "step": 18335 + }, + { + "epoch": 0.26, + "grad_norm": 0.5859375, + "learning_rate": 0.00018423199717598776, + "loss": 0.9848, + "step": 18340 + }, + { + "epoch": 0.26, + "grad_norm": 0.56640625, + "learning_rate": 0.00018421850057945863, + "loss": 0.8988, + "step": 18345 + }, + { + "epoch": 0.26, + "grad_norm": 0.5390625, + "learning_rate": 0.00018420499870401796, + "loss": 0.9853, + "step": 18350 + }, + { + "epoch": 0.26, + "grad_norm": 0.63671875, + "learning_rate": 0.00018419149155051207, + "loss": 0.9578, + "step": 18355 + }, + { + "epoch": 0.26, + "grad_norm": 0.56640625, + "learning_rate": 0.0001841779791197876, + "loss": 0.8141, + "step": 18360 + }, + { + "epoch": 0.26, + "grad_norm": 0.53125, + "learning_rate": 0.00018416446141269156, + "loss": 0.8497, + "step": 18365 + }, + { + "epoch": 0.26, + "grad_norm": 0.5546875, + "learning_rate": 0.0001841509384300712, + "loss": 0.9357, + "step": 18370 + }, + { + "epoch": 0.26, + "grad_norm": 0.6171875, + "learning_rate": 0.0001841374101727742, + "loss": 1.0044, + "step": 18375 + }, + { + "epoch": 0.26, + "grad_norm": 0.60546875, + "learning_rate": 0.00018412387664164847, + "loss": 0.9814, + "step": 18380 + }, + { + "epoch": 0.26, + "grad_norm": 0.59765625, + "learning_rate": 0.00018411033783754234, + "loss": 0.9315, + "step": 18385 + }, + { + "epoch": 0.26, + "grad_norm": 0.4921875, + "learning_rate": 0.00018409679376130445, + "loss": 0.9981, + "step": 18390 + }, + { + "epoch": 0.26, + "grad_norm": 0.5703125, + "learning_rate": 0.0001840832444137838, + "loss": 1.0124, + "step": 18395 + }, + { + "epoch": 0.26, + "grad_norm": 0.58203125, + "learning_rate": 0.00018406968979582956, + "loss": 0.9862, + "step": 18400 + }, + { + "epoch": 0.26, + "grad_norm": 0.5234375, + "learning_rate": 0.00018405612990829147, + "loss": 0.9314, + "step": 18405 + }, + { + "epoch": 0.26, + "grad_norm": 0.58984375, + "learning_rate": 0.00018404256475201938, + "loss": 0.9507, + "step": 18410 + }, + { + "epoch": 0.26, + "grad_norm": 0.6015625, + "learning_rate": 0.00018402899432786365, + "loss": 0.9749, + "step": 18415 + }, + { + "epoch": 0.26, + "grad_norm": 0.61328125, + "learning_rate": 0.00018401541863667485, + "loss": 0.9224, + "step": 18420 + }, + { + "epoch": 0.26, + "grad_norm": 0.59765625, + "learning_rate": 0.00018400183767930387, + "loss": 1.1241, + "step": 18425 + }, + { + "epoch": 0.26, + "grad_norm": 0.59375, + "learning_rate": 0.00018398825145660212, + "loss": 0.8608, + "step": 18430 + }, + { + "epoch": 0.26, + "grad_norm": 0.76953125, + "learning_rate": 0.00018397465996942107, + "loss": 1.0383, + "step": 18435 + }, + { + "epoch": 0.26, + "grad_norm": 0.6953125, + "learning_rate": 0.00018396106321861267, + "loss": 0.915, + "step": 18440 + }, + { + "epoch": 0.26, + "grad_norm": 0.56640625, + "learning_rate": 0.00018394746120502922, + "loss": 0.95, + "step": 18445 + }, + { + "epoch": 0.26, + "grad_norm": 0.58203125, + "learning_rate": 0.0001839338539295233, + "loss": 0.9485, + "step": 18450 + }, + { + "epoch": 0.26, + "grad_norm": 0.609375, + "learning_rate": 0.00018392024139294785, + "loss": 0.8437, + "step": 18455 + }, + { + "epoch": 0.26, + "grad_norm": 0.56640625, + "learning_rate": 0.00018390662359615603, + "loss": 1.0354, + "step": 18460 + }, + { + "epoch": 0.26, + "grad_norm": 0.609375, + "learning_rate": 0.00018389300054000155, + "loss": 1.0424, + "step": 18465 + }, + { + "epoch": 0.26, + "grad_norm": 0.54296875, + "learning_rate": 0.00018387937222533825, + "loss": 1.1084, + "step": 18470 + }, + { + "epoch": 0.27, + "grad_norm": 0.5234375, + "learning_rate": 0.0001838657386530203, + "loss": 0.909, + "step": 18475 + }, + { + "epoch": 0.27, + "grad_norm": 0.5625, + "learning_rate": 0.0001838520998239024, + "loss": 1.0199, + "step": 18480 + }, + { + "epoch": 0.27, + "grad_norm": 0.59375, + "learning_rate": 0.00018383845573883932, + "loss": 0.8707, + "step": 18485 + }, + { + "epoch": 0.27, + "grad_norm": 0.609375, + "learning_rate": 0.0001838248063986864, + "loss": 1.0918, + "step": 18490 + }, + { + "epoch": 0.27, + "grad_norm": 0.55859375, + "learning_rate": 0.00018381115180429912, + "loss": 0.9805, + "step": 18495 + }, + { + "epoch": 0.27, + "grad_norm": 0.6328125, + "learning_rate": 0.00018379749195653343, + "loss": 0.9804, + "step": 18500 + }, + { + "epoch": 0.27, + "grad_norm": 0.57421875, + "learning_rate": 0.00018378382685624547, + "loss": 0.9463, + "step": 18505 + }, + { + "epoch": 0.27, + "grad_norm": 0.5234375, + "learning_rate": 0.00018377015650429182, + "loss": 1.065, + "step": 18510 + }, + { + "epoch": 0.27, + "grad_norm": 0.55078125, + "learning_rate": 0.00018375648090152938, + "loss": 0.9703, + "step": 18515 + }, + { + "epoch": 0.27, + "grad_norm": 0.58203125, + "learning_rate": 0.00018374280004881531, + "loss": 0.8268, + "step": 18520 + }, + { + "epoch": 0.27, + "grad_norm": 0.609375, + "learning_rate": 0.00018372911394700717, + "loss": 1.0015, + "step": 18525 + }, + { + "epoch": 0.27, + "grad_norm": 0.515625, + "learning_rate": 0.0001837154225969628, + "loss": 0.8507, + "step": 18530 + }, + { + "epoch": 0.27, + "grad_norm": 0.60546875, + "learning_rate": 0.00018370172599954041, + "loss": 1.0905, + "step": 18535 + }, + { + "epoch": 0.27, + "grad_norm": 0.5546875, + "learning_rate": 0.0001836880241555985, + "loss": 0.9969, + "step": 18540 + }, + { + "epoch": 0.27, + "grad_norm": 0.5859375, + "learning_rate": 0.0001836743170659959, + "loss": 0.9468, + "step": 18545 + }, + { + "epoch": 0.27, + "grad_norm": 0.53515625, + "learning_rate": 0.00018366060473159183, + "loss": 0.9021, + "step": 18550 + }, + { + "epoch": 0.27, + "grad_norm": 0.5625, + "learning_rate": 0.0001836468871532458, + "loss": 0.8949, + "step": 18555 + }, + { + "epoch": 0.27, + "grad_norm": 0.45703125, + "learning_rate": 0.00018363316433181757, + "loss": 1.0498, + "step": 18560 + }, + { + "epoch": 0.27, + "grad_norm": 0.671875, + "learning_rate": 0.00018361943626816736, + "loss": 1.1729, + "step": 18565 + }, + { + "epoch": 0.27, + "grad_norm": 0.5625, + "learning_rate": 0.00018360570296315566, + "loss": 1.0258, + "step": 18570 + }, + { + "epoch": 0.27, + "grad_norm": 0.57421875, + "learning_rate": 0.00018359196441764328, + "loss": 0.9827, + "step": 18575 + }, + { + "epoch": 0.27, + "grad_norm": 0.59375, + "learning_rate": 0.00018357822063249136, + "loss": 0.9098, + "step": 18580 + }, + { + "epoch": 0.27, + "grad_norm": 0.55078125, + "learning_rate": 0.0001835644716085614, + "loss": 0.9919, + "step": 18585 + }, + { + "epoch": 0.27, + "grad_norm": 0.671875, + "learning_rate": 0.00018355071734671517, + "loss": 1.004, + "step": 18590 + }, + { + "epoch": 0.27, + "grad_norm": 0.546875, + "learning_rate": 0.0001835369578478148, + "loss": 0.9617, + "step": 18595 + }, + { + "epoch": 0.27, + "grad_norm": 0.5703125, + "learning_rate": 0.0001835231931127228, + "loss": 0.9761, + "step": 18600 + }, + { + "epoch": 0.27, + "grad_norm": 0.44140625, + "learning_rate": 0.00018350942314230195, + "loss": 0.8244, + "step": 18605 + }, + { + "epoch": 0.27, + "grad_norm": 0.5390625, + "learning_rate": 0.00018349564793741533, + "loss": 0.97, + "step": 18610 + }, + { + "epoch": 0.27, + "grad_norm": 0.64453125, + "learning_rate": 0.00018348186749892639, + "loss": 1.1368, + "step": 18615 + }, + { + "epoch": 0.27, + "grad_norm": 0.65234375, + "learning_rate": 0.0001834680818276989, + "loss": 1.0208, + "step": 18620 + }, + { + "epoch": 0.27, + "grad_norm": 0.61328125, + "learning_rate": 0.00018345429092459704, + "loss": 0.8885, + "step": 18625 + }, + { + "epoch": 0.27, + "grad_norm": 0.703125, + "learning_rate": 0.00018344049479048513, + "loss": 1.0854, + "step": 18630 + }, + { + "epoch": 0.27, + "grad_norm": 0.62109375, + "learning_rate": 0.000183426693426228, + "loss": 1.1566, + "step": 18635 + }, + { + "epoch": 0.27, + "grad_norm": 0.55859375, + "learning_rate": 0.0001834128868326907, + "loss": 0.9691, + "step": 18640 + }, + { + "epoch": 0.27, + "grad_norm": 0.59375, + "learning_rate": 0.00018339907501073867, + "loss": 0.968, + "step": 18645 + }, + { + "epoch": 0.27, + "grad_norm": 0.51953125, + "learning_rate": 0.0001833852579612376, + "loss": 0.9445, + "step": 18650 + }, + { + "epoch": 0.27, + "grad_norm": 0.60546875, + "learning_rate": 0.00018337143568505362, + "loss": 1.0196, + "step": 18655 + }, + { + "epoch": 0.27, + "grad_norm": 0.6171875, + "learning_rate": 0.00018335760818305309, + "loss": 0.914, + "step": 18660 + }, + { + "epoch": 0.27, + "grad_norm": 0.56640625, + "learning_rate": 0.00018334377545610274, + "loss": 0.9988, + "step": 18665 + }, + { + "epoch": 0.27, + "grad_norm": 0.55078125, + "learning_rate": 0.00018332993750506962, + "loss": 0.7872, + "step": 18670 + }, + { + "epoch": 0.27, + "grad_norm": 0.5, + "learning_rate": 0.00018331609433082114, + "loss": 0.9388, + "step": 18675 + }, + { + "epoch": 0.27, + "grad_norm": 0.51171875, + "learning_rate": 0.00018330224593422496, + "loss": 0.9146, + "step": 18680 + }, + { + "epoch": 0.27, + "grad_norm": 0.54296875, + "learning_rate": 0.00018328839231614911, + "loss": 0.8824, + "step": 18685 + }, + { + "epoch": 0.27, + "grad_norm": 0.62109375, + "learning_rate": 0.00018327453347746203, + "loss": 1.1295, + "step": 18690 + }, + { + "epoch": 0.27, + "grad_norm": 0.53125, + "learning_rate": 0.00018326066941903228, + "loss": 1.0299, + "step": 18695 + }, + { + "epoch": 0.27, + "grad_norm": 0.61328125, + "learning_rate": 0.000183246800141729, + "loss": 0.9898, + "step": 18700 + }, + { + "epoch": 0.27, + "grad_norm": 0.57421875, + "learning_rate": 0.00018323292564642146, + "loss": 1.019, + "step": 18705 + }, + { + "epoch": 0.27, + "grad_norm": 0.5234375, + "learning_rate": 0.0001832190459339793, + "loss": 0.89, + "step": 18710 + }, + { + "epoch": 0.27, + "grad_norm": 0.61328125, + "learning_rate": 0.0001832051610052726, + "loss": 0.9524, + "step": 18715 + }, + { + "epoch": 0.27, + "grad_norm": 0.59375, + "learning_rate": 0.00018319127086117168, + "loss": 0.9742, + "step": 18720 + }, + { + "epoch": 0.27, + "grad_norm": 0.609375, + "learning_rate": 0.00018317737550254713, + "loss": 1.1546, + "step": 18725 + }, + { + "epoch": 0.27, + "grad_norm": 0.5234375, + "learning_rate": 0.00018316347493026994, + "loss": 0.9396, + "step": 18730 + }, + { + "epoch": 0.27, + "grad_norm": 0.470703125, + "learning_rate": 0.00018314956914521142, + "loss": 0.8559, + "step": 18735 + }, + { + "epoch": 0.27, + "grad_norm": 0.62890625, + "learning_rate": 0.0001831356581482432, + "loss": 1.0151, + "step": 18740 + }, + { + "epoch": 0.27, + "grad_norm": 0.458984375, + "learning_rate": 0.0001831217419402373, + "loss": 0.8159, + "step": 18745 + }, + { + "epoch": 0.27, + "grad_norm": 0.609375, + "learning_rate": 0.0001831078205220659, + "loss": 1.0798, + "step": 18750 + }, + { + "epoch": 0.27, + "grad_norm": 0.51171875, + "learning_rate": 0.00018309389389460168, + "loss": 0.9936, + "step": 18755 + }, + { + "epoch": 0.27, + "grad_norm": 0.65234375, + "learning_rate": 0.00018307996205871755, + "loss": 0.984, + "step": 18760 + }, + { + "epoch": 0.27, + "grad_norm": 0.5078125, + "learning_rate": 0.00018306602501528673, + "loss": 0.8814, + "step": 18765 + }, + { + "epoch": 0.27, + "grad_norm": 0.49609375, + "learning_rate": 0.00018305208276518293, + "loss": 0.9234, + "step": 18770 + }, + { + "epoch": 0.27, + "grad_norm": 0.515625, + "learning_rate": 0.00018303813530927995, + "loss": 0.908, + "step": 18775 + }, + { + "epoch": 0.27, + "grad_norm": 0.51171875, + "learning_rate": 0.00018302418264845208, + "loss": 0.8875, + "step": 18780 + }, + { + "epoch": 0.27, + "grad_norm": 0.61328125, + "learning_rate": 0.00018301022478357391, + "loss": 0.9008, + "step": 18785 + }, + { + "epoch": 0.27, + "grad_norm": 0.6640625, + "learning_rate": 0.0001829962617155203, + "loss": 1.0306, + "step": 18790 + }, + { + "epoch": 0.27, + "grad_norm": 0.546875, + "learning_rate": 0.00018298229344516646, + "loss": 0.9696, + "step": 18795 + }, + { + "epoch": 0.27, + "grad_norm": 0.57421875, + "learning_rate": 0.00018296831997338797, + "loss": 0.9603, + "step": 18800 + }, + { + "epoch": 0.27, + "grad_norm": 0.59765625, + "learning_rate": 0.0001829543413010607, + "loss": 0.8585, + "step": 18805 + }, + { + "epoch": 0.27, + "grad_norm": 0.58984375, + "learning_rate": 0.0001829403574290608, + "loss": 0.8206, + "step": 18810 + }, + { + "epoch": 0.27, + "grad_norm": 0.546875, + "learning_rate": 0.0001829263683582649, + "loss": 0.9683, + "step": 18815 + }, + { + "epoch": 0.27, + "grad_norm": 0.51953125, + "learning_rate": 0.00018291237408954976, + "loss": 0.9284, + "step": 18820 + }, + { + "epoch": 0.27, + "grad_norm": 0.6796875, + "learning_rate": 0.00018289837462379257, + "loss": 1.0969, + "step": 18825 + }, + { + "epoch": 0.27, + "grad_norm": 0.64453125, + "learning_rate": 0.00018288436996187084, + "loss": 1.0336, + "step": 18830 + }, + { + "epoch": 0.27, + "grad_norm": 0.546875, + "learning_rate": 0.00018287036010466244, + "loss": 0.9165, + "step": 18835 + }, + { + "epoch": 0.27, + "grad_norm": 0.5546875, + "learning_rate": 0.00018285634505304545, + "loss": 0.9036, + "step": 18840 + }, + { + "epoch": 0.27, + "grad_norm": 0.54296875, + "learning_rate": 0.00018284232480789841, + "loss": 0.8956, + "step": 18845 + }, + { + "epoch": 0.27, + "grad_norm": 0.6484375, + "learning_rate": 0.00018282829937010009, + "loss": 0.9551, + "step": 18850 + }, + { + "epoch": 0.27, + "grad_norm": 0.51171875, + "learning_rate": 0.00018281426874052961, + "loss": 0.9882, + "step": 18855 + }, + { + "epoch": 0.27, + "grad_norm": 0.59765625, + "learning_rate": 0.00018280023292006648, + "loss": 1.0658, + "step": 18860 + }, + { + "epoch": 0.27, + "grad_norm": 0.52734375, + "learning_rate": 0.00018278619190959045, + "loss": 0.8386, + "step": 18865 + }, + { + "epoch": 0.27, + "grad_norm": 0.578125, + "learning_rate": 0.00018277214570998161, + "loss": 0.9481, + "step": 18870 + }, + { + "epoch": 0.27, + "grad_norm": 0.51953125, + "learning_rate": 0.00018275809432212041, + "loss": 1.0821, + "step": 18875 + }, + { + "epoch": 0.27, + "grad_norm": 0.6328125, + "learning_rate": 0.0001827440377468876, + "loss": 0.9736, + "step": 18880 + }, + { + "epoch": 0.27, + "grad_norm": 0.578125, + "learning_rate": 0.00018272997598516431, + "loss": 0.9497, + "step": 18885 + }, + { + "epoch": 0.27, + "grad_norm": 0.59375, + "learning_rate": 0.00018271590903783184, + "loss": 0.9594, + "step": 18890 + }, + { + "epoch": 0.27, + "grad_norm": 0.64453125, + "learning_rate": 0.00018270183690577202, + "loss": 0.9803, + "step": 18895 + }, + { + "epoch": 0.27, + "grad_norm": 0.62109375, + "learning_rate": 0.00018268775958986687, + "loss": 0.9781, + "step": 18900 + }, + { + "epoch": 0.27, + "grad_norm": 0.640625, + "learning_rate": 0.00018267367709099878, + "loss": 1.1337, + "step": 18905 + }, + { + "epoch": 0.27, + "grad_norm": 0.52734375, + "learning_rate": 0.00018265958941005044, + "loss": 0.8523, + "step": 18910 + }, + { + "epoch": 0.27, + "grad_norm": 0.55859375, + "learning_rate": 0.00018264549654790487, + "loss": 0.8817, + "step": 18915 + }, + { + "epoch": 0.27, + "grad_norm": 0.5546875, + "learning_rate": 0.00018263139850544551, + "loss": 1.0435, + "step": 18920 + }, + { + "epoch": 0.27, + "grad_norm": 0.5859375, + "learning_rate": 0.00018261729528355595, + "loss": 1.0755, + "step": 18925 + }, + { + "epoch": 0.27, + "grad_norm": 0.55078125, + "learning_rate": 0.0001826031868831202, + "loss": 1.0781, + "step": 18930 + }, + { + "epoch": 0.27, + "grad_norm": 0.5859375, + "learning_rate": 0.00018258907330502265, + "loss": 1.0118, + "step": 18935 + }, + { + "epoch": 0.27, + "grad_norm": 0.515625, + "learning_rate": 0.00018257495455014798, + "loss": 0.8649, + "step": 18940 + }, + { + "epoch": 0.27, + "grad_norm": 0.64453125, + "learning_rate": 0.00018256083061938104, + "loss": 0.984, + "step": 18945 + }, + { + "epoch": 0.27, + "grad_norm": 0.5390625, + "learning_rate": 0.00018254670151360722, + "loss": 1.0077, + "step": 18950 + }, + { + "epoch": 0.27, + "grad_norm": 0.56640625, + "learning_rate": 0.00018253256723371216, + "loss": 1.1296, + "step": 18955 + }, + { + "epoch": 0.27, + "grad_norm": 0.55859375, + "learning_rate": 0.00018251842778058177, + "loss": 0.965, + "step": 18960 + }, + { + "epoch": 0.27, + "grad_norm": 0.58203125, + "learning_rate": 0.00018250428315510234, + "loss": 0.9761, + "step": 18965 + }, + { + "epoch": 0.27, + "grad_norm": 0.58203125, + "learning_rate": 0.00018249013335816048, + "loss": 0.9319, + "step": 18970 + }, + { + "epoch": 0.27, + "grad_norm": 0.54296875, + "learning_rate": 0.0001824759783906431, + "loss": 1.0028, + "step": 18975 + }, + { + "epoch": 0.27, + "grad_norm": 0.6328125, + "learning_rate": 0.0001824618182534375, + "loss": 1.0927, + "step": 18980 + }, + { + "epoch": 0.27, + "grad_norm": 0.59375, + "learning_rate": 0.00018244765294743117, + "loss": 0.8849, + "step": 18985 + }, + { + "epoch": 0.27, + "grad_norm": 0.5859375, + "learning_rate": 0.00018243348247351212, + "loss": 0.924, + "step": 18990 + }, + { + "epoch": 0.27, + "grad_norm": 0.53125, + "learning_rate": 0.00018241930683256846, + "loss": 0.892, + "step": 18995 + }, + { + "epoch": 0.27, + "grad_norm": 0.62109375, + "learning_rate": 0.00018240512602548875, + "loss": 1.0804, + "step": 19000 + }, + { + "epoch": 0.27, + "grad_norm": 0.515625, + "learning_rate": 0.00018239094005316193, + "loss": 0.8454, + "step": 19005 + }, + { + "epoch": 0.27, + "grad_norm": 0.5625, + "learning_rate": 0.00018237674891647716, + "loss": 0.9701, + "step": 19010 + }, + { + "epoch": 0.27, + "grad_norm": 0.59375, + "learning_rate": 0.00018236255261632392, + "loss": 0.9946, + "step": 19015 + }, + { + "epoch": 0.27, + "grad_norm": 0.6484375, + "learning_rate": 0.0001823483511535921, + "loss": 1.0154, + "step": 19020 + }, + { + "epoch": 0.27, + "grad_norm": 0.58203125, + "learning_rate": 0.00018233414452917184, + "loss": 0.9028, + "step": 19025 + }, + { + "epoch": 0.27, + "grad_norm": 0.578125, + "learning_rate": 0.00018231993274395362, + "loss": 0.8365, + "step": 19030 + }, + { + "epoch": 0.27, + "grad_norm": 0.5546875, + "learning_rate": 0.00018230571579882826, + "loss": 0.7945, + "step": 19035 + }, + { + "epoch": 0.27, + "grad_norm": 0.58203125, + "learning_rate": 0.0001822914936946869, + "loss": 1.0, + "step": 19040 + }, + { + "epoch": 0.27, + "grad_norm": 0.52734375, + "learning_rate": 0.000182277266432421, + "loss": 1.0195, + "step": 19045 + }, + { + "epoch": 0.27, + "grad_norm": 0.56640625, + "learning_rate": 0.00018226303401292233, + "loss": 1.0113, + "step": 19050 + }, + { + "epoch": 0.27, + "grad_norm": 0.56640625, + "learning_rate": 0.00018224879643708299, + "loss": 0.9697, + "step": 19055 + }, + { + "epoch": 0.27, + "grad_norm": 0.6484375, + "learning_rate": 0.00018223455370579544, + "loss": 1.0527, + "step": 19060 + }, + { + "epoch": 0.27, + "grad_norm": 0.5625, + "learning_rate": 0.00018222030581995237, + "loss": 1.1755, + "step": 19065 + }, + { + "epoch": 0.27, + "grad_norm": 0.55078125, + "learning_rate": 0.00018220605278044692, + "loss": 0.8671, + "step": 19070 + }, + { + "epoch": 0.27, + "grad_norm": 0.609375, + "learning_rate": 0.00018219179458817247, + "loss": 0.9569, + "step": 19075 + }, + { + "epoch": 0.27, + "grad_norm": 0.5625, + "learning_rate": 0.00018217753124402268, + "loss": 0.9445, + "step": 19080 + }, + { + "epoch": 0.27, + "grad_norm": 0.55859375, + "learning_rate": 0.00018216326274889165, + "loss": 1.0961, + "step": 19085 + }, + { + "epoch": 0.27, + "grad_norm": 0.62109375, + "learning_rate": 0.00018214898910367375, + "loss": 0.9009, + "step": 19090 + }, + { + "epoch": 0.27, + "grad_norm": 0.82421875, + "learning_rate": 0.00018213471030926367, + "loss": 1.0007, + "step": 19095 + }, + { + "epoch": 0.27, + "grad_norm": 0.5625, + "learning_rate": 0.00018212042636655637, + "loss": 1.0147, + "step": 19100 + }, + { + "epoch": 0.27, + "grad_norm": 0.765625, + "learning_rate": 0.00018210613727644723, + "loss": 0.8774, + "step": 19105 + }, + { + "epoch": 0.27, + "grad_norm": 0.578125, + "learning_rate": 0.0001820918430398319, + "loss": 0.9651, + "step": 19110 + }, + { + "epoch": 0.27, + "grad_norm": 0.5625, + "learning_rate": 0.00018207754365760637, + "loss": 1.0168, + "step": 19115 + }, + { + "epoch": 0.27, + "grad_norm": 0.73046875, + "learning_rate": 0.00018206323913066687, + "loss": 1.0255, + "step": 19120 + }, + { + "epoch": 0.27, + "grad_norm": 0.58984375, + "learning_rate": 0.00018204892945991014, + "loss": 1.0751, + "step": 19125 + }, + { + "epoch": 0.27, + "grad_norm": 0.5625, + "learning_rate": 0.00018203461464623302, + "loss": 0.9231, + "step": 19130 + }, + { + "epoch": 0.27, + "grad_norm": 0.6640625, + "learning_rate": 0.00018202029469053285, + "loss": 1.022, + "step": 19135 + }, + { + "epoch": 0.27, + "grad_norm": 0.6875, + "learning_rate": 0.00018200596959370722, + "loss": 1.0119, + "step": 19140 + }, + { + "epoch": 0.27, + "grad_norm": 0.6328125, + "learning_rate": 0.00018199163935665396, + "loss": 0.9225, + "step": 19145 + }, + { + "epoch": 0.27, + "grad_norm": 0.6015625, + "learning_rate": 0.00018197730398027142, + "loss": 1.0322, + "step": 19150 + }, + { + "epoch": 0.27, + "grad_norm": 0.625, + "learning_rate": 0.00018196296346545805, + "loss": 1.0106, + "step": 19155 + }, + { + "epoch": 0.27, + "grad_norm": 0.62109375, + "learning_rate": 0.00018194861781311282, + "loss": 0.9608, + "step": 19160 + }, + { + "epoch": 0.27, + "grad_norm": 0.546875, + "learning_rate": 0.00018193426702413487, + "loss": 0.9761, + "step": 19165 + }, + { + "epoch": 0.27, + "grad_norm": 0.546875, + "learning_rate": 0.00018191991109942377, + "loss": 0.8546, + "step": 19170 + }, + { + "epoch": 0.28, + "grad_norm": 0.546875, + "learning_rate": 0.0001819055500398793, + "loss": 0.9388, + "step": 19175 + }, + { + "epoch": 0.28, + "grad_norm": 0.6015625, + "learning_rate": 0.00018189118384640172, + "loss": 0.909, + "step": 19180 + }, + { + "epoch": 0.28, + "grad_norm": 0.51953125, + "learning_rate": 0.0001818768125198915, + "loss": 1.0421, + "step": 19185 + }, + { + "epoch": 0.28, + "grad_norm": 0.5078125, + "learning_rate": 0.00018186243606124934, + "loss": 0.8726, + "step": 19190 + }, + { + "epoch": 0.28, + "grad_norm": 0.6015625, + "learning_rate": 0.0001818480544713765, + "loss": 0.9523, + "step": 19195 + }, + { + "epoch": 0.28, + "grad_norm": 0.56640625, + "learning_rate": 0.00018183366775117437, + "loss": 0.9786, + "step": 19200 + }, + { + "epoch": 0.28, + "grad_norm": 0.53515625, + "learning_rate": 0.00018181927590154475, + "loss": 0.8752, + "step": 19205 + }, + { + "epoch": 0.28, + "grad_norm": 0.46484375, + "learning_rate": 0.00018180487892338973, + "loss": 0.7925, + "step": 19210 + }, + { + "epoch": 0.28, + "grad_norm": 0.59375, + "learning_rate": 0.00018179047681761174, + "loss": 0.957, + "step": 19215 + }, + { + "epoch": 0.28, + "grad_norm": 0.6015625, + "learning_rate": 0.0001817760695851135, + "loss": 1.0573, + "step": 19220 + }, + { + "epoch": 0.28, + "grad_norm": 0.48828125, + "learning_rate": 0.0001817616572267981, + "loss": 0.8925, + "step": 19225 + }, + { + "epoch": 0.28, + "grad_norm": 0.56640625, + "learning_rate": 0.0001817472397435689, + "loss": 0.8296, + "step": 19230 + }, + { + "epoch": 0.28, + "grad_norm": 0.515625, + "learning_rate": 0.0001817328171363296, + "loss": 0.9467, + "step": 19235 + }, + { + "epoch": 0.28, + "grad_norm": 0.5703125, + "learning_rate": 0.00018171838940598425, + "loss": 0.8865, + "step": 19240 + }, + { + "epoch": 0.28, + "grad_norm": 0.5859375, + "learning_rate": 0.00018170395655343717, + "loss": 0.9796, + "step": 19245 + }, + { + "epoch": 0.28, + "grad_norm": 0.5078125, + "learning_rate": 0.00018168951857959305, + "loss": 0.8189, + "step": 19250 + }, + { + "epoch": 0.28, + "grad_norm": 0.671875, + "learning_rate": 0.00018167507548535685, + "loss": 0.9377, + "step": 19255 + }, + { + "epoch": 0.28, + "grad_norm": 0.53125, + "learning_rate": 0.00018166062727163393, + "loss": 1.1658, + "step": 19260 + }, + { + "epoch": 0.28, + "grad_norm": 0.5703125, + "learning_rate": 0.00018164617393932986, + "loss": 0.9539, + "step": 19265 + }, + { + "epoch": 0.28, + "grad_norm": 0.55078125, + "learning_rate": 0.00018163171548935062, + "loss": 1.0121, + "step": 19270 + }, + { + "epoch": 0.28, + "grad_norm": 0.55078125, + "learning_rate": 0.00018161725192260254, + "loss": 0.9584, + "step": 19275 + }, + { + "epoch": 0.28, + "grad_norm": 0.59375, + "learning_rate": 0.0001816027832399921, + "loss": 0.9901, + "step": 19280 + }, + { + "epoch": 0.28, + "grad_norm": 0.5546875, + "learning_rate": 0.00018158830944242627, + "loss": 0.8591, + "step": 19285 + }, + { + "epoch": 0.28, + "grad_norm": 0.51953125, + "learning_rate": 0.0001815738305308123, + "loss": 0.9036, + "step": 19290 + }, + { + "epoch": 0.28, + "grad_norm": 0.63671875, + "learning_rate": 0.0001815593465060577, + "loss": 1.0187, + "step": 19295 + }, + { + "epoch": 0.28, + "grad_norm": 0.6015625, + "learning_rate": 0.0001815448573690704, + "loss": 0.856, + "step": 19300 + }, + { + "epoch": 0.28, + "grad_norm": 0.49609375, + "learning_rate": 0.00018153036312075854, + "loss": 0.9173, + "step": 19305 + }, + { + "epoch": 0.28, + "grad_norm": 0.5390625, + "learning_rate": 0.00018151586376203072, + "loss": 0.9322, + "step": 19310 + }, + { + "epoch": 0.28, + "grad_norm": 0.54296875, + "learning_rate": 0.00018150135929379565, + "loss": 0.9743, + "step": 19315 + }, + { + "epoch": 0.28, + "grad_norm": 0.5, + "learning_rate": 0.0001814868497169626, + "loss": 0.9384, + "step": 19320 + }, + { + "epoch": 0.28, + "grad_norm": 0.57421875, + "learning_rate": 0.000181472335032441, + "loss": 0.9545, + "step": 19325 + }, + { + "epoch": 0.28, + "grad_norm": 0.58203125, + "learning_rate": 0.00018145781524114068, + "loss": 0.9326, + "step": 19330 + }, + { + "epoch": 0.28, + "grad_norm": 0.55859375, + "learning_rate": 0.00018144329034397167, + "loss": 0.7758, + "step": 19335 + }, + { + "epoch": 0.28, + "grad_norm": 0.640625, + "learning_rate": 0.0001814287603418445, + "loss": 0.9259, + "step": 19340 + }, + { + "epoch": 0.28, + "grad_norm": 0.57421875, + "learning_rate": 0.00018141422523566987, + "loss": 0.7652, + "step": 19345 + }, + { + "epoch": 0.28, + "grad_norm": 0.62109375, + "learning_rate": 0.00018139968502635888, + "loss": 1.0755, + "step": 19350 + }, + { + "epoch": 0.28, + "grad_norm": 0.5625, + "learning_rate": 0.00018138513971482296, + "loss": 0.9437, + "step": 19355 + }, + { + "epoch": 0.28, + "grad_norm": 0.59765625, + "learning_rate": 0.00018137058930197376, + "loss": 0.9806, + "step": 19360 + }, + { + "epoch": 0.28, + "grad_norm": 0.53515625, + "learning_rate": 0.00018135603378872337, + "loss": 0.9889, + "step": 19365 + }, + { + "epoch": 0.28, + "grad_norm": 0.53125, + "learning_rate": 0.0001813414731759841, + "loss": 0.8439, + "step": 19370 + }, + { + "epoch": 0.28, + "grad_norm": 0.494140625, + "learning_rate": 0.00018132690746466867, + "loss": 1.0547, + "step": 19375 + }, + { + "epoch": 0.28, + "grad_norm": 0.56640625, + "learning_rate": 0.00018131233665569005, + "loss": 0.7771, + "step": 19380 + }, + { + "epoch": 0.28, + "grad_norm": 0.61328125, + "learning_rate": 0.00018129776074996156, + "loss": 1.1168, + "step": 19385 + }, + { + "epoch": 0.28, + "grad_norm": 0.59765625, + "learning_rate": 0.00018128317974839685, + "loss": 0.9792, + "step": 19390 + }, + { + "epoch": 0.28, + "grad_norm": 0.5859375, + "learning_rate": 0.00018126859365190986, + "loss": 0.9773, + "step": 19395 + }, + { + "epoch": 0.28, + "grad_norm": 0.62109375, + "learning_rate": 0.00018125400246141486, + "loss": 1.0232, + "step": 19400 + }, + { + "epoch": 0.28, + "grad_norm": 0.68359375, + "learning_rate": 0.00018123940617782643, + "loss": 1.1173, + "step": 19405 + }, + { + "epoch": 0.28, + "grad_norm": 0.58203125, + "learning_rate": 0.0001812248048020595, + "loss": 1.0651, + "step": 19410 + }, + { + "epoch": 0.28, + "grad_norm": 0.60546875, + "learning_rate": 0.0001812101983350293, + "loss": 1.0051, + "step": 19415 + }, + { + "epoch": 0.28, + "grad_norm": 0.58984375, + "learning_rate": 0.0001811955867776514, + "loss": 0.995, + "step": 19420 + }, + { + "epoch": 0.28, + "grad_norm": 0.6171875, + "learning_rate": 0.0001811809701308416, + "loss": 0.9651, + "step": 19425 + }, + { + "epoch": 0.28, + "grad_norm": 0.6328125, + "learning_rate": 0.00018116634839551618, + "loss": 1.0279, + "step": 19430 + }, + { + "epoch": 0.28, + "grad_norm": 0.640625, + "learning_rate": 0.00018115172157259158, + "loss": 1.0307, + "step": 19435 + }, + { + "epoch": 0.28, + "grad_norm": 0.578125, + "learning_rate": 0.00018113708966298466, + "loss": 0.8745, + "step": 19440 + }, + { + "epoch": 0.28, + "grad_norm": 0.61328125, + "learning_rate": 0.00018112245266761255, + "loss": 0.8352, + "step": 19445 + }, + { + "epoch": 0.28, + "grad_norm": 0.62109375, + "learning_rate": 0.0001811078105873927, + "loss": 0.9852, + "step": 19450 + }, + { + "epoch": 0.28, + "grad_norm": 0.546875, + "learning_rate": 0.0001810931634232429, + "loss": 0.9463, + "step": 19455 + }, + { + "epoch": 0.28, + "grad_norm": 0.703125, + "learning_rate": 0.00018107851117608127, + "loss": 0.965, + "step": 19460 + }, + { + "epoch": 0.28, + "grad_norm": 0.5625, + "learning_rate": 0.00018106385384682625, + "loss": 0.9346, + "step": 19465 + }, + { + "epoch": 0.28, + "grad_norm": 0.61328125, + "learning_rate": 0.00018104919143639654, + "loss": 1.0714, + "step": 19470 + }, + { + "epoch": 0.28, + "grad_norm": 0.52734375, + "learning_rate": 0.00018103452394571117, + "loss": 0.9186, + "step": 19475 + }, + { + "epoch": 0.28, + "grad_norm": 0.54296875, + "learning_rate": 0.00018101985137568955, + "loss": 1.0589, + "step": 19480 + }, + { + "epoch": 0.28, + "grad_norm": 0.546875, + "learning_rate": 0.00018100517372725142, + "loss": 0.9316, + "step": 19485 + }, + { + "epoch": 0.28, + "grad_norm": 0.51953125, + "learning_rate": 0.0001809904910013167, + "loss": 0.9155, + "step": 19490 + }, + { + "epoch": 0.28, + "grad_norm": 0.796875, + "learning_rate": 0.00018097580319880577, + "loss": 0.9344, + "step": 19495 + }, + { + "epoch": 0.28, + "grad_norm": 0.546875, + "learning_rate": 0.00018096111032063928, + "loss": 1.1705, + "step": 19500 + }, + { + "epoch": 0.28, + "grad_norm": 0.50390625, + "learning_rate": 0.00018094641236773818, + "loss": 0.9314, + "step": 19505 + }, + { + "epoch": 0.28, + "grad_norm": 0.53125, + "learning_rate": 0.00018093170934102378, + "loss": 0.8923, + "step": 19510 + }, + { + "epoch": 0.28, + "grad_norm": 0.6171875, + "learning_rate": 0.00018091700124141764, + "loss": 1.0893, + "step": 19515 + }, + { + "epoch": 0.28, + "grad_norm": 0.68359375, + "learning_rate": 0.0001809022880698417, + "loss": 0.9808, + "step": 19520 + }, + { + "epoch": 0.28, + "grad_norm": 0.53515625, + "learning_rate": 0.00018088756982721825, + "loss": 0.9501, + "step": 19525 + }, + { + "epoch": 0.28, + "grad_norm": 0.58984375, + "learning_rate": 0.00018087284651446977, + "loss": 0.9246, + "step": 19530 + }, + { + "epoch": 0.28, + "grad_norm": 0.55078125, + "learning_rate": 0.00018085811813251917, + "loss": 0.9049, + "step": 19535 + }, + { + "epoch": 0.28, + "grad_norm": 0.470703125, + "learning_rate": 0.0001808433846822896, + "loss": 1.1336, + "step": 19540 + }, + { + "epoch": 0.28, + "grad_norm": 0.69140625, + "learning_rate": 0.00018082864616470468, + "loss": 1.003, + "step": 19545 + }, + { + "epoch": 0.28, + "grad_norm": 0.49609375, + "learning_rate": 0.00018081390258068808, + "loss": 1.0711, + "step": 19550 + }, + { + "epoch": 0.28, + "grad_norm": 0.56640625, + "learning_rate": 0.00018079915393116405, + "loss": 1.0544, + "step": 19555 + }, + { + "epoch": 0.28, + "grad_norm": 0.55859375, + "learning_rate": 0.00018078440021705708, + "loss": 1.1562, + "step": 19560 + }, + { + "epoch": 0.28, + "grad_norm": 0.6953125, + "learning_rate": 0.00018076964143929188, + "loss": 1.1116, + "step": 19565 + }, + { + "epoch": 0.28, + "grad_norm": 0.5546875, + "learning_rate": 0.00018075487759879353, + "loss": 0.9089, + "step": 19570 + }, + { + "epoch": 0.28, + "grad_norm": 0.66015625, + "learning_rate": 0.0001807401086964875, + "loss": 0.9719, + "step": 19575 + }, + { + "epoch": 0.28, + "grad_norm": 0.68359375, + "learning_rate": 0.00018072533473329952, + "loss": 1.1497, + "step": 19580 + }, + { + "epoch": 0.28, + "grad_norm": 0.50390625, + "learning_rate": 0.0001807105557101556, + "loss": 0.9319, + "step": 19585 + }, + { + "epoch": 0.28, + "grad_norm": 0.7109375, + "learning_rate": 0.0001806957716279821, + "loss": 0.8869, + "step": 19590 + }, + { + "epoch": 0.28, + "grad_norm": 0.59375, + "learning_rate": 0.00018068098248770576, + "loss": 0.9751, + "step": 19595 + }, + { + "epoch": 0.28, + "grad_norm": 0.56640625, + "learning_rate": 0.00018066618829025354, + "loss": 0.8765, + "step": 19600 + }, + { + "epoch": 0.28, + "grad_norm": 0.64453125, + "learning_rate": 0.0001806513890365528, + "loss": 1.1602, + "step": 19605 + }, + { + "epoch": 0.28, + "grad_norm": 0.578125, + "learning_rate": 0.0001806365847275311, + "loss": 0.9074, + "step": 19610 + }, + { + "epoch": 0.28, + "grad_norm": 0.59765625, + "learning_rate": 0.00018062177536411645, + "loss": 0.909, + "step": 19615 + }, + { + "epoch": 0.28, + "grad_norm": 0.5078125, + "learning_rate": 0.00018060696094723708, + "loss": 0.8727, + "step": 19620 + }, + { + "epoch": 0.28, + "grad_norm": 0.55078125, + "learning_rate": 0.00018059214147782163, + "loss": 1.0834, + "step": 19625 + }, + { + "epoch": 0.28, + "grad_norm": 0.609375, + "learning_rate": 0.00018057731695679893, + "loss": 0.9149, + "step": 19630 + }, + { + "epoch": 0.28, + "grad_norm": 0.5546875, + "learning_rate": 0.00018056248738509826, + "loss": 1.0154, + "step": 19635 + }, + { + "epoch": 0.28, + "grad_norm": 0.515625, + "learning_rate": 0.0001805476527636491, + "loss": 1.0771, + "step": 19640 + }, + { + "epoch": 0.28, + "grad_norm": 0.57421875, + "learning_rate": 0.00018053281309338135, + "loss": 1.0866, + "step": 19645 + }, + { + "epoch": 0.28, + "grad_norm": 0.6171875, + "learning_rate": 0.00018051796837522516, + "loss": 0.9696, + "step": 19650 + }, + { + "epoch": 0.28, + "grad_norm": 0.515625, + "learning_rate": 0.000180503118610111, + "loss": 0.7969, + "step": 19655 + }, + { + "epoch": 0.28, + "grad_norm": 0.578125, + "learning_rate": 0.00018048826379896967, + "loss": 0.9516, + "step": 19660 + }, + { + "epoch": 0.28, + "grad_norm": 0.57421875, + "learning_rate": 0.00018047340394273232, + "loss": 0.9688, + "step": 19665 + }, + { + "epoch": 0.28, + "grad_norm": 0.515625, + "learning_rate": 0.00018045853904233034, + "loss": 1.0596, + "step": 19670 + }, + { + "epoch": 0.28, + "grad_norm": 0.53125, + "learning_rate": 0.00018044366909869552, + "loss": 0.9945, + "step": 19675 + }, + { + "epoch": 0.28, + "grad_norm": 0.62890625, + "learning_rate": 0.00018042879411275987, + "loss": 1.033, + "step": 19680 + }, + { + "epoch": 0.28, + "grad_norm": 0.640625, + "learning_rate": 0.00018041391408545586, + "loss": 0.9772, + "step": 19685 + }, + { + "epoch": 0.28, + "grad_norm": 0.59765625, + "learning_rate": 0.00018039902901771608, + "loss": 1.0647, + "step": 19690 + }, + { + "epoch": 0.28, + "grad_norm": 0.58203125, + "learning_rate": 0.00018038413891047358, + "loss": 0.9532, + "step": 19695 + }, + { + "epoch": 0.28, + "grad_norm": 0.625, + "learning_rate": 0.00018036924376466174, + "loss": 0.801, + "step": 19700 + }, + { + "epoch": 0.28, + "grad_norm": 0.5234375, + "learning_rate": 0.00018035434358121418, + "loss": 0.9809, + "step": 19705 + }, + { + "epoch": 0.28, + "grad_norm": 0.59765625, + "learning_rate": 0.00018033943836106482, + "loss": 1.0384, + "step": 19710 + }, + { + "epoch": 0.28, + "grad_norm": 0.6015625, + "learning_rate": 0.00018032452810514798, + "loss": 0.9302, + "step": 19715 + }, + { + "epoch": 0.28, + "grad_norm": 0.54296875, + "learning_rate": 0.0001803096128143982, + "loss": 1.0039, + "step": 19720 + }, + { + "epoch": 0.28, + "grad_norm": 0.6328125, + "learning_rate": 0.00018029469248975047, + "loss": 1.1232, + "step": 19725 + }, + { + "epoch": 0.28, + "grad_norm": 0.5859375, + "learning_rate": 0.00018027976713213994, + "loss": 0.9102, + "step": 19730 + }, + { + "epoch": 0.28, + "grad_norm": 0.5703125, + "learning_rate": 0.0001802648367425022, + "loss": 1.1901, + "step": 19735 + }, + { + "epoch": 0.28, + "grad_norm": 0.6171875, + "learning_rate": 0.00018024990132177305, + "loss": 0.8535, + "step": 19740 + }, + { + "epoch": 0.28, + "grad_norm": 0.5234375, + "learning_rate": 0.00018023496087088872, + "loss": 0.9464, + "step": 19745 + }, + { + "epoch": 0.28, + "grad_norm": 0.67578125, + "learning_rate": 0.00018022001539078563, + "loss": 1.0605, + "step": 19750 + }, + { + "epoch": 0.28, + "grad_norm": 0.494140625, + "learning_rate": 0.00018020506488240065, + "loss": 0.892, + "step": 19755 + }, + { + "epoch": 0.28, + "grad_norm": 0.50390625, + "learning_rate": 0.00018019010934667082, + "loss": 0.8213, + "step": 19760 + }, + { + "epoch": 0.28, + "grad_norm": 0.546875, + "learning_rate": 0.00018017514878453363, + "loss": 0.9154, + "step": 19765 + }, + { + "epoch": 0.28, + "grad_norm": 0.71484375, + "learning_rate": 0.0001801601831969268, + "loss": 1.0438, + "step": 19770 + }, + { + "epoch": 0.28, + "grad_norm": 0.67578125, + "learning_rate": 0.00018014521258478839, + "loss": 0.9831, + "step": 19775 + }, + { + "epoch": 0.28, + "grad_norm": 0.63671875, + "learning_rate": 0.00018013023694905678, + "loss": 1.0661, + "step": 19780 + }, + { + "epoch": 0.28, + "grad_norm": 0.609375, + "learning_rate": 0.00018011525629067063, + "loss": 0.8326, + "step": 19785 + }, + { + "epoch": 0.28, + "grad_norm": 0.546875, + "learning_rate": 0.000180100270610569, + "loss": 1.0475, + "step": 19790 + }, + { + "epoch": 0.28, + "grad_norm": 0.59375, + "learning_rate": 0.00018008527990969118, + "loss": 0.9246, + "step": 19795 + }, + { + "epoch": 0.28, + "grad_norm": 0.56640625, + "learning_rate": 0.0001800702841889768, + "loss": 1.0556, + "step": 19800 + }, + { + "epoch": 0.28, + "grad_norm": 0.578125, + "learning_rate": 0.00018005528344936582, + "loss": 1.0302, + "step": 19805 + }, + { + "epoch": 0.28, + "grad_norm": 0.60546875, + "learning_rate": 0.0001800402776917985, + "loss": 1.0522, + "step": 19810 + }, + { + "epoch": 0.28, + "grad_norm": 0.546875, + "learning_rate": 0.0001800252669172154, + "loss": 0.9603, + "step": 19815 + }, + { + "epoch": 0.28, + "grad_norm": 0.59375, + "learning_rate": 0.00018001025112655743, + "loss": 1.0379, + "step": 19820 + }, + { + "epoch": 0.28, + "grad_norm": 0.65234375, + "learning_rate": 0.0001799952303207658, + "loss": 0.9, + "step": 19825 + }, + { + "epoch": 0.28, + "grad_norm": 0.66796875, + "learning_rate": 0.00017998020450078203, + "loss": 0.9419, + "step": 19830 + }, + { + "epoch": 0.28, + "grad_norm": 0.6171875, + "learning_rate": 0.00017996517366754798, + "loss": 0.8759, + "step": 19835 + }, + { + "epoch": 0.28, + "grad_norm": 0.69140625, + "learning_rate": 0.00017995013782200574, + "loss": 1.1232, + "step": 19840 + }, + { + "epoch": 0.28, + "grad_norm": 0.5625, + "learning_rate": 0.0001799350969650978, + "loss": 0.8869, + "step": 19845 + }, + { + "epoch": 0.28, + "grad_norm": 0.62890625, + "learning_rate": 0.00017992005109776694, + "loss": 0.9544, + "step": 19850 + }, + { + "epoch": 0.28, + "grad_norm": 0.52734375, + "learning_rate": 0.0001799050002209563, + "loss": 0.8654, + "step": 19855 + }, + { + "epoch": 0.28, + "grad_norm": 0.60546875, + "learning_rate": 0.0001798899443356092, + "loss": 0.879, + "step": 19860 + }, + { + "epoch": 0.28, + "grad_norm": 0.51953125, + "learning_rate": 0.00017987488344266942, + "loss": 1.0349, + "step": 19865 + }, + { + "epoch": 0.29, + "grad_norm": 0.54296875, + "learning_rate": 0.00017985981754308096, + "loss": 0.9102, + "step": 19870 + }, + { + "epoch": 0.29, + "grad_norm": 0.5390625, + "learning_rate": 0.0001798447466377882, + "loss": 0.8759, + "step": 19875 + }, + { + "epoch": 0.29, + "grad_norm": 0.58203125, + "learning_rate": 0.00017982967072773578, + "loss": 1.0562, + "step": 19880 + }, + { + "epoch": 0.29, + "grad_norm": 0.5078125, + "learning_rate": 0.00017981458981386868, + "loss": 0.9362, + "step": 19885 + }, + { + "epoch": 0.29, + "grad_norm": 0.5546875, + "learning_rate": 0.00017979950389713218, + "loss": 0.9396, + "step": 19890 + }, + { + "epoch": 0.29, + "grad_norm": 0.58203125, + "learning_rate": 0.0001797844129784719, + "loss": 1.1549, + "step": 19895 + }, + { + "epoch": 0.29, + "grad_norm": 0.5859375, + "learning_rate": 0.00017976931705883376, + "loss": 1.0361, + "step": 19900 + }, + { + "epoch": 0.29, + "grad_norm": 0.55078125, + "learning_rate": 0.00017975421613916395, + "loss": 0.9686, + "step": 19905 + }, + { + "epoch": 0.29, + "grad_norm": 0.53125, + "learning_rate": 0.00017973911022040905, + "loss": 0.9509, + "step": 19910 + }, + { + "epoch": 0.29, + "grad_norm": 0.59765625, + "learning_rate": 0.00017972399930351593, + "loss": 1.1096, + "step": 19915 + }, + { + "epoch": 0.29, + "grad_norm": 0.546875, + "learning_rate": 0.00017970888338943172, + "loss": 1.0179, + "step": 19920 + }, + { + "epoch": 0.29, + "grad_norm": 0.79296875, + "learning_rate": 0.00017969376247910392, + "loss": 1.0277, + "step": 19925 + }, + { + "epoch": 0.29, + "grad_norm": 0.546875, + "learning_rate": 0.0001796786365734803, + "loss": 0.891, + "step": 19930 + }, + { + "epoch": 0.29, + "grad_norm": 0.66015625, + "learning_rate": 0.00017966350567350902, + "loss": 0.9891, + "step": 19935 + }, + { + "epoch": 0.29, + "grad_norm": 0.6875, + "learning_rate": 0.00017964836978013845, + "loss": 1.0103, + "step": 19940 + }, + { + "epoch": 0.29, + "grad_norm": 0.55859375, + "learning_rate": 0.0001796332288943174, + "loss": 1.0574, + "step": 19945 + }, + { + "epoch": 0.29, + "grad_norm": 0.6015625, + "learning_rate": 0.00017961808301699482, + "loss": 0.9753, + "step": 19950 + }, + { + "epoch": 0.29, + "grad_norm": 0.83984375, + "learning_rate": 0.0001796029321491201, + "loss": 1.0732, + "step": 19955 + }, + { + "epoch": 0.29, + "grad_norm": 0.58203125, + "learning_rate": 0.00017958777629164301, + "loss": 0.888, + "step": 19960 + }, + { + "epoch": 0.29, + "grad_norm": 0.56640625, + "learning_rate": 0.00017957261544551342, + "loss": 0.9886, + "step": 19965 + }, + { + "epoch": 0.29, + "grad_norm": 0.578125, + "learning_rate": 0.00017955744961168163, + "loss": 0.9075, + "step": 19970 + }, + { + "epoch": 0.29, + "grad_norm": 0.5390625, + "learning_rate": 0.00017954227879109834, + "loss": 0.9966, + "step": 19975 + }, + { + "epoch": 0.29, + "grad_norm": 0.5859375, + "learning_rate": 0.00017952710298471442, + "loss": 1.0853, + "step": 19980 + }, + { + "epoch": 0.29, + "grad_norm": 0.54296875, + "learning_rate": 0.0001795119221934811, + "loss": 0.9075, + "step": 19985 + }, + { + "epoch": 0.29, + "grad_norm": 0.65234375, + "learning_rate": 0.00017949673641834993, + "loss": 0.956, + "step": 19990 + }, + { + "epoch": 0.29, + "grad_norm": 0.54296875, + "learning_rate": 0.0001794815456602728, + "loss": 0.9857, + "step": 19995 + }, + { + "epoch": 0.29, + "grad_norm": 0.52734375, + "learning_rate": 0.00017946634992020187, + "loss": 1.0116, + "step": 20000 + }, + { + "epoch": 0.29, + "grad_norm": 0.5546875, + "learning_rate": 0.00017945114919908962, + "loss": 1.0116, + "step": 20005 + }, + { + "epoch": 0.29, + "grad_norm": 0.6484375, + "learning_rate": 0.00017943594349788882, + "loss": 1.0758, + "step": 20010 + }, + { + "epoch": 0.29, + "grad_norm": 0.64453125, + "learning_rate": 0.00017942073281755264, + "loss": 0.9998, + "step": 20015 + }, + { + "epoch": 0.29, + "grad_norm": 0.58984375, + "learning_rate": 0.00017940551715903448, + "loss": 1.0343, + "step": 20020 + }, + { + "epoch": 0.29, + "grad_norm": 0.58203125, + "learning_rate": 0.00017939029652328805, + "loss": 1.0741, + "step": 20025 + }, + { + "epoch": 0.29, + "grad_norm": 0.6328125, + "learning_rate": 0.00017937507091126743, + "loss": 1.0459, + "step": 20030 + }, + { + "epoch": 0.29, + "grad_norm": 0.59765625, + "learning_rate": 0.00017935984032392695, + "loss": 0.9559, + "step": 20035 + }, + { + "epoch": 0.29, + "grad_norm": 0.58203125, + "learning_rate": 0.0001793446047622213, + "loss": 0.8793, + "step": 20040 + }, + { + "epoch": 0.29, + "grad_norm": 0.53515625, + "learning_rate": 0.00017932936422710547, + "loss": 0.888, + "step": 20045 + }, + { + "epoch": 0.29, + "grad_norm": 0.58203125, + "learning_rate": 0.00017931411871953474, + "loss": 0.8765, + "step": 20050 + }, + { + "epoch": 0.29, + "grad_norm": 0.57421875, + "learning_rate": 0.0001792988682404647, + "loss": 1.0438, + "step": 20055 + }, + { + "epoch": 0.29, + "grad_norm": 0.6171875, + "learning_rate": 0.00017928361279085132, + "loss": 1.1002, + "step": 20060 + }, + { + "epoch": 0.29, + "grad_norm": 0.53125, + "learning_rate": 0.00017926835237165074, + "loss": 1.0579, + "step": 20065 + }, + { + "epoch": 0.29, + "grad_norm": 0.59765625, + "learning_rate": 0.0001792530869838196, + "loss": 1.0192, + "step": 20070 + }, + { + "epoch": 0.29, + "grad_norm": 0.64453125, + "learning_rate": 0.00017923781662831467, + "loss": 1.0652, + "step": 20075 + }, + { + "epoch": 0.29, + "grad_norm": 0.5703125, + "learning_rate": 0.00017922254130609317, + "loss": 0.8573, + "step": 20080 + }, + { + "epoch": 0.29, + "grad_norm": 0.51953125, + "learning_rate": 0.00017920726101811255, + "loss": 0.848, + "step": 20085 + }, + { + "epoch": 0.29, + "grad_norm": 0.56640625, + "learning_rate": 0.0001791919757653306, + "loss": 0.8645, + "step": 20090 + }, + { + "epoch": 0.29, + "grad_norm": 0.515625, + "learning_rate": 0.00017917668554870544, + "loss": 1.0636, + "step": 20095 + }, + { + "epoch": 0.29, + "grad_norm": 0.5390625, + "learning_rate": 0.00017916139036919544, + "loss": 0.9071, + "step": 20100 + }, + { + "epoch": 0.29, + "grad_norm": 0.56640625, + "learning_rate": 0.0001791460902277593, + "loss": 0.9749, + "step": 20105 + }, + { + "epoch": 0.29, + "grad_norm": 0.5546875, + "learning_rate": 0.00017913078512535611, + "loss": 0.9368, + "step": 20110 + }, + { + "epoch": 0.29, + "grad_norm": 0.55859375, + "learning_rate": 0.0001791154750629452, + "loss": 0.8993, + "step": 20115 + }, + { + "epoch": 0.29, + "grad_norm": 0.59765625, + "learning_rate": 0.0001791001600414862, + "loss": 0.8609, + "step": 20120 + }, + { + "epoch": 0.29, + "grad_norm": 0.48046875, + "learning_rate": 0.0001790848400619391, + "loss": 1.0746, + "step": 20125 + }, + { + "epoch": 0.29, + "grad_norm": 0.54296875, + "learning_rate": 0.00017906951512526413, + "loss": 1.0527, + "step": 20130 + }, + { + "epoch": 0.29, + "grad_norm": 0.62890625, + "learning_rate": 0.0001790541852324219, + "loss": 0.9294, + "step": 20135 + }, + { + "epoch": 0.29, + "grad_norm": 0.51953125, + "learning_rate": 0.00017903885038437331, + "loss": 0.8542, + "step": 20140 + }, + { + "epoch": 0.29, + "grad_norm": 0.53125, + "learning_rate": 0.00017902351058207957, + "loss": 0.9395, + "step": 20145 + }, + { + "epoch": 0.29, + "grad_norm": 0.60546875, + "learning_rate": 0.00017900816582650214, + "loss": 1.0316, + "step": 20150 + }, + { + "epoch": 0.29, + "grad_norm": 0.55859375, + "learning_rate": 0.00017899281611860295, + "loss": 0.8784, + "step": 20155 + }, + { + "epoch": 0.29, + "grad_norm": 0.62890625, + "learning_rate": 0.00017897746145934408, + "loss": 0.9608, + "step": 20160 + }, + { + "epoch": 0.29, + "grad_norm": 0.58203125, + "learning_rate": 0.00017896210184968793, + "loss": 1.044, + "step": 20165 + }, + { + "epoch": 0.29, + "grad_norm": 0.57421875, + "learning_rate": 0.00017894673729059734, + "loss": 0.8963, + "step": 20170 + }, + { + "epoch": 0.29, + "grad_norm": 0.515625, + "learning_rate": 0.00017893136778303532, + "loss": 1.0971, + "step": 20175 + }, + { + "epoch": 0.29, + "grad_norm": 0.56640625, + "learning_rate": 0.0001789159933279653, + "loss": 0.9522, + "step": 20180 + }, + { + "epoch": 0.29, + "grad_norm": 0.5390625, + "learning_rate": 0.00017890061392635093, + "loss": 0.8735, + "step": 20185 + }, + { + "epoch": 0.29, + "grad_norm": 0.478515625, + "learning_rate": 0.00017888522957915624, + "loss": 0.9023, + "step": 20190 + }, + { + "epoch": 0.29, + "grad_norm": 0.61328125, + "learning_rate": 0.00017886984028734547, + "loss": 0.8952, + "step": 20195 + }, + { + "epoch": 0.29, + "grad_norm": 0.5390625, + "learning_rate": 0.0001788544460518833, + "loss": 0.8191, + "step": 20200 + }, + { + "epoch": 0.29, + "grad_norm": 0.58984375, + "learning_rate": 0.00017883904687373466, + "loss": 0.9671, + "step": 20205 + }, + { + "epoch": 0.29, + "grad_norm": 0.6171875, + "learning_rate": 0.00017882364275386477, + "loss": 1.1059, + "step": 20210 + }, + { + "epoch": 0.29, + "grad_norm": 0.59375, + "learning_rate": 0.00017880823369323918, + "loss": 0.9204, + "step": 20215 + }, + { + "epoch": 0.29, + "grad_norm": 0.63671875, + "learning_rate": 0.00017879281969282373, + "loss": 0.9978, + "step": 20220 + }, + { + "epoch": 0.29, + "grad_norm": 0.53125, + "learning_rate": 0.00017877740075358461, + "loss": 1.0445, + "step": 20225 + }, + { + "epoch": 0.29, + "grad_norm": 0.7578125, + "learning_rate": 0.00017876197687648833, + "loss": 1.1439, + "step": 20230 + }, + { + "epoch": 0.29, + "grad_norm": 0.55078125, + "learning_rate": 0.0001787465480625016, + "loss": 0.9755, + "step": 20235 + }, + { + "epoch": 0.29, + "grad_norm": 0.54296875, + "learning_rate": 0.00017873111431259154, + "loss": 0.947, + "step": 20240 + }, + { + "epoch": 0.29, + "grad_norm": 0.546875, + "learning_rate": 0.00017871567562772559, + "loss": 0.9988, + "step": 20245 + }, + { + "epoch": 0.29, + "grad_norm": 0.51953125, + "learning_rate": 0.00017870023200887143, + "loss": 1.0291, + "step": 20250 + }, + { + "epoch": 0.29, + "grad_norm": 0.58203125, + "learning_rate": 0.0001786847834569971, + "loss": 0.8948, + "step": 20255 + }, + { + "epoch": 0.29, + "grad_norm": 0.67578125, + "learning_rate": 0.00017866932997307093, + "loss": 0.9763, + "step": 20260 + }, + { + "epoch": 0.29, + "grad_norm": 0.5703125, + "learning_rate": 0.00017865387155806156, + "loss": 0.9449, + "step": 20265 + }, + { + "epoch": 0.29, + "grad_norm": 0.66796875, + "learning_rate": 0.00017863840821293793, + "loss": 0.9888, + "step": 20270 + }, + { + "epoch": 0.29, + "grad_norm": 0.55078125, + "learning_rate": 0.00017862293993866935, + "loss": 0.9415, + "step": 20275 + }, + { + "epoch": 0.29, + "grad_norm": 0.58984375, + "learning_rate": 0.00017860746673622533, + "loss": 1.0488, + "step": 20280 + }, + { + "epoch": 0.29, + "grad_norm": 0.6875, + "learning_rate": 0.00017859198860657575, + "loss": 0.9914, + "step": 20285 + }, + { + "epoch": 0.29, + "grad_norm": 0.51171875, + "learning_rate": 0.0001785765055506908, + "loss": 0.9855, + "step": 20290 + }, + { + "epoch": 0.29, + "grad_norm": 0.68359375, + "learning_rate": 0.00017856101756954105, + "loss": 1.1754, + "step": 20295 + }, + { + "epoch": 0.29, + "grad_norm": 0.54296875, + "learning_rate": 0.00017854552466409722, + "loss": 0.9697, + "step": 20300 + }, + { + "epoch": 0.29, + "grad_norm": 0.60546875, + "learning_rate": 0.00017853002683533044, + "loss": 0.9202, + "step": 20305 + }, + { + "epoch": 0.29, + "grad_norm": 0.55078125, + "learning_rate": 0.00017851452408421215, + "loss": 0.981, + "step": 20310 + }, + { + "epoch": 0.29, + "grad_norm": 0.578125, + "learning_rate": 0.00017849901641171404, + "loss": 0.9659, + "step": 20315 + }, + { + "epoch": 0.29, + "grad_norm": 0.5546875, + "learning_rate": 0.00017848350381880824, + "loss": 1.0127, + "step": 20320 + }, + { + "epoch": 0.29, + "grad_norm": 0.51953125, + "learning_rate": 0.00017846798630646697, + "loss": 0.8861, + "step": 20325 + }, + { + "epoch": 0.29, + "grad_norm": 0.84765625, + "learning_rate": 0.00017845246387566296, + "loss": 0.9737, + "step": 20330 + }, + { + "epoch": 0.29, + "grad_norm": 0.5703125, + "learning_rate": 0.00017843693652736922, + "loss": 0.9233, + "step": 20335 + }, + { + "epoch": 0.29, + "grad_norm": 0.55859375, + "learning_rate": 0.0001784214042625589, + "loss": 0.8343, + "step": 20340 + }, + { + "epoch": 0.29, + "grad_norm": 0.5546875, + "learning_rate": 0.00017840586708220567, + "loss": 0.9862, + "step": 20345 + }, + { + "epoch": 0.29, + "grad_norm": 0.56640625, + "learning_rate": 0.0001783903249872834, + "loss": 0.9632, + "step": 20350 + }, + { + "epoch": 0.29, + "grad_norm": 0.55078125, + "learning_rate": 0.00017837477797876629, + "loss": 1.0639, + "step": 20355 + }, + { + "epoch": 0.29, + "grad_norm": 0.53125, + "learning_rate": 0.00017835922605762884, + "loss": 0.9325, + "step": 20360 + }, + { + "epoch": 0.29, + "grad_norm": 0.5546875, + "learning_rate": 0.0001783436692248458, + "loss": 0.8678, + "step": 20365 + }, + { + "epoch": 0.29, + "grad_norm": 0.4921875, + "learning_rate": 0.00017832810748139242, + "loss": 1.0376, + "step": 20370 + }, + { + "epoch": 0.29, + "grad_norm": 0.56640625, + "learning_rate": 0.00017831254082824403, + "loss": 0.9049, + "step": 20375 + }, + { + "epoch": 0.29, + "grad_norm": 0.515625, + "learning_rate": 0.00017829696926637638, + "loss": 0.9619, + "step": 20380 + }, + { + "epoch": 0.29, + "grad_norm": 0.56640625, + "learning_rate": 0.00017828139279676551, + "loss": 0.9297, + "step": 20385 + }, + { + "epoch": 0.29, + "grad_norm": 0.57421875, + "learning_rate": 0.00017826581142038782, + "loss": 1.0391, + "step": 20390 + }, + { + "epoch": 0.29, + "grad_norm": 0.59375, + "learning_rate": 0.0001782502251382199, + "loss": 1.0339, + "step": 20395 + }, + { + "epoch": 0.29, + "grad_norm": 0.58203125, + "learning_rate": 0.00017823463395123877, + "loss": 1.0506, + "step": 20400 + }, + { + "epoch": 0.29, + "grad_norm": 0.515625, + "learning_rate": 0.00017821903786042166, + "loss": 1.0093, + "step": 20405 + }, + { + "epoch": 0.29, + "grad_norm": 0.578125, + "learning_rate": 0.0001782034368667462, + "loss": 0.8458, + "step": 20410 + }, + { + "epoch": 0.29, + "grad_norm": 0.578125, + "learning_rate": 0.00017818783097119022, + "loss": 1.141, + "step": 20415 + }, + { + "epoch": 0.29, + "grad_norm": 0.59375, + "learning_rate": 0.00017817222017473198, + "loss": 0.891, + "step": 20420 + }, + { + "epoch": 0.29, + "grad_norm": 0.5390625, + "learning_rate": 0.00017815660447834995, + "loss": 0.838, + "step": 20425 + }, + { + "epoch": 0.29, + "grad_norm": 0.59375, + "learning_rate": 0.0001781409838830229, + "loss": 0.9979, + "step": 20430 + }, + { + "epoch": 0.29, + "grad_norm": 0.5625, + "learning_rate": 0.00017812535838973004, + "loss": 1.0084, + "step": 20435 + }, + { + "epoch": 0.29, + "grad_norm": 0.55859375, + "learning_rate": 0.0001781097279994507, + "loss": 0.9987, + "step": 20440 + }, + { + "epoch": 0.29, + "grad_norm": 0.5703125, + "learning_rate": 0.00017809409271316465, + "loss": 1.0072, + "step": 20445 + }, + { + "epoch": 0.29, + "grad_norm": 0.578125, + "learning_rate": 0.00017807845253185194, + "loss": 0.9746, + "step": 20450 + }, + { + "epoch": 0.29, + "grad_norm": 0.6015625, + "learning_rate": 0.00017806280745649288, + "loss": 1.0014, + "step": 20455 + }, + { + "epoch": 0.29, + "grad_norm": 0.6171875, + "learning_rate": 0.00017804715748806818, + "loss": 1.0265, + "step": 20460 + }, + { + "epoch": 0.29, + "grad_norm": 0.5390625, + "learning_rate": 0.00017803150262755873, + "loss": 0.874, + "step": 20465 + }, + { + "epoch": 0.29, + "grad_norm": 0.5546875, + "learning_rate": 0.00017801584287594583, + "loss": 0.8944, + "step": 20470 + }, + { + "epoch": 0.29, + "grad_norm": 0.458984375, + "learning_rate": 0.00017800017823421102, + "loss": 0.9781, + "step": 20475 + }, + { + "epoch": 0.29, + "grad_norm": 0.65234375, + "learning_rate": 0.00017798450870333625, + "loss": 0.9153, + "step": 20480 + }, + { + "epoch": 0.29, + "grad_norm": 0.58203125, + "learning_rate": 0.00017796883428430364, + "loss": 1.0358, + "step": 20485 + }, + { + "epoch": 0.29, + "grad_norm": 0.5390625, + "learning_rate": 0.00017795315497809571, + "loss": 1.0432, + "step": 20490 + }, + { + "epoch": 0.29, + "grad_norm": 0.5625, + "learning_rate": 0.00017793747078569523, + "loss": 0.943, + "step": 20495 + }, + { + "epoch": 0.29, + "grad_norm": 0.51171875, + "learning_rate": 0.0001779217817080853, + "loss": 0.8886, + "step": 20500 + }, + { + "epoch": 0.29, + "grad_norm": 0.54296875, + "learning_rate": 0.00017790608774624937, + "loss": 0.8394, + "step": 20505 + }, + { + "epoch": 0.29, + "grad_norm": 0.53515625, + "learning_rate": 0.00017789038890117113, + "loss": 0.9902, + "step": 20510 + }, + { + "epoch": 0.29, + "grad_norm": 0.58203125, + "learning_rate": 0.00017787468517383462, + "loss": 0.9307, + "step": 20515 + }, + { + "epoch": 0.29, + "grad_norm": 0.494140625, + "learning_rate": 0.00017785897656522412, + "loss": 0.833, + "step": 20520 + }, + { + "epoch": 0.29, + "grad_norm": 0.54296875, + "learning_rate": 0.0001778432630763243, + "loss": 0.8931, + "step": 20525 + }, + { + "epoch": 0.29, + "grad_norm": 0.609375, + "learning_rate": 0.00017782754470812014, + "loss": 0.8259, + "step": 20530 + }, + { + "epoch": 0.29, + "grad_norm": 0.58203125, + "learning_rate": 0.0001778118214615968, + "loss": 0.9197, + "step": 20535 + }, + { + "epoch": 0.29, + "grad_norm": 0.5625, + "learning_rate": 0.00017779609333773988, + "loss": 0.7937, + "step": 20540 + }, + { + "epoch": 0.29, + "grad_norm": 0.58203125, + "learning_rate": 0.0001777803603375352, + "loss": 0.959, + "step": 20545 + }, + { + "epoch": 0.29, + "grad_norm": 0.5, + "learning_rate": 0.000177764622461969, + "loss": 0.7378, + "step": 20550 + }, + { + "epoch": 0.29, + "grad_norm": 0.6328125, + "learning_rate": 0.00017774887971202765, + "loss": 0.9992, + "step": 20555 + }, + { + "epoch": 0.29, + "grad_norm": 0.60546875, + "learning_rate": 0.00017773313208869799, + "loss": 1.0006, + "step": 20560 + }, + { + "epoch": 0.29, + "grad_norm": 0.5625, + "learning_rate": 0.00017771737959296706, + "loss": 0.8735, + "step": 20565 + }, + { + "epoch": 0.3, + "grad_norm": 0.5390625, + "learning_rate": 0.0001777016222258223, + "loss": 0.8964, + "step": 20570 + }, + { + "epoch": 0.3, + "grad_norm": 0.56640625, + "learning_rate": 0.00017768585998825134, + "loss": 1.0407, + "step": 20575 + }, + { + "epoch": 0.3, + "grad_norm": 0.53515625, + "learning_rate": 0.00017767009288124222, + "loss": 0.9184, + "step": 20580 + }, + { + "epoch": 0.3, + "grad_norm": 0.52734375, + "learning_rate": 0.0001776543209057832, + "loss": 0.9547, + "step": 20585 + }, + { + "epoch": 0.3, + "grad_norm": 0.53515625, + "learning_rate": 0.0001776385440628629, + "loss": 0.994, + "step": 20590 + }, + { + "epoch": 0.3, + "grad_norm": 0.57421875, + "learning_rate": 0.00017762276235347026, + "loss": 0.8365, + "step": 20595 + }, + { + "epoch": 0.3, + "grad_norm": 0.6328125, + "learning_rate": 0.00017760697577859447, + "loss": 0.9293, + "step": 20600 + }, + { + "epoch": 0.3, + "grad_norm": 0.58203125, + "learning_rate": 0.000177591184339225, + "loss": 1.171, + "step": 20605 + }, + { + "epoch": 0.3, + "grad_norm": 0.5859375, + "learning_rate": 0.0001775753880363518, + "loss": 1.011, + "step": 20610 + }, + { + "epoch": 0.3, + "grad_norm": 0.55078125, + "learning_rate": 0.00017755958687096487, + "loss": 0.9621, + "step": 20615 + }, + { + "epoch": 0.3, + "grad_norm": 0.68359375, + "learning_rate": 0.00017754378084405473, + "loss": 1.1206, + "step": 20620 + }, + { + "epoch": 0.3, + "grad_norm": 0.59375, + "learning_rate": 0.0001775279699566121, + "loss": 0.9734, + "step": 20625 + }, + { + "epoch": 0.3, + "grad_norm": 0.55078125, + "learning_rate": 0.00017751215420962798, + "loss": 0.9819, + "step": 20630 + }, + { + "epoch": 0.3, + "grad_norm": 0.55078125, + "learning_rate": 0.00017749633360409378, + "loss": 1.086, + "step": 20635 + }, + { + "epoch": 0.3, + "grad_norm": 0.5859375, + "learning_rate": 0.00017748050814100113, + "loss": 1.0491, + "step": 20640 + }, + { + "epoch": 0.3, + "grad_norm": 0.53125, + "learning_rate": 0.000177464677821342, + "loss": 0.869, + "step": 20645 + }, + { + "epoch": 0.3, + "grad_norm": 0.58203125, + "learning_rate": 0.00017744884264610865, + "loss": 1.0623, + "step": 20650 + }, + { + "epoch": 0.3, + "grad_norm": 0.62890625, + "learning_rate": 0.00017743300261629358, + "loss": 0.8689, + "step": 20655 + }, + { + "epoch": 0.3, + "grad_norm": 0.58984375, + "learning_rate": 0.00017741715773288976, + "loss": 1.1893, + "step": 20660 + }, + { + "epoch": 0.3, + "grad_norm": 0.578125, + "learning_rate": 0.0001774013079968903, + "loss": 1.0839, + "step": 20665 + }, + { + "epoch": 0.3, + "grad_norm": 0.61328125, + "learning_rate": 0.0001773854534092887, + "loss": 0.8849, + "step": 20670 + }, + { + "epoch": 0.3, + "grad_norm": 0.5546875, + "learning_rate": 0.00017736959397107879, + "loss": 1.0321, + "step": 20675 + }, + { + "epoch": 0.3, + "grad_norm": 0.5859375, + "learning_rate": 0.00017735372968325454, + "loss": 0.9963, + "step": 20680 + }, + { + "epoch": 0.3, + "grad_norm": 0.5234375, + "learning_rate": 0.00017733786054681047, + "loss": 1.074, + "step": 20685 + }, + { + "epoch": 0.3, + "grad_norm": 0.5546875, + "learning_rate": 0.00017732198656274123, + "loss": 1.1176, + "step": 20690 + }, + { + "epoch": 0.3, + "grad_norm": 0.57421875, + "learning_rate": 0.00017730610773204175, + "loss": 0.9677, + "step": 20695 + }, + { + "epoch": 0.3, + "grad_norm": 0.56640625, + "learning_rate": 0.00017729022405570745, + "loss": 1.0464, + "step": 20700 + }, + { + "epoch": 0.3, + "grad_norm": 0.53125, + "learning_rate": 0.00017727433553473384, + "loss": 1.0147, + "step": 20705 + }, + { + "epoch": 0.3, + "grad_norm": 0.5625, + "learning_rate": 0.0001772584421701169, + "loss": 0.9867, + "step": 20710 + }, + { + "epoch": 0.3, + "grad_norm": 0.55078125, + "learning_rate": 0.0001772425439628528, + "loss": 0.8194, + "step": 20715 + }, + { + "epoch": 0.3, + "grad_norm": 0.6640625, + "learning_rate": 0.00017722664091393808, + "loss": 0.9378, + "step": 20720 + }, + { + "epoch": 0.3, + "grad_norm": 0.5546875, + "learning_rate": 0.00017721073302436955, + "loss": 0.9764, + "step": 20725 + }, + { + "epoch": 0.3, + "grad_norm": 0.55859375, + "learning_rate": 0.00017719482029514435, + "loss": 0.9781, + "step": 20730 + }, + { + "epoch": 0.3, + "grad_norm": 0.478515625, + "learning_rate": 0.00017717890272725986, + "loss": 0.9302, + "step": 20735 + }, + { + "epoch": 0.3, + "grad_norm": 0.61328125, + "learning_rate": 0.0001771629803217139, + "loss": 0.9262, + "step": 20740 + }, + { + "epoch": 0.3, + "grad_norm": 0.609375, + "learning_rate": 0.0001771470530795044, + "loss": 0.8817, + "step": 20745 + }, + { + "epoch": 0.3, + "grad_norm": 0.57421875, + "learning_rate": 0.00017713112100162977, + "loss": 0.9105, + "step": 20750 + }, + { + "epoch": 0.3, + "grad_norm": 0.6328125, + "learning_rate": 0.00017711518408908866, + "loss": 0.8789, + "step": 20755 + }, + { + "epoch": 0.3, + "grad_norm": 0.63671875, + "learning_rate": 0.00017709924234287993, + "loss": 1.0325, + "step": 20760 + }, + { + "epoch": 0.3, + "grad_norm": 0.5234375, + "learning_rate": 0.00017708329576400294, + "loss": 1.0813, + "step": 20765 + }, + { + "epoch": 0.3, + "grad_norm": 0.57421875, + "learning_rate": 0.00017706734435345714, + "loss": 0.8091, + "step": 20770 + }, + { + "epoch": 0.3, + "grad_norm": 0.61328125, + "learning_rate": 0.00017705138811224241, + "loss": 0.9214, + "step": 20775 + }, + { + "epoch": 0.3, + "grad_norm": 0.56640625, + "learning_rate": 0.00017703542704135895, + "loss": 1.1351, + "step": 20780 + }, + { + "epoch": 0.3, + "grad_norm": 0.60546875, + "learning_rate": 0.0001770194611418072, + "loss": 0.9709, + "step": 20785 + }, + { + "epoch": 0.3, + "grad_norm": 0.578125, + "learning_rate": 0.0001770034904145879, + "loss": 1.0779, + "step": 20790 + }, + { + "epoch": 0.3, + "grad_norm": 0.5390625, + "learning_rate": 0.0001769875148607021, + "loss": 1.0441, + "step": 20795 + }, + { + "epoch": 0.3, + "grad_norm": 0.6015625, + "learning_rate": 0.0001769715344811512, + "loss": 1.1271, + "step": 20800 + }, + { + "epoch": 0.3, + "grad_norm": 0.6640625, + "learning_rate": 0.00017695554927693686, + "loss": 0.7982, + "step": 20805 + }, + { + "epoch": 0.3, + "grad_norm": 0.55859375, + "learning_rate": 0.00017693955924906102, + "loss": 0.9824, + "step": 20810 + }, + { + "epoch": 0.3, + "grad_norm": 0.54296875, + "learning_rate": 0.000176923564398526, + "loss": 1.0395, + "step": 20815 + }, + { + "epoch": 0.3, + "grad_norm": 0.59765625, + "learning_rate": 0.00017690756472633437, + "loss": 0.8858, + "step": 20820 + }, + { + "epoch": 0.3, + "grad_norm": 0.61328125, + "learning_rate": 0.00017689156023348898, + "loss": 1.0001, + "step": 20825 + }, + { + "epoch": 0.3, + "grad_norm": 0.58203125, + "learning_rate": 0.000176875550920993, + "loss": 0.9744, + "step": 20830 + }, + { + "epoch": 0.3, + "grad_norm": 0.466796875, + "learning_rate": 0.00017685953678984998, + "loss": 0.8278, + "step": 20835 + }, + { + "epoch": 0.3, + "grad_norm": 0.625, + "learning_rate": 0.0001768435178410636, + "loss": 0.8934, + "step": 20840 + }, + { + "epoch": 0.3, + "grad_norm": 0.5703125, + "learning_rate": 0.00017682749407563805, + "loss": 0.958, + "step": 20845 + }, + { + "epoch": 0.3, + "grad_norm": 0.7578125, + "learning_rate": 0.00017681146549457764, + "loss": 1.0769, + "step": 20850 + }, + { + "epoch": 0.3, + "grad_norm": 0.59375, + "learning_rate": 0.00017679543209888712, + "loss": 1.1344, + "step": 20855 + }, + { + "epoch": 0.3, + "grad_norm": 0.6015625, + "learning_rate": 0.00017677939388957144, + "loss": 0.9309, + "step": 20860 + }, + { + "epoch": 0.3, + "grad_norm": 0.70703125, + "learning_rate": 0.00017676335086763592, + "loss": 0.9748, + "step": 20865 + }, + { + "epoch": 0.3, + "grad_norm": 0.62890625, + "learning_rate": 0.00017674730303408613, + "loss": 1.0452, + "step": 20870 + }, + { + "epoch": 0.3, + "grad_norm": 0.5625, + "learning_rate": 0.00017673125038992796, + "loss": 1.0058, + "step": 20875 + }, + { + "epoch": 0.3, + "grad_norm": 0.55078125, + "learning_rate": 0.00017671519293616767, + "loss": 1.0156, + "step": 20880 + }, + { + "epoch": 0.3, + "grad_norm": 0.62890625, + "learning_rate": 0.00017669913067381167, + "loss": 0.8449, + "step": 20885 + }, + { + "epoch": 0.3, + "grad_norm": 0.498046875, + "learning_rate": 0.00017668306360386685, + "loss": 0.9541, + "step": 20890 + }, + { + "epoch": 0.3, + "grad_norm": 0.53515625, + "learning_rate": 0.00017666699172734022, + "loss": 0.931, + "step": 20895 + }, + { + "epoch": 0.3, + "grad_norm": 0.55859375, + "learning_rate": 0.00017665091504523927, + "loss": 0.9573, + "step": 20900 + }, + { + "epoch": 0.3, + "grad_norm": 0.546875, + "learning_rate": 0.00017663483355857164, + "loss": 0.9587, + "step": 20905 + }, + { + "epoch": 0.3, + "grad_norm": 0.53125, + "learning_rate": 0.0001766187472683454, + "loss": 1.0087, + "step": 20910 + }, + { + "epoch": 0.3, + "grad_norm": 0.66015625, + "learning_rate": 0.00017660265617556878, + "loss": 1.1384, + "step": 20915 + }, + { + "epoch": 0.3, + "grad_norm": 0.578125, + "learning_rate": 0.00017658656028125046, + "loss": 1.1095, + "step": 20920 + }, + { + "epoch": 0.3, + "grad_norm": 0.5546875, + "learning_rate": 0.00017657045958639932, + "loss": 1.0633, + "step": 20925 + }, + { + "epoch": 0.3, + "grad_norm": 0.5625, + "learning_rate": 0.00017655435409202452, + "loss": 1.0446, + "step": 20930 + }, + { + "epoch": 0.3, + "grad_norm": 0.66015625, + "learning_rate": 0.00017653824379913565, + "loss": 1.067, + "step": 20935 + }, + { + "epoch": 0.3, + "grad_norm": 0.546875, + "learning_rate": 0.0001765221287087425, + "loss": 1.037, + "step": 20940 + }, + { + "epoch": 0.3, + "grad_norm": 0.62109375, + "learning_rate": 0.00017650600882185513, + "loss": 0.9658, + "step": 20945 + }, + { + "epoch": 0.3, + "grad_norm": 0.59765625, + "learning_rate": 0.000176489884139484, + "loss": 0.9823, + "step": 20950 + }, + { + "epoch": 0.3, + "grad_norm": 0.5703125, + "learning_rate": 0.00017647375466263988, + "loss": 0.9067, + "step": 20955 + }, + { + "epoch": 0.3, + "grad_norm": 0.51953125, + "learning_rate": 0.00017645762039233368, + "loss": 0.9366, + "step": 20960 + }, + { + "epoch": 0.3, + "grad_norm": 0.5546875, + "learning_rate": 0.00017644148132957673, + "loss": 0.9275, + "step": 20965 + }, + { + "epoch": 0.3, + "grad_norm": 0.63671875, + "learning_rate": 0.0001764253374753807, + "loss": 0.8771, + "step": 20970 + }, + { + "epoch": 0.3, + "grad_norm": 0.64453125, + "learning_rate": 0.00017640918883075747, + "loss": 1.0067, + "step": 20975 + }, + { + "epoch": 0.3, + "grad_norm": 0.60546875, + "learning_rate": 0.00017639303539671928, + "loss": 1.0476, + "step": 20980 + }, + { + "epoch": 0.3, + "grad_norm": 0.4609375, + "learning_rate": 0.00017637687717427859, + "loss": 1.1025, + "step": 20985 + }, + { + "epoch": 0.3, + "grad_norm": 0.5859375, + "learning_rate": 0.00017636071416444828, + "loss": 1.0378, + "step": 20990 + }, + { + "epoch": 0.3, + "grad_norm": 0.52734375, + "learning_rate": 0.00017634454636824144, + "loss": 0.9369, + "step": 20995 + }, + { + "epoch": 0.3, + "grad_norm": 0.66015625, + "learning_rate": 0.0001763283737866715, + "loss": 1.0305, + "step": 21000 + }, + { + "epoch": 0.3, + "grad_norm": 0.70703125, + "learning_rate": 0.0001763121964207521, + "loss": 1.0911, + "step": 21005 + }, + { + "epoch": 0.3, + "grad_norm": 0.62109375, + "learning_rate": 0.00017629601427149736, + "loss": 0.963, + "step": 21010 + }, + { + "epoch": 0.3, + "grad_norm": 0.5703125, + "learning_rate": 0.00017627982733992153, + "loss": 0.9377, + "step": 21015 + }, + { + "epoch": 0.3, + "grad_norm": 0.55859375, + "learning_rate": 0.0001762636356270393, + "loss": 1.0163, + "step": 21020 + }, + { + "epoch": 0.3, + "grad_norm": 0.5390625, + "learning_rate": 0.00017624743913386546, + "loss": 1.0069, + "step": 21025 + }, + { + "epoch": 0.3, + "grad_norm": 0.56640625, + "learning_rate": 0.00017623123786141532, + "loss": 0.9348, + "step": 21030 + }, + { + "epoch": 0.3, + "grad_norm": 0.58203125, + "learning_rate": 0.0001762150318107044, + "loss": 0.9641, + "step": 21035 + }, + { + "epoch": 0.3, + "grad_norm": 0.57421875, + "learning_rate": 0.00017619882098274844, + "loss": 0.9824, + "step": 21040 + }, + { + "epoch": 0.3, + "grad_norm": 0.59765625, + "learning_rate": 0.00017618260537856365, + "loss": 0.978, + "step": 21045 + }, + { + "epoch": 0.3, + "grad_norm": 0.59375, + "learning_rate": 0.00017616638499916634, + "loss": 1.2459, + "step": 21050 + }, + { + "epoch": 0.3, + "grad_norm": 0.5546875, + "learning_rate": 0.0001761501598455733, + "loss": 0.9898, + "step": 21055 + }, + { + "epoch": 0.3, + "grad_norm": 0.56640625, + "learning_rate": 0.0001761339299188015, + "loss": 1.0247, + "step": 21060 + }, + { + "epoch": 0.3, + "grad_norm": 0.6328125, + "learning_rate": 0.0001761176952198683, + "loss": 1.0085, + "step": 21065 + }, + { + "epoch": 0.3, + "grad_norm": 0.5625, + "learning_rate": 0.00017610145574979122, + "loss": 1.0736, + "step": 21070 + }, + { + "epoch": 0.3, + "grad_norm": 0.5390625, + "learning_rate": 0.00017608521150958825, + "loss": 1.0348, + "step": 21075 + }, + { + "epoch": 0.3, + "grad_norm": 0.51171875, + "learning_rate": 0.00017606896250027757, + "loss": 0.8328, + "step": 21080 + }, + { + "epoch": 0.3, + "grad_norm": 0.57421875, + "learning_rate": 0.00017605270872287767, + "loss": 0.8138, + "step": 21085 + }, + { + "epoch": 0.3, + "grad_norm": 0.54296875, + "learning_rate": 0.0001760364501784074, + "loss": 1.0117, + "step": 21090 + }, + { + "epoch": 0.3, + "grad_norm": 0.59375, + "learning_rate": 0.0001760201868678858, + "loss": 0.9574, + "step": 21095 + }, + { + "epoch": 0.3, + "grad_norm": 0.61328125, + "learning_rate": 0.00017600391879233233, + "loss": 1.0016, + "step": 21100 + }, + { + "epoch": 0.3, + "grad_norm": 0.5625, + "learning_rate": 0.00017598764595276665, + "loss": 1.1719, + "step": 21105 + }, + { + "epoch": 0.3, + "grad_norm": 0.51953125, + "learning_rate": 0.0001759713683502088, + "loss": 0.8461, + "step": 21110 + }, + { + "epoch": 0.3, + "grad_norm": 0.5, + "learning_rate": 0.00017595508598567902, + "loss": 0.9367, + "step": 21115 + }, + { + "epoch": 0.3, + "grad_norm": 0.609375, + "learning_rate": 0.00017593879886019799, + "loss": 1.0118, + "step": 21120 + }, + { + "epoch": 0.3, + "grad_norm": 0.5546875, + "learning_rate": 0.00017592250697478652, + "loss": 0.8151, + "step": 21125 + }, + { + "epoch": 0.3, + "grad_norm": 0.54296875, + "learning_rate": 0.0001759062103304659, + "loss": 0.8744, + "step": 21130 + }, + { + "epoch": 0.3, + "grad_norm": 0.55078125, + "learning_rate": 0.0001758899089282575, + "loss": 0.853, + "step": 21135 + }, + { + "epoch": 0.3, + "grad_norm": 0.55859375, + "learning_rate": 0.00017587360276918323, + "loss": 0.9114, + "step": 21140 + }, + { + "epoch": 0.3, + "grad_norm": 0.59765625, + "learning_rate": 0.0001758572918542651, + "loss": 1.012, + "step": 21145 + }, + { + "epoch": 0.3, + "grad_norm": 0.5390625, + "learning_rate": 0.00017584097618452552, + "loss": 0.9139, + "step": 21150 + }, + { + "epoch": 0.3, + "grad_norm": 0.53515625, + "learning_rate": 0.0001758246557609872, + "loss": 0.8946, + "step": 21155 + }, + { + "epoch": 0.3, + "grad_norm": 0.54296875, + "learning_rate": 0.0001758083305846731, + "loss": 1.1681, + "step": 21160 + }, + { + "epoch": 0.3, + "grad_norm": 0.55859375, + "learning_rate": 0.00017579200065660647, + "loss": 0.9387, + "step": 21165 + }, + { + "epoch": 0.3, + "grad_norm": 0.61328125, + "learning_rate": 0.00017577566597781093, + "loss": 1.0792, + "step": 21170 + }, + { + "epoch": 0.3, + "grad_norm": 0.75, + "learning_rate": 0.00017575932654931035, + "loss": 1.1199, + "step": 21175 + }, + { + "epoch": 0.3, + "grad_norm": 0.546875, + "learning_rate": 0.00017574298237212887, + "loss": 0.913, + "step": 21180 + }, + { + "epoch": 0.3, + "grad_norm": 0.51171875, + "learning_rate": 0.00017572663344729103, + "loss": 0.9306, + "step": 21185 + }, + { + "epoch": 0.3, + "grad_norm": 0.5078125, + "learning_rate": 0.00017571027977582158, + "loss": 0.9791, + "step": 21190 + }, + { + "epoch": 0.3, + "grad_norm": 0.51171875, + "learning_rate": 0.00017569392135874552, + "loss": 0.9209, + "step": 21195 + }, + { + "epoch": 0.3, + "grad_norm": 0.62890625, + "learning_rate": 0.00017567755819708828, + "loss": 1.0335, + "step": 21200 + }, + { + "epoch": 0.3, + "grad_norm": 0.58203125, + "learning_rate": 0.0001756611902918755, + "loss": 0.9791, + "step": 21205 + }, + { + "epoch": 0.3, + "grad_norm": 0.69140625, + "learning_rate": 0.00017564481764413312, + "loss": 0.9551, + "step": 21210 + }, + { + "epoch": 0.3, + "grad_norm": 0.55078125, + "learning_rate": 0.00017562844025488747, + "loss": 0.9948, + "step": 21215 + }, + { + "epoch": 0.3, + "grad_norm": 0.55078125, + "learning_rate": 0.000175612058125165, + "loss": 0.9004, + "step": 21220 + }, + { + "epoch": 0.3, + "grad_norm": 0.55859375, + "learning_rate": 0.00017559567125599265, + "loss": 0.9636, + "step": 21225 + }, + { + "epoch": 0.3, + "grad_norm": 0.59375, + "learning_rate": 0.0001755792796483975, + "loss": 0.9681, + "step": 21230 + }, + { + "epoch": 0.3, + "grad_norm": 0.6015625, + "learning_rate": 0.00017556288330340705, + "loss": 0.9337, + "step": 21235 + }, + { + "epoch": 0.3, + "grad_norm": 0.546875, + "learning_rate": 0.000175546482222049, + "loss": 0.8969, + "step": 21240 + }, + { + "epoch": 0.3, + "grad_norm": 0.5703125, + "learning_rate": 0.0001755300764053514, + "loss": 0.8336, + "step": 21245 + }, + { + "epoch": 0.3, + "grad_norm": 0.68359375, + "learning_rate": 0.00017551366585434258, + "loss": 0.9793, + "step": 21250 + }, + { + "epoch": 0.3, + "grad_norm": 0.578125, + "learning_rate": 0.00017549725057005122, + "loss": 0.9903, + "step": 21255 + }, + { + "epoch": 0.3, + "grad_norm": 0.53515625, + "learning_rate": 0.0001754808305535062, + "loss": 0.8948, + "step": 21260 + }, + { + "epoch": 0.31, + "grad_norm": 0.61328125, + "learning_rate": 0.00017546440580573674, + "loss": 1.1077, + "step": 21265 + }, + { + "epoch": 0.31, + "grad_norm": 0.5390625, + "learning_rate": 0.00017544797632777236, + "loss": 0.9889, + "step": 21270 + }, + { + "epoch": 0.31, + "grad_norm": 0.51953125, + "learning_rate": 0.0001754315421206429, + "loss": 0.999, + "step": 21275 + }, + { + "epoch": 0.31, + "grad_norm": 0.6328125, + "learning_rate": 0.0001754151031853785, + "loss": 1.0356, + "step": 21280 + }, + { + "epoch": 0.31, + "grad_norm": 0.54296875, + "learning_rate": 0.00017539865952300953, + "loss": 0.9596, + "step": 21285 + }, + { + "epoch": 0.31, + "grad_norm": 0.5390625, + "learning_rate": 0.0001753822111345667, + "loss": 1.0203, + "step": 21290 + }, + { + "epoch": 0.31, + "grad_norm": 0.58203125, + "learning_rate": 0.00017536575802108102, + "loss": 1.1007, + "step": 21295 + }, + { + "epoch": 0.31, + "grad_norm": 0.5390625, + "learning_rate": 0.00017534930018358382, + "loss": 0.9677, + "step": 21300 + }, + { + "epoch": 0.31, + "grad_norm": 0.58984375, + "learning_rate": 0.00017533283762310667, + "loss": 0.8472, + "step": 21305 + }, + { + "epoch": 0.31, + "grad_norm": 1.0, + "learning_rate": 0.00017531637034068142, + "loss": 1.0809, + "step": 21310 + }, + { + "epoch": 0.31, + "grad_norm": 0.625, + "learning_rate": 0.0001752998983373403, + "loss": 1.0434, + "step": 21315 + }, + { + "epoch": 0.31, + "grad_norm": 0.5546875, + "learning_rate": 0.00017528342161411583, + "loss": 1.0721, + "step": 21320 + }, + { + "epoch": 0.31, + "grad_norm": 0.52734375, + "learning_rate": 0.00017526694017204074, + "loss": 0.9382, + "step": 21325 + }, + { + "epoch": 0.31, + "grad_norm": 0.482421875, + "learning_rate": 0.0001752504540121481, + "loss": 0.9678, + "step": 21330 + }, + { + "epoch": 0.31, + "grad_norm": 0.58984375, + "learning_rate": 0.00017523396313547134, + "loss": 0.8703, + "step": 21335 + }, + { + "epoch": 0.31, + "grad_norm": 0.66796875, + "learning_rate": 0.00017521746754304407, + "loss": 0.9302, + "step": 21340 + }, + { + "epoch": 0.31, + "grad_norm": 0.59765625, + "learning_rate": 0.00017520096723590024, + "loss": 1.0109, + "step": 21345 + }, + { + "epoch": 0.31, + "grad_norm": 0.62890625, + "learning_rate": 0.00017518446221507415, + "loss": 0.9523, + "step": 21350 + }, + { + "epoch": 0.31, + "grad_norm": 0.6796875, + "learning_rate": 0.00017516795248160037, + "loss": 0.9953, + "step": 21355 + }, + { + "epoch": 0.31, + "grad_norm": 0.640625, + "learning_rate": 0.00017515143803651368, + "loss": 0.9026, + "step": 21360 + }, + { + "epoch": 0.31, + "grad_norm": 0.515625, + "learning_rate": 0.00017513491888084928, + "loss": 0.8433, + "step": 21365 + }, + { + "epoch": 0.31, + "grad_norm": 0.6015625, + "learning_rate": 0.0001751183950156426, + "loss": 1.0761, + "step": 21370 + }, + { + "epoch": 0.31, + "grad_norm": 0.62890625, + "learning_rate": 0.00017510186644192937, + "loss": 0.9921, + "step": 21375 + }, + { + "epoch": 0.31, + "grad_norm": 0.490234375, + "learning_rate": 0.0001750853331607456, + "loss": 0.9785, + "step": 21380 + }, + { + "epoch": 0.31, + "grad_norm": 0.5078125, + "learning_rate": 0.00017506879517312763, + "loss": 0.8458, + "step": 21385 + }, + { + "epoch": 0.31, + "grad_norm": 0.52734375, + "learning_rate": 0.0001750522524801121, + "loss": 1.0463, + "step": 21390 + }, + { + "epoch": 0.31, + "grad_norm": 0.56640625, + "learning_rate": 0.0001750357050827359, + "loss": 0.8554, + "step": 21395 + }, + { + "epoch": 0.31, + "grad_norm": 0.6171875, + "learning_rate": 0.00017501915298203624, + "loss": 0.9932, + "step": 21400 + }, + { + "epoch": 0.31, + "grad_norm": 0.451171875, + "learning_rate": 0.00017500259617905063, + "loss": 0.8362, + "step": 21405 + }, + { + "epoch": 0.31, + "grad_norm": 0.5546875, + "learning_rate": 0.00017498603467481689, + "loss": 0.9355, + "step": 21410 + }, + { + "epoch": 0.31, + "grad_norm": 0.54296875, + "learning_rate": 0.0001749694684703731, + "loss": 0.9487, + "step": 21415 + }, + { + "epoch": 0.31, + "grad_norm": 0.578125, + "learning_rate": 0.0001749528975667576, + "loss": 0.7915, + "step": 21420 + }, + { + "epoch": 0.31, + "grad_norm": 0.6640625, + "learning_rate": 0.00017493632196500914, + "loss": 1.0153, + "step": 21425 + }, + { + "epoch": 0.31, + "grad_norm": 0.59375, + "learning_rate": 0.0001749197416661667, + "loss": 1.1029, + "step": 21430 + }, + { + "epoch": 0.31, + "grad_norm": 0.5078125, + "learning_rate": 0.0001749031566712695, + "loss": 0.8185, + "step": 21435 + }, + { + "epoch": 0.31, + "grad_norm": 0.55078125, + "learning_rate": 0.00017488656698135712, + "loss": 0.8799, + "step": 21440 + }, + { + "epoch": 0.31, + "grad_norm": 0.55078125, + "learning_rate": 0.00017486997259746946, + "loss": 0.9096, + "step": 21445 + }, + { + "epoch": 0.31, + "grad_norm": 0.62109375, + "learning_rate": 0.00017485337352064666, + "loss": 1.0953, + "step": 21450 + }, + { + "epoch": 0.31, + "grad_norm": 0.5859375, + "learning_rate": 0.00017483676975192913, + "loss": 0.8612, + "step": 21455 + }, + { + "epoch": 0.31, + "grad_norm": 0.66796875, + "learning_rate": 0.00017482016129235763, + "loss": 1.1556, + "step": 21460 + }, + { + "epoch": 0.31, + "grad_norm": 0.60546875, + "learning_rate": 0.00017480354814297322, + "loss": 0.9108, + "step": 21465 + }, + { + "epoch": 0.31, + "grad_norm": 0.50390625, + "learning_rate": 0.00017478693030481722, + "loss": 0.9021, + "step": 21470 + }, + { + "epoch": 0.31, + "grad_norm": 0.494140625, + "learning_rate": 0.00017477030777893123, + "loss": 0.9815, + "step": 21475 + }, + { + "epoch": 0.31, + "grad_norm": 0.55078125, + "learning_rate": 0.00017475368056635724, + "loss": 1.0603, + "step": 21480 + }, + { + "epoch": 0.31, + "grad_norm": 0.5078125, + "learning_rate": 0.00017473704866813737, + "loss": 0.9242, + "step": 21485 + }, + { + "epoch": 0.31, + "grad_norm": 0.57421875, + "learning_rate": 0.00017472041208531423, + "loss": 0.8487, + "step": 21490 + }, + { + "epoch": 0.31, + "grad_norm": 0.56640625, + "learning_rate": 0.00017470377081893048, + "loss": 1.0269, + "step": 21495 + }, + { + "epoch": 0.31, + "grad_norm": 0.61328125, + "learning_rate": 0.00017468712487002933, + "loss": 0.9441, + "step": 21500 + }, + { + "epoch": 0.31, + "grad_norm": 0.5859375, + "learning_rate": 0.00017467047423965415, + "loss": 1.1466, + "step": 21505 + }, + { + "epoch": 0.31, + "grad_norm": 0.53515625, + "learning_rate": 0.0001746538189288486, + "loss": 1.0247, + "step": 21510 + }, + { + "epoch": 0.31, + "grad_norm": 0.53125, + "learning_rate": 0.00017463715893865664, + "loss": 1.0308, + "step": 21515 + }, + { + "epoch": 0.31, + "grad_norm": 0.55078125, + "learning_rate": 0.00017462049427012256, + "loss": 0.9886, + "step": 21520 + }, + { + "epoch": 0.31, + "grad_norm": 0.52734375, + "learning_rate": 0.0001746038249242909, + "loss": 0.914, + "step": 21525 + }, + { + "epoch": 0.31, + "grad_norm": 0.6328125, + "learning_rate": 0.00017458715090220656, + "loss": 0.9864, + "step": 21530 + }, + { + "epoch": 0.31, + "grad_norm": 0.57421875, + "learning_rate": 0.00017457047220491463, + "loss": 1.0514, + "step": 21535 + }, + { + "epoch": 0.31, + "grad_norm": 0.578125, + "learning_rate": 0.00017455378883346056, + "loss": 1.0187, + "step": 21540 + }, + { + "epoch": 0.31, + "grad_norm": 0.75, + "learning_rate": 0.00017453710078889012, + "loss": 0.9507, + "step": 21545 + }, + { + "epoch": 0.31, + "grad_norm": 0.486328125, + "learning_rate": 0.0001745204080722493, + "loss": 1.0244, + "step": 21550 + }, + { + "epoch": 0.31, + "grad_norm": 0.51171875, + "learning_rate": 0.00017450371068458446, + "loss": 0.9657, + "step": 21555 + }, + { + "epoch": 0.31, + "grad_norm": 0.76171875, + "learning_rate": 0.00017448700862694215, + "loss": 1.0195, + "step": 21560 + }, + { + "epoch": 0.31, + "grad_norm": 0.55078125, + "learning_rate": 0.00017447030190036932, + "loss": 0.9239, + "step": 21565 + }, + { + "epoch": 0.31, + "grad_norm": 0.49609375, + "learning_rate": 0.00017445359050591313, + "loss": 0.9354, + "step": 21570 + }, + { + "epoch": 0.31, + "grad_norm": 0.7109375, + "learning_rate": 0.0001744368744446211, + "loss": 1.0583, + "step": 21575 + }, + { + "epoch": 0.31, + "grad_norm": 0.58203125, + "learning_rate": 0.00017442015371754103, + "loss": 0.9223, + "step": 21580 + }, + { + "epoch": 0.31, + "grad_norm": 0.55859375, + "learning_rate": 0.00017440342832572095, + "loss": 1.046, + "step": 21585 + }, + { + "epoch": 0.31, + "grad_norm": 0.58203125, + "learning_rate": 0.00017438669827020924, + "loss": 1.1004, + "step": 21590 + }, + { + "epoch": 0.31, + "grad_norm": 0.55078125, + "learning_rate": 0.00017436996355205456, + "loss": 0.8468, + "step": 21595 + }, + { + "epoch": 0.31, + "grad_norm": 0.51171875, + "learning_rate": 0.00017435322417230586, + "loss": 1.0398, + "step": 21600 + }, + { + "epoch": 0.31, + "grad_norm": 0.578125, + "learning_rate": 0.0001743364801320124, + "loss": 1.0182, + "step": 21605 + }, + { + "epoch": 0.31, + "grad_norm": 0.63671875, + "learning_rate": 0.00017431973143222372, + "loss": 1.0255, + "step": 21610 + }, + { + "epoch": 0.31, + "grad_norm": 0.5546875, + "learning_rate": 0.0001743029780739896, + "loss": 0.8777, + "step": 21615 + }, + { + "epoch": 0.31, + "grad_norm": 0.6015625, + "learning_rate": 0.00017428622005836018, + "loss": 0.9486, + "step": 21620 + }, + { + "epoch": 0.31, + "grad_norm": 0.51953125, + "learning_rate": 0.0001742694573863859, + "loss": 0.9823, + "step": 21625 + }, + { + "epoch": 0.31, + "grad_norm": 0.66796875, + "learning_rate": 0.00017425269005911744, + "loss": 0.9608, + "step": 21630 + }, + { + "epoch": 0.31, + "grad_norm": 0.609375, + "learning_rate": 0.00017423591807760582, + "loss": 0.9378, + "step": 21635 + }, + { + "epoch": 0.31, + "grad_norm": 0.578125, + "learning_rate": 0.0001742191414429023, + "loss": 0.8654, + "step": 21640 + }, + { + "epoch": 0.31, + "grad_norm": 0.6171875, + "learning_rate": 0.00017420236015605847, + "loss": 0.9431, + "step": 21645 + }, + { + "epoch": 0.31, + "grad_norm": 0.470703125, + "learning_rate": 0.0001741855742181262, + "loss": 0.9499, + "step": 21650 + }, + { + "epoch": 0.31, + "grad_norm": 0.57421875, + "learning_rate": 0.00017416878363015763, + "loss": 1.0357, + "step": 21655 + }, + { + "epoch": 0.31, + "grad_norm": 0.59765625, + "learning_rate": 0.00017415198839320525, + "loss": 0.9856, + "step": 21660 + }, + { + "epoch": 0.31, + "grad_norm": 0.55859375, + "learning_rate": 0.00017413518850832173, + "loss": 0.992, + "step": 21665 + }, + { + "epoch": 0.31, + "grad_norm": 0.57421875, + "learning_rate": 0.00017411838397656023, + "loss": 0.9234, + "step": 21670 + }, + { + "epoch": 0.31, + "grad_norm": 0.625, + "learning_rate": 0.00017410157479897398, + "loss": 0.9881, + "step": 21675 + }, + { + "epoch": 0.31, + "grad_norm": 0.5390625, + "learning_rate": 0.00017408476097661662, + "loss": 1.0233, + "step": 21680 + }, + { + "epoch": 0.31, + "grad_norm": 0.56640625, + "learning_rate": 0.00017406794251054208, + "loss": 1.0662, + "step": 21685 + }, + { + "epoch": 0.31, + "grad_norm": 0.56640625, + "learning_rate": 0.00017405111940180454, + "loss": 0.9822, + "step": 21690 + }, + { + "epoch": 0.31, + "grad_norm": 0.6171875, + "learning_rate": 0.0001740342916514585, + "loss": 0.9748, + "step": 21695 + }, + { + "epoch": 0.31, + "grad_norm": 0.466796875, + "learning_rate": 0.00017401745926055875, + "loss": 0.7273, + "step": 21700 + }, + { + "epoch": 0.31, + "grad_norm": 0.61328125, + "learning_rate": 0.00017400062223016033, + "loss": 0.8134, + "step": 21705 + }, + { + "epoch": 0.31, + "grad_norm": 0.64453125, + "learning_rate": 0.00017398378056131866, + "loss": 1.0092, + "step": 21710 + }, + { + "epoch": 0.31, + "grad_norm": 0.61328125, + "learning_rate": 0.00017396693425508934, + "loss": 1.0338, + "step": 21715 + }, + { + "epoch": 0.31, + "grad_norm": 0.5546875, + "learning_rate": 0.00017395008331252834, + "loss": 0.9591, + "step": 21720 + }, + { + "epoch": 0.31, + "grad_norm": 0.52734375, + "learning_rate": 0.00017393322773469192, + "loss": 0.9183, + "step": 21725 + }, + { + "epoch": 0.31, + "grad_norm": 0.5390625, + "learning_rate": 0.00017391636752263657, + "loss": 1.0216, + "step": 21730 + }, + { + "epoch": 0.31, + "grad_norm": 0.703125, + "learning_rate": 0.0001738995026774191, + "loss": 0.9468, + "step": 21735 + }, + { + "epoch": 0.31, + "grad_norm": 0.65625, + "learning_rate": 0.00017388263320009667, + "loss": 0.9883, + "step": 21740 + }, + { + "epoch": 0.31, + "grad_norm": 0.62109375, + "learning_rate": 0.0001738657590917266, + "loss": 0.9116, + "step": 21745 + }, + { + "epoch": 0.31, + "grad_norm": 0.64453125, + "learning_rate": 0.0001738488803533667, + "loss": 1.0142, + "step": 21750 + }, + { + "epoch": 0.31, + "grad_norm": 0.62890625, + "learning_rate": 0.00017383199698607483, + "loss": 1.0342, + "step": 21755 + }, + { + "epoch": 0.31, + "grad_norm": 0.56640625, + "learning_rate": 0.0001738151089909093, + "loss": 0.9699, + "step": 21760 + }, + { + "epoch": 0.31, + "grad_norm": 0.61328125, + "learning_rate": 0.00017379821636892865, + "loss": 1.0844, + "step": 21765 + }, + { + "epoch": 0.31, + "grad_norm": 0.60546875, + "learning_rate": 0.0001737813191211918, + "loss": 1.0149, + "step": 21770 + }, + { + "epoch": 0.31, + "grad_norm": 0.546875, + "learning_rate": 0.0001737644172487578, + "loss": 0.9203, + "step": 21775 + }, + { + "epoch": 0.31, + "grad_norm": 0.578125, + "learning_rate": 0.0001737475107526861, + "loss": 0.928, + "step": 21780 + }, + { + "epoch": 0.31, + "grad_norm": 0.61328125, + "learning_rate": 0.00017373059963403647, + "loss": 1.048, + "step": 21785 + }, + { + "epoch": 0.31, + "grad_norm": 0.65625, + "learning_rate": 0.00017371368389386887, + "loss": 0.955, + "step": 21790 + }, + { + "epoch": 0.31, + "grad_norm": 0.56640625, + "learning_rate": 0.0001736967635332436, + "loss": 0.9854, + "step": 21795 + }, + { + "epoch": 0.31, + "grad_norm": 0.5859375, + "learning_rate": 0.0001736798385532213, + "loss": 0.8858, + "step": 21800 + }, + { + "epoch": 0.31, + "grad_norm": 0.6171875, + "learning_rate": 0.00017366290895486276, + "loss": 0.9952, + "step": 21805 + }, + { + "epoch": 0.31, + "grad_norm": 0.56640625, + "learning_rate": 0.0001736459747392292, + "loss": 0.9809, + "step": 21810 + }, + { + "epoch": 0.31, + "grad_norm": 0.51953125, + "learning_rate": 0.00017362903590738204, + "loss": 0.9138, + "step": 21815 + }, + { + "epoch": 0.31, + "grad_norm": 0.66796875, + "learning_rate": 0.0001736120924603831, + "loss": 1.1694, + "step": 21820 + }, + { + "epoch": 0.31, + "grad_norm": 0.578125, + "learning_rate": 0.00017359514439929435, + "loss": 0.9206, + "step": 21825 + }, + { + "epoch": 0.31, + "grad_norm": 0.59765625, + "learning_rate": 0.00017357819172517816, + "loss": 0.7781, + "step": 21830 + }, + { + "epoch": 0.31, + "grad_norm": 0.5546875, + "learning_rate": 0.00017356123443909707, + "loss": 0.9021, + "step": 21835 + }, + { + "epoch": 0.31, + "grad_norm": 0.65625, + "learning_rate": 0.00017354427254211404, + "loss": 1.0017, + "step": 21840 + }, + { + "epoch": 0.31, + "grad_norm": 0.5625, + "learning_rate": 0.00017352730603529225, + "loss": 0.8842, + "step": 21845 + }, + { + "epoch": 0.31, + "grad_norm": 0.546875, + "learning_rate": 0.0001735103349196952, + "loss": 0.9533, + "step": 21850 + }, + { + "epoch": 0.31, + "grad_norm": 0.578125, + "learning_rate": 0.0001734933591963866, + "loss": 1.0208, + "step": 21855 + }, + { + "epoch": 0.31, + "grad_norm": 0.5859375, + "learning_rate": 0.00017347637886643057, + "loss": 0.8829, + "step": 21860 + }, + { + "epoch": 0.31, + "grad_norm": 0.53125, + "learning_rate": 0.0001734593939308914, + "loss": 1.0942, + "step": 21865 + }, + { + "epoch": 0.31, + "grad_norm": 0.60546875, + "learning_rate": 0.00017344240439083378, + "loss": 0.9608, + "step": 21870 + }, + { + "epoch": 0.31, + "grad_norm": 0.625, + "learning_rate": 0.0001734254102473226, + "loss": 0.916, + "step": 21875 + }, + { + "epoch": 0.31, + "grad_norm": 0.6171875, + "learning_rate": 0.00017340841150142308, + "loss": 1.1468, + "step": 21880 + }, + { + "epoch": 0.31, + "grad_norm": 0.5390625, + "learning_rate": 0.00017339140815420074, + "loss": 0.8373, + "step": 21885 + }, + { + "epoch": 0.31, + "grad_norm": 0.578125, + "learning_rate": 0.00017337440020672134, + "loss": 1.135, + "step": 21890 + }, + { + "epoch": 0.31, + "grad_norm": 0.53515625, + "learning_rate": 0.00017335738766005097, + "loss": 1.0092, + "step": 21895 + }, + { + "epoch": 0.31, + "grad_norm": 0.6796875, + "learning_rate": 0.000173340370515256, + "loss": 0.9375, + "step": 21900 + }, + { + "epoch": 0.31, + "grad_norm": 0.625, + "learning_rate": 0.00017332334877340303, + "loss": 0.9806, + "step": 21905 + }, + { + "epoch": 0.31, + "grad_norm": 0.56640625, + "learning_rate": 0.0001733063224355591, + "loss": 1.0827, + "step": 21910 + }, + { + "epoch": 0.31, + "grad_norm": 0.5234375, + "learning_rate": 0.00017328929150279142, + "loss": 1.0186, + "step": 21915 + }, + { + "epoch": 0.31, + "grad_norm": 0.58984375, + "learning_rate": 0.0001732722559761674, + "loss": 1.0553, + "step": 21920 + }, + { + "epoch": 0.31, + "grad_norm": 0.5390625, + "learning_rate": 0.000173255215856755, + "loss": 0.921, + "step": 21925 + }, + { + "epoch": 0.31, + "grad_norm": 0.48828125, + "learning_rate": 0.00017323817114562218, + "loss": 0.9589, + "step": 21930 + }, + { + "epoch": 0.31, + "grad_norm": 0.53515625, + "learning_rate": 0.00017322112184383742, + "loss": 0.9213, + "step": 21935 + }, + { + "epoch": 0.31, + "grad_norm": 0.5546875, + "learning_rate": 0.00017320406795246933, + "loss": 0.9863, + "step": 21940 + }, + { + "epoch": 0.31, + "grad_norm": 0.55078125, + "learning_rate": 0.00017318700947258688, + "loss": 0.8876, + "step": 21945 + }, + { + "epoch": 0.31, + "grad_norm": 0.62890625, + "learning_rate": 0.00017316994640525935, + "loss": 0.7832, + "step": 21950 + }, + { + "epoch": 0.31, + "grad_norm": 0.625, + "learning_rate": 0.00017315287875155623, + "loss": 0.9013, + "step": 21955 + }, + { + "epoch": 0.32, + "grad_norm": 0.68359375, + "learning_rate": 0.00017313580651254738, + "loss": 1.0958, + "step": 21960 + }, + { + "epoch": 0.32, + "grad_norm": 0.51953125, + "learning_rate": 0.00017311872968930281, + "loss": 1.1191, + "step": 21965 + }, + { + "epoch": 0.32, + "grad_norm": 0.65625, + "learning_rate": 0.00017310164828289305, + "loss": 1.0535, + "step": 21970 + }, + { + "epoch": 0.32, + "grad_norm": 0.5234375, + "learning_rate": 0.00017308456229438873, + "loss": 1.0017, + "step": 21975 + }, + { + "epoch": 0.32, + "grad_norm": 0.5859375, + "learning_rate": 0.00017306747172486078, + "loss": 1.079, + "step": 21980 + }, + { + "epoch": 0.32, + "grad_norm": 0.609375, + "learning_rate": 0.0001730503765753805, + "loss": 0.9727, + "step": 21985 + }, + { + "epoch": 0.32, + "grad_norm": 0.64453125, + "learning_rate": 0.0001730332768470194, + "loss": 1.0199, + "step": 21990 + }, + { + "epoch": 0.32, + "grad_norm": 0.578125, + "learning_rate": 0.00017301617254084938, + "loss": 0.936, + "step": 21995 + }, + { + "epoch": 0.32, + "grad_norm": 0.52734375, + "learning_rate": 0.00017299906365794246, + "loss": 1.0371, + "step": 22000 + }, + { + "epoch": 0.32, + "grad_norm": 0.640625, + "learning_rate": 0.0001729819501993711, + "loss": 0.9254, + "step": 22005 + }, + { + "epoch": 0.32, + "grad_norm": 0.5625, + "learning_rate": 0.000172964832166208, + "loss": 1.041, + "step": 22010 + }, + { + "epoch": 0.32, + "grad_norm": 0.55859375, + "learning_rate": 0.00017294770955952608, + "loss": 0.9152, + "step": 22015 + }, + { + "epoch": 0.32, + "grad_norm": 0.53515625, + "learning_rate": 0.00017293058238039867, + "loss": 0.9246, + "step": 22020 + }, + { + "epoch": 0.32, + "grad_norm": 0.53125, + "learning_rate": 0.00017291345062989927, + "loss": 0.7779, + "step": 22025 + }, + { + "epoch": 0.32, + "grad_norm": 0.5546875, + "learning_rate": 0.00017289631430910177, + "loss": 1.0301, + "step": 22030 + }, + { + "epoch": 0.32, + "grad_norm": 0.609375, + "learning_rate": 0.00017287917341908025, + "loss": 1.1244, + "step": 22035 + }, + { + "epoch": 0.32, + "grad_norm": 0.59375, + "learning_rate": 0.00017286202796090917, + "loss": 0.7826, + "step": 22040 + }, + { + "epoch": 0.32, + "grad_norm": 0.6171875, + "learning_rate": 0.00017284487793566317, + "loss": 0.8691, + "step": 22045 + }, + { + "epoch": 0.32, + "grad_norm": 0.66796875, + "learning_rate": 0.00017282772334441729, + "loss": 1.0271, + "step": 22050 + }, + { + "epoch": 0.32, + "grad_norm": 0.412109375, + "learning_rate": 0.0001728105641882467, + "loss": 0.7768, + "step": 22055 + }, + { + "epoch": 0.32, + "grad_norm": 0.55859375, + "learning_rate": 0.00017279340046822707, + "loss": 0.9809, + "step": 22060 + }, + { + "epoch": 0.32, + "grad_norm": 0.5546875, + "learning_rate": 0.00017277623218543418, + "loss": 0.8828, + "step": 22065 + }, + { + "epoch": 0.32, + "grad_norm": 0.59375, + "learning_rate": 0.0001727590593409442, + "loss": 1.0735, + "step": 22070 + }, + { + "epoch": 0.32, + "grad_norm": 0.56640625, + "learning_rate": 0.00017274188193583346, + "loss": 0.7952, + "step": 22075 + }, + { + "epoch": 0.32, + "grad_norm": 0.62890625, + "learning_rate": 0.00017272469997117878, + "loss": 1.0377, + "step": 22080 + }, + { + "epoch": 0.32, + "grad_norm": 0.73828125, + "learning_rate": 0.00017270751344805702, + "loss": 0.9094, + "step": 22085 + }, + { + "epoch": 0.32, + "grad_norm": 0.56640625, + "learning_rate": 0.00017269032236754556, + "loss": 0.9699, + "step": 22090 + }, + { + "epoch": 0.32, + "grad_norm": 0.640625, + "learning_rate": 0.00017267312673072187, + "loss": 1.0409, + "step": 22095 + }, + { + "epoch": 0.32, + "grad_norm": 0.54296875, + "learning_rate": 0.00017265592653866385, + "loss": 0.8788, + "step": 22100 + }, + { + "epoch": 0.32, + "grad_norm": 0.50390625, + "learning_rate": 0.00017263872179244956, + "loss": 0.947, + "step": 22105 + }, + { + "epoch": 0.32, + "grad_norm": 0.58984375, + "learning_rate": 0.00017262151249315753, + "loss": 0.9475, + "step": 22110 + }, + { + "epoch": 0.32, + "grad_norm": 0.5703125, + "learning_rate": 0.00017260429864186634, + "loss": 0.9822, + "step": 22115 + }, + { + "epoch": 0.32, + "grad_norm": 0.59765625, + "learning_rate": 0.00017258708023965504, + "loss": 0.9842, + "step": 22120 + }, + { + "epoch": 0.32, + "grad_norm": 0.54296875, + "learning_rate": 0.00017256985728760288, + "loss": 0.8861, + "step": 22125 + }, + { + "epoch": 0.32, + "grad_norm": 0.53515625, + "learning_rate": 0.00017255262978678942, + "loss": 0.9062, + "step": 22130 + }, + { + "epoch": 0.32, + "grad_norm": 0.5625, + "learning_rate": 0.00017253539773829448, + "loss": 0.9959, + "step": 22135 + }, + { + "epoch": 0.32, + "grad_norm": 0.58203125, + "learning_rate": 0.0001725181611431982, + "loss": 1.0269, + "step": 22140 + }, + { + "epoch": 0.32, + "grad_norm": 0.58984375, + "learning_rate": 0.000172500920002581, + "loss": 1.0099, + "step": 22145 + }, + { + "epoch": 0.32, + "grad_norm": 0.57421875, + "learning_rate": 0.00017248367431752355, + "loss": 1.0521, + "step": 22150 + }, + { + "epoch": 0.32, + "grad_norm": 0.6171875, + "learning_rate": 0.00017246642408910685, + "loss": 0.9477, + "step": 22155 + }, + { + "epoch": 0.32, + "grad_norm": 0.65625, + "learning_rate": 0.00017244916931841216, + "loss": 1.0187, + "step": 22160 + }, + { + "epoch": 0.32, + "grad_norm": 0.62890625, + "learning_rate": 0.000172431910006521, + "loss": 0.9319, + "step": 22165 + }, + { + "epoch": 0.32, + "grad_norm": 0.5703125, + "learning_rate": 0.00017241464615451525, + "loss": 0.9638, + "step": 22170 + }, + { + "epoch": 0.32, + "grad_norm": 0.72265625, + "learning_rate": 0.000172397377763477, + "loss": 1.0252, + "step": 22175 + }, + { + "epoch": 0.32, + "grad_norm": 0.5, + "learning_rate": 0.00017238010483448866, + "loss": 1.0172, + "step": 22180 + }, + { + "epoch": 0.32, + "grad_norm": 0.6796875, + "learning_rate": 0.00017236282736863293, + "loss": 0.9769, + "step": 22185 + }, + { + "epoch": 0.32, + "grad_norm": 0.59765625, + "learning_rate": 0.00017234554536699274, + "loss": 1.0083, + "step": 22190 + }, + { + "epoch": 0.32, + "grad_norm": 0.55859375, + "learning_rate": 0.0001723282588306514, + "loss": 1.0638, + "step": 22195 + }, + { + "epoch": 0.32, + "grad_norm": 0.494140625, + "learning_rate": 0.0001723109677606924, + "loss": 0.9845, + "step": 22200 + }, + { + "epoch": 0.32, + "grad_norm": 0.55078125, + "learning_rate": 0.00017229367215819958, + "loss": 0.9822, + "step": 22205 + }, + { + "epoch": 0.32, + "grad_norm": 0.5859375, + "learning_rate": 0.00017227637202425706, + "loss": 1.1154, + "step": 22210 + }, + { + "epoch": 0.32, + "grad_norm": 0.53515625, + "learning_rate": 0.00017225906735994923, + "loss": 0.9841, + "step": 22215 + }, + { + "epoch": 0.32, + "grad_norm": 0.60546875, + "learning_rate": 0.00017224175816636075, + "loss": 0.9897, + "step": 22220 + }, + { + "epoch": 0.32, + "grad_norm": 0.5234375, + "learning_rate": 0.00017222444444457664, + "loss": 0.9637, + "step": 22225 + }, + { + "epoch": 0.32, + "grad_norm": 0.59375, + "learning_rate": 0.00017220712619568204, + "loss": 1.0782, + "step": 22230 + }, + { + "epoch": 0.32, + "grad_norm": 0.67578125, + "learning_rate": 0.00017218980342076254, + "loss": 1.1999, + "step": 22235 + }, + { + "epoch": 0.32, + "grad_norm": 0.54296875, + "learning_rate": 0.00017217247612090396, + "loss": 0.9888, + "step": 22240 + }, + { + "epoch": 0.32, + "grad_norm": 0.57421875, + "learning_rate": 0.00017215514429719237, + "loss": 0.8246, + "step": 22245 + }, + { + "epoch": 0.32, + "grad_norm": 0.52734375, + "learning_rate": 0.00017213780795071417, + "loss": 0.8804, + "step": 22250 + }, + { + "epoch": 0.32, + "grad_norm": 0.61328125, + "learning_rate": 0.000172120467082556, + "loss": 1.2292, + "step": 22255 + }, + { + "epoch": 0.32, + "grad_norm": 0.53125, + "learning_rate": 0.0001721031216938048, + "loss": 0.9579, + "step": 22260 + }, + { + "epoch": 0.32, + "grad_norm": 0.5234375, + "learning_rate": 0.00017208577178554787, + "loss": 0.9219, + "step": 22265 + }, + { + "epoch": 0.32, + "grad_norm": 0.578125, + "learning_rate": 0.0001720684173588726, + "loss": 0.9643, + "step": 22270 + }, + { + "epoch": 0.32, + "grad_norm": 0.609375, + "learning_rate": 0.00017205105841486688, + "loss": 0.8721, + "step": 22275 + }, + { + "epoch": 0.32, + "grad_norm": 0.51171875, + "learning_rate": 0.00017203369495461877, + "loss": 0.9526, + "step": 22280 + }, + { + "epoch": 0.32, + "grad_norm": 0.515625, + "learning_rate": 0.00017201632697921659, + "loss": 0.9051, + "step": 22285 + }, + { + "epoch": 0.32, + "grad_norm": 0.5703125, + "learning_rate": 0.00017199895448974904, + "loss": 1.0717, + "step": 22290 + }, + { + "epoch": 0.32, + "grad_norm": 0.63671875, + "learning_rate": 0.00017198157748730502, + "loss": 0.9993, + "step": 22295 + }, + { + "epoch": 0.32, + "grad_norm": 0.48828125, + "learning_rate": 0.00017196419597297372, + "loss": 1.0139, + "step": 22300 + }, + { + "epoch": 0.32, + "grad_norm": 0.6015625, + "learning_rate": 0.00017194680994784468, + "loss": 0.8819, + "step": 22305 + }, + { + "epoch": 0.32, + "grad_norm": 0.58203125, + "learning_rate": 0.00017192941941300763, + "loss": 1.0517, + "step": 22310 + }, + { + "epoch": 0.32, + "grad_norm": 0.515625, + "learning_rate": 0.00017191202436955268, + "loss": 0.9249, + "step": 22315 + }, + { + "epoch": 0.32, + "grad_norm": 0.62109375, + "learning_rate": 0.00017189462481857014, + "loss": 1.0895, + "step": 22320 + }, + { + "epoch": 0.32, + "grad_norm": 0.5703125, + "learning_rate": 0.00017187722076115062, + "loss": 0.9648, + "step": 22325 + }, + { + "epoch": 0.32, + "grad_norm": 0.625, + "learning_rate": 0.00017185981219838503, + "loss": 0.8843, + "step": 22330 + }, + { + "epoch": 0.32, + "grad_norm": 0.671875, + "learning_rate": 0.00017184239913136458, + "loss": 1.0308, + "step": 22335 + }, + { + "epoch": 0.32, + "grad_norm": 0.52734375, + "learning_rate": 0.00017182498156118075, + "loss": 0.969, + "step": 22340 + }, + { + "epoch": 0.32, + "grad_norm": 0.515625, + "learning_rate": 0.00017180755948892524, + "loss": 0.8646, + "step": 22345 + }, + { + "epoch": 0.32, + "grad_norm": 0.58203125, + "learning_rate": 0.00017179013291569018, + "loss": 0.9084, + "step": 22350 + }, + { + "epoch": 0.32, + "grad_norm": 0.65625, + "learning_rate": 0.00017177270184256775, + "loss": 1.0191, + "step": 22355 + }, + { + "epoch": 0.32, + "grad_norm": 0.578125, + "learning_rate": 0.00017175526627065065, + "loss": 1.1, + "step": 22360 + }, + { + "epoch": 0.32, + "grad_norm": 0.6171875, + "learning_rate": 0.00017173782620103176, + "loss": 1.0031, + "step": 22365 + }, + { + "epoch": 0.32, + "grad_norm": 0.54296875, + "learning_rate": 0.0001717203816348042, + "loss": 1.0448, + "step": 22370 + }, + { + "epoch": 0.32, + "grad_norm": 0.49609375, + "learning_rate": 0.00017170293257306148, + "loss": 0.8559, + "step": 22375 + }, + { + "epoch": 0.32, + "grad_norm": 0.5703125, + "learning_rate": 0.00017168547901689723, + "loss": 0.9636, + "step": 22380 + }, + { + "epoch": 0.32, + "grad_norm": 0.54296875, + "learning_rate": 0.00017166802096740553, + "loss": 1.0384, + "step": 22385 + }, + { + "epoch": 0.32, + "grad_norm": 0.53515625, + "learning_rate": 0.00017165055842568067, + "loss": 0.9458, + "step": 22390 + }, + { + "epoch": 0.32, + "grad_norm": 0.58984375, + "learning_rate": 0.0001716330913928172, + "loss": 1.0262, + "step": 22395 + }, + { + "epoch": 0.32, + "grad_norm": 0.53125, + "learning_rate": 0.00017161561986990995, + "loss": 1.0135, + "step": 22400 + }, + { + "epoch": 0.32, + "grad_norm": 0.61328125, + "learning_rate": 0.0001715981438580541, + "loss": 1.0603, + "step": 22405 + }, + { + "epoch": 0.32, + "grad_norm": 0.59375, + "learning_rate": 0.00017158066335834507, + "loss": 1.0851, + "step": 22410 + }, + { + "epoch": 0.32, + "grad_norm": 0.59375, + "learning_rate": 0.0001715631783718785, + "loss": 0.8572, + "step": 22415 + }, + { + "epoch": 0.32, + "grad_norm": 0.58984375, + "learning_rate": 0.00017154568889975042, + "loss": 0.9603, + "step": 22420 + }, + { + "epoch": 0.32, + "grad_norm": 0.6328125, + "learning_rate": 0.0001715281949430571, + "loss": 1.0527, + "step": 22425 + }, + { + "epoch": 0.32, + "grad_norm": 0.5859375, + "learning_rate": 0.000171510696502895, + "loss": 0.8258, + "step": 22430 + }, + { + "epoch": 0.32, + "grad_norm": 0.5703125, + "learning_rate": 0.00017149319358036107, + "loss": 1.0781, + "step": 22435 + }, + { + "epoch": 0.32, + "grad_norm": 0.5546875, + "learning_rate": 0.0001714756861765523, + "loss": 1.0145, + "step": 22440 + }, + { + "epoch": 0.32, + "grad_norm": 0.55078125, + "learning_rate": 0.00017145817429256612, + "loss": 0.8742, + "step": 22445 + }, + { + "epoch": 0.32, + "grad_norm": 0.6953125, + "learning_rate": 0.0001714406579295002, + "loss": 0.9866, + "step": 22450 + }, + { + "epoch": 0.32, + "grad_norm": 0.5234375, + "learning_rate": 0.0001714231370884525, + "loss": 0.8931, + "step": 22455 + }, + { + "epoch": 0.32, + "grad_norm": 0.60546875, + "learning_rate": 0.00017140561177052117, + "loss": 0.9762, + "step": 22460 + }, + { + "epoch": 0.32, + "grad_norm": 0.58203125, + "learning_rate": 0.0001713880819768048, + "loss": 1.0102, + "step": 22465 + }, + { + "epoch": 0.32, + "grad_norm": 0.515625, + "learning_rate": 0.00017137054770840213, + "loss": 1.1021, + "step": 22470 + }, + { + "epoch": 0.32, + "grad_norm": 0.625, + "learning_rate": 0.00017135300896641229, + "loss": 1.1629, + "step": 22475 + }, + { + "epoch": 0.32, + "grad_norm": 0.5546875, + "learning_rate": 0.00017133546575193452, + "loss": 1.0134, + "step": 22480 + }, + { + "epoch": 0.32, + "grad_norm": 0.57421875, + "learning_rate": 0.00017131791806606857, + "loss": 1.0137, + "step": 22485 + }, + { + "epoch": 0.32, + "grad_norm": 0.61328125, + "learning_rate": 0.00017130036590991426, + "loss": 0.9579, + "step": 22490 + }, + { + "epoch": 0.32, + "grad_norm": 0.55859375, + "learning_rate": 0.00017128280928457182, + "loss": 0.8749, + "step": 22495 + }, + { + "epoch": 0.32, + "grad_norm": 0.63671875, + "learning_rate": 0.0001712652481911417, + "loss": 1.0704, + "step": 22500 + }, + { + "epoch": 0.32, + "grad_norm": 0.72265625, + "learning_rate": 0.00017124768263072467, + "loss": 1.0315, + "step": 22505 + }, + { + "epoch": 0.32, + "grad_norm": 0.6640625, + "learning_rate": 0.00017123011260442174, + "loss": 1.0279, + "step": 22510 + }, + { + "epoch": 0.32, + "grad_norm": 0.57421875, + "learning_rate": 0.00017121253811333423, + "loss": 0.9216, + "step": 22515 + }, + { + "epoch": 0.32, + "grad_norm": 0.6171875, + "learning_rate": 0.00017119495915856375, + "loss": 1.0082, + "step": 22520 + }, + { + "epoch": 0.32, + "grad_norm": 0.5078125, + "learning_rate": 0.0001711773757412121, + "loss": 0.8658, + "step": 22525 + }, + { + "epoch": 0.32, + "grad_norm": 0.5234375, + "learning_rate": 0.00017115978786238153, + "loss": 1.0071, + "step": 22530 + }, + { + "epoch": 0.32, + "grad_norm": 0.609375, + "learning_rate": 0.00017114219552317436, + "loss": 0.9824, + "step": 22535 + }, + { + "epoch": 0.32, + "grad_norm": 0.49609375, + "learning_rate": 0.00017112459872469337, + "loss": 1.0176, + "step": 22540 + }, + { + "epoch": 0.32, + "grad_norm": 0.63671875, + "learning_rate": 0.00017110699746804154, + "loss": 0.9751, + "step": 22545 + }, + { + "epoch": 0.32, + "grad_norm": 0.5703125, + "learning_rate": 0.0001710893917543221, + "loss": 0.8675, + "step": 22550 + }, + { + "epoch": 0.32, + "grad_norm": 0.53515625, + "learning_rate": 0.00017107178158463863, + "loss": 0.9408, + "step": 22555 + }, + { + "epoch": 0.32, + "grad_norm": 0.58203125, + "learning_rate": 0.00017105416696009497, + "loss": 0.9809, + "step": 22560 + }, + { + "epoch": 0.32, + "grad_norm": 0.578125, + "learning_rate": 0.0001710365478817952, + "loss": 0.9696, + "step": 22565 + }, + { + "epoch": 0.32, + "grad_norm": 0.6015625, + "learning_rate": 0.0001710189243508437, + "loss": 0.9519, + "step": 22570 + }, + { + "epoch": 0.32, + "grad_norm": 0.490234375, + "learning_rate": 0.0001710012963683451, + "loss": 1.0546, + "step": 22575 + }, + { + "epoch": 0.32, + "grad_norm": 0.5703125, + "learning_rate": 0.00017098366393540442, + "loss": 0.9163, + "step": 22580 + }, + { + "epoch": 0.32, + "grad_norm": 0.54296875, + "learning_rate": 0.00017096602705312682, + "loss": 1.0133, + "step": 22585 + }, + { + "epoch": 0.32, + "grad_norm": 0.53125, + "learning_rate": 0.00017094838572261783, + "loss": 0.9241, + "step": 22590 + }, + { + "epoch": 0.32, + "grad_norm": 0.55078125, + "learning_rate": 0.00017093073994498318, + "loss": 0.9855, + "step": 22595 + }, + { + "epoch": 0.32, + "grad_norm": 0.546875, + "learning_rate": 0.00017091308972132905, + "loss": 0.9835, + "step": 22600 + }, + { + "epoch": 0.32, + "grad_norm": 0.58203125, + "learning_rate": 0.00017089543505276162, + "loss": 1.0074, + "step": 22605 + }, + { + "epoch": 0.32, + "grad_norm": 0.55859375, + "learning_rate": 0.0001708777759403876, + "loss": 0.8852, + "step": 22610 + }, + { + "epoch": 0.32, + "grad_norm": 0.55859375, + "learning_rate": 0.00017086011238531386, + "loss": 0.9527, + "step": 22615 + }, + { + "epoch": 0.32, + "grad_norm": 0.54296875, + "learning_rate": 0.0001708424443886476, + "loss": 0.9278, + "step": 22620 + }, + { + "epoch": 0.32, + "grad_norm": 0.6171875, + "learning_rate": 0.00017082477195149622, + "loss": 0.9386, + "step": 22625 + }, + { + "epoch": 0.32, + "grad_norm": 0.52734375, + "learning_rate": 0.0001708070950749675, + "loss": 0.8705, + "step": 22630 + }, + { + "epoch": 0.32, + "grad_norm": 0.59765625, + "learning_rate": 0.00017078941376016938, + "loss": 1.0218, + "step": 22635 + }, + { + "epoch": 0.32, + "grad_norm": 0.6328125, + "learning_rate": 0.00017077172800821018, + "loss": 1.1595, + "step": 22640 + }, + { + "epoch": 0.32, + "grad_norm": 0.7890625, + "learning_rate": 0.00017075403782019848, + "loss": 1.0909, + "step": 22645 + }, + { + "epoch": 0.32, + "grad_norm": 0.53125, + "learning_rate": 0.00017073634319724309, + "loss": 0.8685, + "step": 22650 + }, + { + "epoch": 0.32, + "grad_norm": 0.60546875, + "learning_rate": 0.00017071864414045318, + "loss": 0.9616, + "step": 22655 + }, + { + "epoch": 0.33, + "grad_norm": 0.6484375, + "learning_rate": 0.0001707009406509381, + "loss": 0.9912, + "step": 22660 + }, + { + "epoch": 0.33, + "grad_norm": 0.5703125, + "learning_rate": 0.00017068323272980752, + "loss": 0.9322, + "step": 22665 + }, + { + "epoch": 0.33, + "grad_norm": 0.58984375, + "learning_rate": 0.00017066552037817143, + "loss": 1.0414, + "step": 22670 + }, + { + "epoch": 0.33, + "grad_norm": 0.51953125, + "learning_rate": 0.00017064780359714004, + "loss": 0.9212, + "step": 22675 + }, + { + "epoch": 0.33, + "grad_norm": 0.671875, + "learning_rate": 0.00017063008238782387, + "loss": 1.0547, + "step": 22680 + }, + { + "epoch": 0.33, + "grad_norm": 0.54296875, + "learning_rate": 0.00017061235675133366, + "loss": 0.8523, + "step": 22685 + }, + { + "epoch": 0.33, + "grad_norm": 0.5625, + "learning_rate": 0.00017059462668878055, + "loss": 0.9595, + "step": 22690 + }, + { + "epoch": 0.33, + "grad_norm": 0.515625, + "learning_rate": 0.0001705768922012758, + "loss": 0.9903, + "step": 22695 + }, + { + "epoch": 0.33, + "grad_norm": 0.62109375, + "learning_rate": 0.0001705591532899311, + "loss": 1.0138, + "step": 22700 + }, + { + "epoch": 0.33, + "grad_norm": 0.640625, + "learning_rate": 0.0001705414099558583, + "loss": 0.9734, + "step": 22705 + }, + { + "epoch": 0.33, + "grad_norm": 0.68359375, + "learning_rate": 0.00017052366220016957, + "loss": 1.0777, + "step": 22710 + }, + { + "epoch": 0.33, + "grad_norm": 0.69921875, + "learning_rate": 0.0001705059100239774, + "loss": 1.0454, + "step": 22715 + }, + { + "epoch": 0.33, + "grad_norm": 0.59375, + "learning_rate": 0.00017048815342839447, + "loss": 1.1218, + "step": 22720 + }, + { + "epoch": 0.33, + "grad_norm": 0.6328125, + "learning_rate": 0.00017047039241453382, + "loss": 0.9762, + "step": 22725 + }, + { + "epoch": 0.33, + "grad_norm": 0.61328125, + "learning_rate": 0.0001704526269835087, + "loss": 0.9931, + "step": 22730 + }, + { + "epoch": 0.33, + "grad_norm": 0.50390625, + "learning_rate": 0.0001704348571364327, + "loss": 0.919, + "step": 22735 + }, + { + "epoch": 0.33, + "grad_norm": 0.5234375, + "learning_rate": 0.00017041708287441959, + "loss": 0.9993, + "step": 22740 + }, + { + "epoch": 0.33, + "grad_norm": 0.53515625, + "learning_rate": 0.0001703993041985836, + "loss": 1.067, + "step": 22745 + }, + { + "epoch": 0.33, + "grad_norm": 0.515625, + "learning_rate": 0.00017038152111003898, + "loss": 0.9398, + "step": 22750 + }, + { + "epoch": 0.33, + "grad_norm": 0.54296875, + "learning_rate": 0.00017036373360990048, + "loss": 1.1059, + "step": 22755 + }, + { + "epoch": 0.33, + "grad_norm": 0.6328125, + "learning_rate": 0.00017034594169928303, + "loss": 1.0214, + "step": 22760 + }, + { + "epoch": 0.33, + "grad_norm": 0.625, + "learning_rate": 0.00017032814537930183, + "loss": 0.9617, + "step": 22765 + }, + { + "epoch": 0.33, + "grad_norm": 0.46484375, + "learning_rate": 0.00017031034465107237, + "loss": 0.8269, + "step": 22770 + }, + { + "epoch": 0.33, + "grad_norm": 0.58203125, + "learning_rate": 0.00017029253951571046, + "loss": 0.8859, + "step": 22775 + }, + { + "epoch": 0.33, + "grad_norm": 0.5234375, + "learning_rate": 0.00017027472997433208, + "loss": 0.9395, + "step": 22780 + }, + { + "epoch": 0.33, + "grad_norm": 0.5546875, + "learning_rate": 0.0001702569160280536, + "loss": 0.9338, + "step": 22785 + }, + { + "epoch": 0.33, + "grad_norm": 0.515625, + "learning_rate": 0.00017023909767799163, + "loss": 0.9962, + "step": 22790 + }, + { + "epoch": 0.33, + "grad_norm": 0.5234375, + "learning_rate": 0.000170221274925263, + "loss": 0.8451, + "step": 22795 + }, + { + "epoch": 0.33, + "grad_norm": 0.6328125, + "learning_rate": 0.00017020344777098488, + "loss": 1.0406, + "step": 22800 + }, + { + "epoch": 0.33, + "grad_norm": 0.65625, + "learning_rate": 0.0001701856162162747, + "loss": 1.0097, + "step": 22805 + }, + { + "epoch": 0.33, + "grad_norm": 0.69140625, + "learning_rate": 0.0001701677802622502, + "loss": 0.9807, + "step": 22810 + }, + { + "epoch": 0.33, + "grad_norm": 0.55078125, + "learning_rate": 0.00017014993991002926, + "loss": 0.886, + "step": 22815 + }, + { + "epoch": 0.33, + "grad_norm": 0.546875, + "learning_rate": 0.00017013209516073024, + "loss": 1.0094, + "step": 22820 + }, + { + "epoch": 0.33, + "grad_norm": 0.51953125, + "learning_rate": 0.00017011424601547158, + "loss": 1.0373, + "step": 22825 + }, + { + "epoch": 0.33, + "grad_norm": 0.462890625, + "learning_rate": 0.00017009639247537214, + "loss": 0.9939, + "step": 22830 + }, + { + "epoch": 0.33, + "grad_norm": 0.546875, + "learning_rate": 0.00017007853454155102, + "loss": 1.0919, + "step": 22835 + }, + { + "epoch": 0.33, + "grad_norm": 0.51953125, + "learning_rate": 0.00017006067221512748, + "loss": 0.9612, + "step": 22840 + }, + { + "epoch": 0.33, + "grad_norm": 0.625, + "learning_rate": 0.00017004280549722127, + "loss": 0.9532, + "step": 22845 + }, + { + "epoch": 0.33, + "grad_norm": 0.546875, + "learning_rate": 0.0001700249343889522, + "loss": 0.8965, + "step": 22850 + }, + { + "epoch": 0.33, + "grad_norm": 0.60546875, + "learning_rate": 0.0001700070588914405, + "loss": 1.0316, + "step": 22855 + }, + { + "epoch": 0.33, + "grad_norm": 0.578125, + "learning_rate": 0.00016998917900580665, + "loss": 1.1005, + "step": 22860 + }, + { + "epoch": 0.33, + "grad_norm": 0.515625, + "learning_rate": 0.00016997129473317132, + "loss": 0.9847, + "step": 22865 + }, + { + "epoch": 0.33, + "grad_norm": 0.7421875, + "learning_rate": 0.00016995340607465554, + "loss": 0.8212, + "step": 22870 + }, + { + "epoch": 0.33, + "grad_norm": 0.5390625, + "learning_rate": 0.00016993551303138063, + "loss": 0.9523, + "step": 22875 + }, + { + "epoch": 0.33, + "grad_norm": 0.56640625, + "learning_rate": 0.00016991761560446807, + "loss": 0.942, + "step": 22880 + }, + { + "epoch": 0.33, + "grad_norm": 0.5625, + "learning_rate": 0.00016989971379503978, + "loss": 0.9276, + "step": 22885 + }, + { + "epoch": 0.33, + "grad_norm": 0.56640625, + "learning_rate": 0.0001698818076042178, + "loss": 0.9119, + "step": 22890 + }, + { + "epoch": 0.33, + "grad_norm": 0.55078125, + "learning_rate": 0.0001698638970331245, + "loss": 0.938, + "step": 22895 + }, + { + "epoch": 0.33, + "grad_norm": 0.53125, + "learning_rate": 0.0001698459820828826, + "loss": 0.9448, + "step": 22900 + }, + { + "epoch": 0.33, + "grad_norm": 0.6953125, + "learning_rate": 0.00016982806275461497, + "loss": 1.0021, + "step": 22905 + }, + { + "epoch": 0.33, + "grad_norm": 0.5859375, + "learning_rate": 0.00016981013904944487, + "loss": 1.0421, + "step": 22910 + }, + { + "epoch": 0.33, + "grad_norm": 0.63671875, + "learning_rate": 0.00016979221096849573, + "loss": 1.02, + "step": 22915 + }, + { + "epoch": 0.33, + "grad_norm": 0.59765625, + "learning_rate": 0.00016977427851289133, + "loss": 1.1506, + "step": 22920 + }, + { + "epoch": 0.33, + "grad_norm": 0.66796875, + "learning_rate": 0.00016975634168375566, + "loss": 0.867, + "step": 22925 + }, + { + "epoch": 0.33, + "grad_norm": 0.56640625, + "learning_rate": 0.00016973840048221307, + "loss": 0.999, + "step": 22930 + }, + { + "epoch": 0.33, + "grad_norm": 0.58203125, + "learning_rate": 0.00016972045490938812, + "loss": 0.9237, + "step": 22935 + }, + { + "epoch": 0.33, + "grad_norm": 0.76953125, + "learning_rate": 0.00016970250496640564, + "loss": 1.0466, + "step": 22940 + }, + { + "epoch": 0.33, + "grad_norm": 0.49609375, + "learning_rate": 0.00016968455065439076, + "loss": 0.832, + "step": 22945 + }, + { + "epoch": 0.33, + "grad_norm": 0.7578125, + "learning_rate": 0.00016966659197446889, + "loss": 0.9661, + "step": 22950 + }, + { + "epoch": 0.33, + "grad_norm": 0.51953125, + "learning_rate": 0.0001696486289277657, + "loss": 0.8908, + "step": 22955 + }, + { + "epoch": 0.33, + "grad_norm": 0.54296875, + "learning_rate": 0.0001696306615154071, + "loss": 0.9853, + "step": 22960 + }, + { + "epoch": 0.33, + "grad_norm": 0.58984375, + "learning_rate": 0.00016961268973851937, + "loss": 1.0202, + "step": 22965 + }, + { + "epoch": 0.33, + "grad_norm": 0.640625, + "learning_rate": 0.00016959471359822895, + "loss": 0.8762, + "step": 22970 + }, + { + "epoch": 0.33, + "grad_norm": 0.57421875, + "learning_rate": 0.00016957673309566258, + "loss": 1.0241, + "step": 22975 + }, + { + "epoch": 0.33, + "grad_norm": 0.578125, + "learning_rate": 0.00016955874823194737, + "loss": 1.0761, + "step": 22980 + }, + { + "epoch": 0.33, + "grad_norm": 0.53125, + "learning_rate": 0.0001695407590082106, + "loss": 1.0919, + "step": 22985 + }, + { + "epoch": 0.33, + "grad_norm": 0.61328125, + "learning_rate": 0.00016952276542557985, + "loss": 1.0389, + "step": 22990 + }, + { + "epoch": 0.33, + "grad_norm": 0.59375, + "learning_rate": 0.000169504767485183, + "loss": 1.0269, + "step": 22995 + }, + { + "epoch": 0.33, + "grad_norm": 0.55078125, + "learning_rate": 0.00016948676518814816, + "loss": 0.9425, + "step": 23000 + }, + { + "epoch": 0.33, + "grad_norm": 0.7578125, + "learning_rate": 0.0001694687585356037, + "loss": 1.1517, + "step": 23005 + }, + { + "epoch": 0.33, + "grad_norm": 0.6171875, + "learning_rate": 0.0001694507475286784, + "loss": 0.9304, + "step": 23010 + }, + { + "epoch": 0.33, + "grad_norm": 0.5546875, + "learning_rate": 0.0001694327321685011, + "loss": 0.8311, + "step": 23015 + }, + { + "epoch": 0.33, + "grad_norm": 0.56640625, + "learning_rate": 0.0001694147124562011, + "loss": 0.9417, + "step": 23020 + }, + { + "epoch": 0.33, + "grad_norm": 0.5703125, + "learning_rate": 0.00016939668839290785, + "loss": 0.871, + "step": 23025 + }, + { + "epoch": 0.33, + "grad_norm": 0.671875, + "learning_rate": 0.00016937865997975116, + "loss": 0.9416, + "step": 23030 + }, + { + "epoch": 0.33, + "grad_norm": 0.68359375, + "learning_rate": 0.000169360627217861, + "loss": 0.9172, + "step": 23035 + }, + { + "epoch": 0.33, + "grad_norm": 0.515625, + "learning_rate": 0.00016934259010836775, + "loss": 1.0004, + "step": 23040 + }, + { + "epoch": 0.33, + "grad_norm": 0.5703125, + "learning_rate": 0.000169324548652402, + "loss": 1.0068, + "step": 23045 + }, + { + "epoch": 0.33, + "grad_norm": 0.62890625, + "learning_rate": 0.00016930650285109454, + "loss": 0.9082, + "step": 23050 + }, + { + "epoch": 0.33, + "grad_norm": 0.640625, + "learning_rate": 0.0001692884527055766, + "loss": 0.9053, + "step": 23055 + }, + { + "epoch": 0.33, + "grad_norm": 0.5625, + "learning_rate": 0.0001692703982169795, + "loss": 1.0665, + "step": 23060 + }, + { + "epoch": 0.33, + "grad_norm": 0.54296875, + "learning_rate": 0.00016925233938643497, + "loss": 1.0995, + "step": 23065 + }, + { + "epoch": 0.33, + "grad_norm": 0.625, + "learning_rate": 0.00016923427621507491, + "loss": 0.954, + "step": 23070 + }, + { + "epoch": 0.33, + "grad_norm": 0.59765625, + "learning_rate": 0.0001692162087040316, + "loss": 1.0213, + "step": 23075 + }, + { + "epoch": 0.33, + "grad_norm": 0.56640625, + "learning_rate": 0.00016919813685443744, + "loss": 0.9033, + "step": 23080 + }, + { + "epoch": 0.33, + "grad_norm": 0.56640625, + "learning_rate": 0.0001691800606674253, + "loss": 1.0971, + "step": 23085 + }, + { + "epoch": 0.33, + "grad_norm": 0.53125, + "learning_rate": 0.00016916198014412816, + "loss": 1.0208, + "step": 23090 + }, + { + "epoch": 0.33, + "grad_norm": 0.52734375, + "learning_rate": 0.00016914389528567932, + "loss": 0.8997, + "step": 23095 + }, + { + "epoch": 0.33, + "grad_norm": 0.578125, + "learning_rate": 0.0001691258060932124, + "loss": 0.8185, + "step": 23100 + }, + { + "epoch": 0.33, + "grad_norm": 0.54296875, + "learning_rate": 0.0001691077125678612, + "loss": 0.9328, + "step": 23105 + }, + { + "epoch": 0.33, + "grad_norm": 0.55078125, + "learning_rate": 0.0001690896147107599, + "loss": 0.8601, + "step": 23110 + }, + { + "epoch": 0.33, + "grad_norm": 0.5859375, + "learning_rate": 0.00016907151252304283, + "loss": 0.9554, + "step": 23115 + }, + { + "epoch": 0.33, + "grad_norm": 0.65234375, + "learning_rate": 0.0001690534060058447, + "loss": 0.931, + "step": 23120 + }, + { + "epoch": 0.33, + "grad_norm": 0.5546875, + "learning_rate": 0.00016903529516030044, + "loss": 0.8989, + "step": 23125 + }, + { + "epoch": 0.33, + "grad_norm": 0.6015625, + "learning_rate": 0.00016901717998754528, + "loss": 1.0203, + "step": 23130 + }, + { + "epoch": 0.33, + "grad_norm": 0.6171875, + "learning_rate": 0.00016899906048871462, + "loss": 1.0554, + "step": 23135 + }, + { + "epoch": 0.33, + "grad_norm": 0.53515625, + "learning_rate": 0.0001689809366649443, + "loss": 1.0396, + "step": 23140 + }, + { + "epoch": 0.33, + "grad_norm": 0.6015625, + "learning_rate": 0.0001689628085173703, + "loss": 0.9693, + "step": 23145 + }, + { + "epoch": 0.33, + "grad_norm": 0.546875, + "learning_rate": 0.00016894467604712892, + "loss": 0.8956, + "step": 23150 + }, + { + "epoch": 0.33, + "grad_norm": 0.578125, + "learning_rate": 0.00016892653925535672, + "loss": 0.9386, + "step": 23155 + }, + { + "epoch": 0.33, + "grad_norm": 0.60546875, + "learning_rate": 0.00016890839814319057, + "loss": 1.0394, + "step": 23160 + }, + { + "epoch": 0.33, + "grad_norm": 0.59375, + "learning_rate": 0.0001688902527117675, + "loss": 0.9978, + "step": 23165 + }, + { + "epoch": 0.33, + "grad_norm": 0.484375, + "learning_rate": 0.00016887210296222496, + "loss": 0.8688, + "step": 23170 + }, + { + "epoch": 0.33, + "grad_norm": 0.56640625, + "learning_rate": 0.00016885394889570057, + "loss": 1.0429, + "step": 23175 + }, + { + "epoch": 0.33, + "grad_norm": 0.66015625, + "learning_rate": 0.0001688357905133322, + "loss": 1.021, + "step": 23180 + }, + { + "epoch": 0.33, + "grad_norm": 0.73828125, + "learning_rate": 0.00016881762781625813, + "loss": 0.8929, + "step": 23185 + }, + { + "epoch": 0.33, + "grad_norm": 0.66015625, + "learning_rate": 0.00016879946080561675, + "loss": 1.0132, + "step": 23190 + }, + { + "epoch": 0.33, + "grad_norm": 0.5625, + "learning_rate": 0.0001687812894825468, + "loss": 1.0581, + "step": 23195 + }, + { + "epoch": 0.33, + "grad_norm": 0.54296875, + "learning_rate": 0.00016876311384818733, + "loss": 0.9398, + "step": 23200 + }, + { + "epoch": 0.33, + "grad_norm": 0.5390625, + "learning_rate": 0.00016874493390367756, + "loss": 1.0428, + "step": 23205 + }, + { + "epoch": 0.33, + "grad_norm": 0.56640625, + "learning_rate": 0.000168726749650157, + "loss": 0.972, + "step": 23210 + }, + { + "epoch": 0.33, + "grad_norm": 0.67578125, + "learning_rate": 0.00016870856108876554, + "loss": 0.9241, + "step": 23215 + }, + { + "epoch": 0.33, + "grad_norm": 0.61328125, + "learning_rate": 0.00016869036822064323, + "loss": 0.9303, + "step": 23220 + }, + { + "epoch": 0.33, + "grad_norm": 0.6328125, + "learning_rate": 0.00016867217104693036, + "loss": 0.9909, + "step": 23225 + }, + { + "epoch": 0.33, + "grad_norm": 0.62890625, + "learning_rate": 0.0001686539695687676, + "loss": 0.9815, + "step": 23230 + }, + { + "epoch": 0.33, + "grad_norm": 0.578125, + "learning_rate": 0.00016863576378729588, + "loss": 1.0365, + "step": 23235 + }, + { + "epoch": 0.33, + "grad_norm": 0.546875, + "learning_rate": 0.00016861755370365628, + "loss": 0.8689, + "step": 23240 + }, + { + "epoch": 0.33, + "grad_norm": 0.51171875, + "learning_rate": 0.00016859933931899027, + "loss": 0.9151, + "step": 23245 + }, + { + "epoch": 0.33, + "grad_norm": 0.58984375, + "learning_rate": 0.00016858112063443956, + "loss": 0.9882, + "step": 23250 + }, + { + "epoch": 0.33, + "grad_norm": 0.59765625, + "learning_rate": 0.0001685628976511461, + "loss": 1.182, + "step": 23255 + }, + { + "epoch": 0.33, + "grad_norm": 0.546875, + "learning_rate": 0.00016854467037025214, + "loss": 1.0126, + "step": 23260 + }, + { + "epoch": 0.33, + "grad_norm": 0.51953125, + "learning_rate": 0.00016852643879290016, + "loss": 0.9376, + "step": 23265 + }, + { + "epoch": 0.33, + "grad_norm": 0.609375, + "learning_rate": 0.00016850820292023294, + "loss": 0.9557, + "step": 23270 + }, + { + "epoch": 0.33, + "grad_norm": 0.6015625, + "learning_rate": 0.00016848996275339352, + "loss": 0.9623, + "step": 23275 + }, + { + "epoch": 0.33, + "grad_norm": 0.60546875, + "learning_rate": 0.00016847171829352528, + "loss": 1.0208, + "step": 23280 + }, + { + "epoch": 0.33, + "grad_norm": 0.5859375, + "learning_rate": 0.00016845346954177172, + "loss": 0.9661, + "step": 23285 + }, + { + "epoch": 0.33, + "grad_norm": 0.53515625, + "learning_rate": 0.00016843521649927674, + "loss": 1.0168, + "step": 23290 + }, + { + "epoch": 0.33, + "grad_norm": 0.5859375, + "learning_rate": 0.00016841695916718443, + "loss": 0.9827, + "step": 23295 + }, + { + "epoch": 0.33, + "grad_norm": 0.61328125, + "learning_rate": 0.0001683986975466392, + "loss": 1.0155, + "step": 23300 + }, + { + "epoch": 0.33, + "grad_norm": 0.55078125, + "learning_rate": 0.00016838043163878573, + "loss": 0.9813, + "step": 23305 + }, + { + "epoch": 0.33, + "grad_norm": 0.67578125, + "learning_rate": 0.00016836216144476893, + "loss": 0.8198, + "step": 23310 + }, + { + "epoch": 0.33, + "grad_norm": 0.5546875, + "learning_rate": 0.000168343886965734, + "loss": 1.0811, + "step": 23315 + }, + { + "epoch": 0.33, + "grad_norm": 0.5703125, + "learning_rate": 0.00016832560820282636, + "loss": 0.9135, + "step": 23320 + }, + { + "epoch": 0.33, + "grad_norm": 0.5390625, + "learning_rate": 0.0001683073251571918, + "loss": 0.9304, + "step": 23325 + }, + { + "epoch": 0.33, + "grad_norm": 0.69140625, + "learning_rate": 0.0001682890378299763, + "loss": 1.0073, + "step": 23330 + }, + { + "epoch": 0.33, + "grad_norm": 0.58984375, + "learning_rate": 0.00016827074622232616, + "loss": 0.99, + "step": 23335 + }, + { + "epoch": 0.33, + "grad_norm": 0.55859375, + "learning_rate": 0.00016825245033538785, + "loss": 0.8615, + "step": 23340 + }, + { + "epoch": 0.33, + "grad_norm": 0.51171875, + "learning_rate": 0.00016823415017030825, + "loss": 0.8889, + "step": 23345 + }, + { + "epoch": 0.33, + "grad_norm": 0.6015625, + "learning_rate": 0.00016821584572823442, + "loss": 0.982, + "step": 23350 + }, + { + "epoch": 0.34, + "grad_norm": 0.6015625, + "learning_rate": 0.00016819753701031363, + "loss": 1.0136, + "step": 23355 + }, + { + "epoch": 0.34, + "grad_norm": 0.5546875, + "learning_rate": 0.00016817922401769363, + "loss": 1.0189, + "step": 23360 + }, + { + "epoch": 0.34, + "grad_norm": 0.54296875, + "learning_rate": 0.00016816090675152214, + "loss": 1.0293, + "step": 23365 + }, + { + "epoch": 0.34, + "grad_norm": 0.62109375, + "learning_rate": 0.00016814258521294744, + "loss": 0.9796, + "step": 23370 + }, + { + "epoch": 0.34, + "grad_norm": 0.640625, + "learning_rate": 0.00016812425940311787, + "loss": 0.9483, + "step": 23375 + }, + { + "epoch": 0.34, + "grad_norm": 0.52734375, + "learning_rate": 0.00016810592932318212, + "loss": 0.926, + "step": 23380 + }, + { + "epoch": 0.34, + "grad_norm": 0.5234375, + "learning_rate": 0.0001680875949742892, + "loss": 0.907, + "step": 23385 + }, + { + "epoch": 0.34, + "grad_norm": 0.474609375, + "learning_rate": 0.0001680692563575882, + "loss": 0.9811, + "step": 23390 + }, + { + "epoch": 0.34, + "grad_norm": 0.5546875, + "learning_rate": 0.00016805091347422876, + "loss": 1.0047, + "step": 23395 + }, + { + "epoch": 0.34, + "grad_norm": 0.59375, + "learning_rate": 0.00016803256632536053, + "loss": 0.8818, + "step": 23400 + }, + { + "epoch": 0.34, + "grad_norm": 0.609375, + "learning_rate": 0.00016801421491213358, + "loss": 1.011, + "step": 23405 + }, + { + "epoch": 0.34, + "grad_norm": 0.5390625, + "learning_rate": 0.00016799585923569816, + "loss": 0.9497, + "step": 23410 + }, + { + "epoch": 0.34, + "grad_norm": 0.5625, + "learning_rate": 0.00016797749929720485, + "loss": 1.0023, + "step": 23415 + }, + { + "epoch": 0.34, + "grad_norm": 0.5703125, + "learning_rate": 0.00016795913509780447, + "loss": 0.8086, + "step": 23420 + }, + { + "epoch": 0.34, + "grad_norm": 0.6015625, + "learning_rate": 0.0001679407666386481, + "loss": 0.9474, + "step": 23425 + }, + { + "epoch": 0.34, + "grad_norm": 0.5, + "learning_rate": 0.00016792239392088708, + "loss": 1.0872, + "step": 23430 + }, + { + "epoch": 0.34, + "grad_norm": 0.56640625, + "learning_rate": 0.00016790401694567305, + "loss": 0.9645, + "step": 23435 + }, + { + "epoch": 0.34, + "grad_norm": 0.6171875, + "learning_rate": 0.00016788563571415793, + "loss": 0.9173, + "step": 23440 + }, + { + "epoch": 0.34, + "grad_norm": 0.61328125, + "learning_rate": 0.00016786725022749382, + "loss": 0.8202, + "step": 23445 + }, + { + "epoch": 0.34, + "grad_norm": 0.56640625, + "learning_rate": 0.00016784886048683322, + "loss": 0.9017, + "step": 23450 + }, + { + "epoch": 0.34, + "grad_norm": 0.6484375, + "learning_rate": 0.00016783046649332872, + "loss": 1.0489, + "step": 23455 + }, + { + "epoch": 0.34, + "grad_norm": 0.5859375, + "learning_rate": 0.00016781206824813337, + "loss": 0.923, + "step": 23460 + }, + { + "epoch": 0.34, + "grad_norm": 0.7265625, + "learning_rate": 0.00016779366575240032, + "loss": 0.9346, + "step": 23465 + }, + { + "epoch": 0.34, + "grad_norm": 0.59765625, + "learning_rate": 0.0001677752590072831, + "loss": 0.9595, + "step": 23470 + }, + { + "epoch": 0.34, + "grad_norm": 0.60546875, + "learning_rate": 0.00016775684801393546, + "loss": 0.8984, + "step": 23475 + }, + { + "epoch": 0.34, + "grad_norm": 0.56640625, + "learning_rate": 0.00016773843277351138, + "loss": 1.0412, + "step": 23480 + }, + { + "epoch": 0.34, + "grad_norm": 0.59765625, + "learning_rate": 0.00016772001328716523, + "loss": 0.9927, + "step": 23485 + }, + { + "epoch": 0.34, + "grad_norm": 0.53515625, + "learning_rate": 0.00016770158955605152, + "loss": 0.8891, + "step": 23490 + }, + { + "epoch": 0.34, + "grad_norm": 0.70703125, + "learning_rate": 0.00016768316158132505, + "loss": 1.0668, + "step": 23495 + }, + { + "epoch": 0.34, + "grad_norm": 0.54296875, + "learning_rate": 0.00016766472936414093, + "loss": 0.9172, + "step": 23500 + }, + { + "epoch": 0.34, + "grad_norm": 0.57421875, + "learning_rate": 0.0001676462929056545, + "loss": 0.9436, + "step": 23505 + }, + { + "epoch": 0.34, + "grad_norm": 0.640625, + "learning_rate": 0.00016762785220702142, + "loss": 1.0329, + "step": 23510 + }, + { + "epoch": 0.34, + "grad_norm": 0.55078125, + "learning_rate": 0.0001676094072693975, + "loss": 0.862, + "step": 23515 + }, + { + "epoch": 0.34, + "grad_norm": 0.6015625, + "learning_rate": 0.000167590958093939, + "loss": 1.1708, + "step": 23520 + }, + { + "epoch": 0.34, + "grad_norm": 0.578125, + "learning_rate": 0.0001675725046818022, + "loss": 1.0274, + "step": 23525 + }, + { + "epoch": 0.34, + "grad_norm": 0.578125, + "learning_rate": 0.00016755404703414388, + "loss": 0.9976, + "step": 23530 + }, + { + "epoch": 0.34, + "grad_norm": 0.6328125, + "learning_rate": 0.00016753558515212095, + "loss": 1.0193, + "step": 23535 + }, + { + "epoch": 0.34, + "grad_norm": 0.5390625, + "learning_rate": 0.00016751711903689062, + "loss": 1.018, + "step": 23540 + }, + { + "epoch": 0.34, + "grad_norm": 0.54296875, + "learning_rate": 0.00016749864868961038, + "loss": 1.0502, + "step": 23545 + }, + { + "epoch": 0.34, + "grad_norm": 0.625, + "learning_rate": 0.00016748017411143798, + "loss": 1.0112, + "step": 23550 + }, + { + "epoch": 0.34, + "grad_norm": 0.56640625, + "learning_rate": 0.00016746169530353137, + "loss": 0.9149, + "step": 23555 + }, + { + "epoch": 0.34, + "grad_norm": 0.578125, + "learning_rate": 0.00016744321226704888, + "loss": 1.0269, + "step": 23560 + }, + { + "epoch": 0.34, + "grad_norm": 0.5234375, + "learning_rate": 0.00016742472500314904, + "loss": 0.8589, + "step": 23565 + }, + { + "epoch": 0.34, + "grad_norm": 0.6171875, + "learning_rate": 0.00016740623351299067, + "loss": 0.9598, + "step": 23570 + }, + { + "epoch": 0.34, + "grad_norm": 0.65625, + "learning_rate": 0.00016738773779773278, + "loss": 1.0552, + "step": 23575 + }, + { + "epoch": 0.34, + "grad_norm": 0.5546875, + "learning_rate": 0.00016736923785853476, + "loss": 1.0401, + "step": 23580 + }, + { + "epoch": 0.34, + "grad_norm": 0.6640625, + "learning_rate": 0.00016735073369655615, + "loss": 1.0078, + "step": 23585 + }, + { + "epoch": 0.34, + "grad_norm": 0.61328125, + "learning_rate": 0.0001673322253129569, + "loss": 0.9583, + "step": 23590 + }, + { + "epoch": 0.34, + "grad_norm": 0.5390625, + "learning_rate": 0.00016731371270889707, + "loss": 0.9447, + "step": 23595 + }, + { + "epoch": 0.34, + "grad_norm": 0.53125, + "learning_rate": 0.00016729519588553704, + "loss": 0.8466, + "step": 23600 + }, + { + "epoch": 0.34, + "grad_norm": 0.53515625, + "learning_rate": 0.00016727667484403748, + "loss": 0.9608, + "step": 23605 + }, + { + "epoch": 0.34, + "grad_norm": 0.6015625, + "learning_rate": 0.00016725814958555932, + "loss": 0.9509, + "step": 23610 + }, + { + "epoch": 0.34, + "grad_norm": 0.6015625, + "learning_rate": 0.00016723962011126376, + "loss": 0.9129, + "step": 23615 + }, + { + "epoch": 0.34, + "grad_norm": 0.51171875, + "learning_rate": 0.00016722108642231224, + "loss": 0.9439, + "step": 23620 + }, + { + "epoch": 0.34, + "grad_norm": 0.73046875, + "learning_rate": 0.00016720254851986647, + "loss": 1.2049, + "step": 23625 + }, + { + "epoch": 0.34, + "grad_norm": 0.578125, + "learning_rate": 0.0001671840064050884, + "loss": 1.0429, + "step": 23630 + }, + { + "epoch": 0.34, + "grad_norm": 0.6328125, + "learning_rate": 0.0001671654600791403, + "loss": 0.9667, + "step": 23635 + }, + { + "epoch": 0.34, + "grad_norm": 0.5546875, + "learning_rate": 0.00016714690954318465, + "loss": 0.9415, + "step": 23640 + }, + { + "epoch": 0.34, + "grad_norm": 0.5625, + "learning_rate": 0.00016712835479838428, + "loss": 0.9917, + "step": 23645 + }, + { + "epoch": 0.34, + "grad_norm": 0.50390625, + "learning_rate": 0.00016710979584590215, + "loss": 0.9524, + "step": 23650 + }, + { + "epoch": 0.34, + "grad_norm": 0.54296875, + "learning_rate": 0.00016709123268690158, + "loss": 1.0102, + "step": 23655 + }, + { + "epoch": 0.34, + "grad_norm": 0.54296875, + "learning_rate": 0.00016707266532254615, + "loss": 1.0184, + "step": 23660 + }, + { + "epoch": 0.34, + "grad_norm": 0.5546875, + "learning_rate": 0.00016705409375399963, + "loss": 0.9577, + "step": 23665 + }, + { + "epoch": 0.34, + "grad_norm": 0.55859375, + "learning_rate": 0.00016703551798242621, + "loss": 0.9503, + "step": 23670 + }, + { + "epoch": 0.34, + "grad_norm": 0.5078125, + "learning_rate": 0.00016701693800899014, + "loss": 1.2045, + "step": 23675 + }, + { + "epoch": 0.34, + "grad_norm": 0.515625, + "learning_rate": 0.00016699835383485604, + "loss": 1.0171, + "step": 23680 + }, + { + "epoch": 0.34, + "grad_norm": 0.52734375, + "learning_rate": 0.00016697976546118886, + "loss": 0.859, + "step": 23685 + }, + { + "epoch": 0.34, + "grad_norm": 0.55859375, + "learning_rate": 0.00016696117288915368, + "loss": 0.9017, + "step": 23690 + }, + { + "epoch": 0.34, + "grad_norm": 0.53125, + "learning_rate": 0.00016694257611991594, + "loss": 1.0393, + "step": 23695 + }, + { + "epoch": 0.34, + "grad_norm": 0.546875, + "learning_rate": 0.00016692397515464125, + "loss": 0.9315, + "step": 23700 + }, + { + "epoch": 0.34, + "grad_norm": 0.703125, + "learning_rate": 0.00016690536999449561, + "loss": 0.933, + "step": 23705 + }, + { + "epoch": 0.34, + "grad_norm": 0.5078125, + "learning_rate": 0.00016688676064064516, + "loss": 0.8206, + "step": 23710 + }, + { + "epoch": 0.34, + "grad_norm": 0.54296875, + "learning_rate": 0.0001668681470942564, + "loss": 0.9649, + "step": 23715 + }, + { + "epoch": 0.34, + "grad_norm": 0.53515625, + "learning_rate": 0.000166849529356496, + "loss": 0.939, + "step": 23720 + }, + { + "epoch": 0.34, + "grad_norm": 0.51953125, + "learning_rate": 0.00016683090742853097, + "loss": 0.9569, + "step": 23725 + }, + { + "epoch": 0.34, + "grad_norm": 0.55078125, + "learning_rate": 0.00016681228131152856, + "loss": 0.9348, + "step": 23730 + }, + { + "epoch": 0.34, + "grad_norm": 0.6015625, + "learning_rate": 0.00016679365100665626, + "loss": 1.0092, + "step": 23735 + }, + { + "epoch": 0.34, + "grad_norm": 0.5390625, + "learning_rate": 0.00016677501651508184, + "loss": 0.9932, + "step": 23740 + }, + { + "epoch": 0.34, + "grad_norm": 0.62890625, + "learning_rate": 0.0001667563778379733, + "loss": 0.9631, + "step": 23745 + }, + { + "epoch": 0.34, + "grad_norm": 0.5234375, + "learning_rate": 0.000166737734976499, + "loss": 1.0865, + "step": 23750 + }, + { + "epoch": 0.34, + "grad_norm": 0.55859375, + "learning_rate": 0.0001667190879318275, + "loss": 1.0609, + "step": 23755 + }, + { + "epoch": 0.34, + "grad_norm": 0.6015625, + "learning_rate": 0.00016670043670512753, + "loss": 1.0621, + "step": 23760 + }, + { + "epoch": 0.34, + "grad_norm": 0.66015625, + "learning_rate": 0.00016668178129756824, + "loss": 1.0944, + "step": 23765 + }, + { + "epoch": 0.34, + "grad_norm": 0.5703125, + "learning_rate": 0.00016666312171031896, + "loss": 0.8663, + "step": 23770 + }, + { + "epoch": 0.34, + "grad_norm": 0.54296875, + "learning_rate": 0.00016664445794454928, + "loss": 1.04, + "step": 23775 + }, + { + "epoch": 0.34, + "grad_norm": 0.5703125, + "learning_rate": 0.00016662579000142907, + "loss": 0.93, + "step": 23780 + }, + { + "epoch": 0.34, + "grad_norm": 0.53515625, + "learning_rate": 0.00016660711788212847, + "loss": 0.9787, + "step": 23785 + }, + { + "epoch": 0.34, + "grad_norm": 0.640625, + "learning_rate": 0.0001665884415878179, + "loss": 0.9356, + "step": 23790 + }, + { + "epoch": 0.34, + "grad_norm": 0.546875, + "learning_rate": 0.0001665697611196679, + "loss": 0.9909, + "step": 23795 + }, + { + "epoch": 0.34, + "grad_norm": 0.58203125, + "learning_rate": 0.00016655107647884946, + "loss": 0.9665, + "step": 23800 + }, + { + "epoch": 0.34, + "grad_norm": 0.5625, + "learning_rate": 0.0001665323876665338, + "loss": 0.774, + "step": 23805 + }, + { + "epoch": 0.34, + "grad_norm": 0.55859375, + "learning_rate": 0.00016651369468389228, + "loss": 1.0398, + "step": 23810 + }, + { + "epoch": 0.34, + "grad_norm": 0.68359375, + "learning_rate": 0.00016649499753209666, + "loss": 1.0225, + "step": 23815 + }, + { + "epoch": 0.34, + "grad_norm": 0.56640625, + "learning_rate": 0.00016647629621231882, + "loss": 0.8501, + "step": 23820 + }, + { + "epoch": 0.34, + "grad_norm": 0.53125, + "learning_rate": 0.00016645759072573104, + "loss": 1.0198, + "step": 23825 + }, + { + "epoch": 0.34, + "grad_norm": 0.55859375, + "learning_rate": 0.00016643888107350577, + "loss": 0.9066, + "step": 23830 + }, + { + "epoch": 0.34, + "grad_norm": 0.52734375, + "learning_rate": 0.0001664201672568158, + "loss": 0.9364, + "step": 23835 + }, + { + "epoch": 0.34, + "grad_norm": 0.5390625, + "learning_rate": 0.00016640144927683407, + "loss": 0.8964, + "step": 23840 + }, + { + "epoch": 0.34, + "grad_norm": 0.515625, + "learning_rate": 0.00016638272713473387, + "loss": 0.9231, + "step": 23845 + }, + { + "epoch": 0.34, + "grad_norm": 0.59375, + "learning_rate": 0.00016636400083168878, + "loss": 1.0691, + "step": 23850 + }, + { + "epoch": 0.34, + "grad_norm": 0.5234375, + "learning_rate": 0.00016634527036887245, + "loss": 0.8508, + "step": 23855 + }, + { + "epoch": 0.34, + "grad_norm": 0.55078125, + "learning_rate": 0.0001663265357474591, + "loss": 0.9175, + "step": 23860 + }, + { + "epoch": 0.34, + "grad_norm": 0.54296875, + "learning_rate": 0.0001663077969686229, + "loss": 0.927, + "step": 23865 + }, + { + "epoch": 0.34, + "grad_norm": 0.498046875, + "learning_rate": 0.0001662890540335385, + "loss": 0.9039, + "step": 23870 + }, + { + "epoch": 0.34, + "grad_norm": 0.515625, + "learning_rate": 0.00016627030694338067, + "loss": 1.0943, + "step": 23875 + }, + { + "epoch": 0.34, + "grad_norm": 0.609375, + "learning_rate": 0.00016625155569932455, + "loss": 0.9051, + "step": 23880 + }, + { + "epoch": 0.34, + "grad_norm": 0.5234375, + "learning_rate": 0.00016623280030254542, + "loss": 0.9562, + "step": 23885 + }, + { + "epoch": 0.34, + "grad_norm": 0.55078125, + "learning_rate": 0.000166214040754219, + "loss": 0.9317, + "step": 23890 + }, + { + "epoch": 0.34, + "grad_norm": 0.53125, + "learning_rate": 0.00016619527705552103, + "loss": 0.8951, + "step": 23895 + }, + { + "epoch": 0.34, + "grad_norm": 0.58984375, + "learning_rate": 0.00016617650920762773, + "loss": 0.8851, + "step": 23900 + }, + { + "epoch": 0.34, + "grad_norm": 0.5703125, + "learning_rate": 0.00016615773721171545, + "loss": 1.0109, + "step": 23905 + }, + { + "epoch": 0.34, + "grad_norm": 0.58984375, + "learning_rate": 0.00016613896106896085, + "loss": 1.1413, + "step": 23910 + }, + { + "epoch": 0.34, + "grad_norm": 0.6796875, + "learning_rate": 0.0001661201807805409, + "loss": 1.0307, + "step": 23915 + }, + { + "epoch": 0.34, + "grad_norm": 0.66796875, + "learning_rate": 0.00016610139634763265, + "loss": 0.9929, + "step": 23920 + }, + { + "epoch": 0.34, + "grad_norm": 0.51953125, + "learning_rate": 0.00016608260777141361, + "loss": 0.8565, + "step": 23925 + }, + { + "epoch": 0.34, + "grad_norm": 0.58203125, + "learning_rate": 0.00016606381505306149, + "loss": 1.0399, + "step": 23930 + }, + { + "epoch": 0.34, + "grad_norm": 0.609375, + "learning_rate": 0.00016604501819375415, + "loss": 1.004, + "step": 23935 + }, + { + "epoch": 0.34, + "grad_norm": 0.546875, + "learning_rate": 0.00016602621719466988, + "loss": 0.9961, + "step": 23940 + }, + { + "epoch": 0.34, + "grad_norm": 0.59765625, + "learning_rate": 0.00016600741205698714, + "loss": 0.9562, + "step": 23945 + }, + { + "epoch": 0.34, + "grad_norm": 0.515625, + "learning_rate": 0.00016598860278188457, + "loss": 0.9537, + "step": 23950 + }, + { + "epoch": 0.34, + "grad_norm": 0.58203125, + "learning_rate": 0.00016596978937054129, + "loss": 1.1004, + "step": 23955 + }, + { + "epoch": 0.34, + "grad_norm": 0.5234375, + "learning_rate": 0.00016595097182413643, + "loss": 0.9118, + "step": 23960 + }, + { + "epoch": 0.34, + "grad_norm": 0.546875, + "learning_rate": 0.00016593215014384957, + "loss": 0.9165, + "step": 23965 + }, + { + "epoch": 0.34, + "grad_norm": 0.54296875, + "learning_rate": 0.00016591332433086044, + "loss": 1.1373, + "step": 23970 + }, + { + "epoch": 0.34, + "grad_norm": 0.494140625, + "learning_rate": 0.0001658944943863491, + "loss": 0.9159, + "step": 23975 + }, + { + "epoch": 0.34, + "grad_norm": 0.59375, + "learning_rate": 0.00016587566031149576, + "loss": 0.8789, + "step": 23980 + }, + { + "epoch": 0.34, + "grad_norm": 0.5859375, + "learning_rate": 0.00016585682210748103, + "loss": 1.0135, + "step": 23985 + }, + { + "epoch": 0.34, + "grad_norm": 0.59375, + "learning_rate": 0.0001658379797754857, + "loss": 0.864, + "step": 23990 + }, + { + "epoch": 0.34, + "grad_norm": 0.6015625, + "learning_rate": 0.0001658191333166908, + "loss": 1.0405, + "step": 23995 + }, + { + "epoch": 0.34, + "grad_norm": 0.5625, + "learning_rate": 0.00016580028273227763, + "loss": 0.909, + "step": 24000 + }, + { + "epoch": 0.34, + "grad_norm": 0.59375, + "learning_rate": 0.0001657814280234278, + "loss": 0.8624, + "step": 24005 + }, + { + "epoch": 0.34, + "grad_norm": 0.5859375, + "learning_rate": 0.00016576256919132321, + "loss": 0.9113, + "step": 24010 + }, + { + "epoch": 0.34, + "grad_norm": 0.59375, + "learning_rate": 0.00016574370623714582, + "loss": 0.9907, + "step": 24015 + }, + { + "epoch": 0.34, + "grad_norm": 0.5546875, + "learning_rate": 0.00016572483916207808, + "loss": 0.9848, + "step": 24020 + }, + { + "epoch": 0.34, + "grad_norm": 0.69921875, + "learning_rate": 0.00016570596796730257, + "loss": 0.9872, + "step": 24025 + }, + { + "epoch": 0.34, + "grad_norm": 0.5625, + "learning_rate": 0.00016568709265400212, + "loss": 0.9887, + "step": 24030 + }, + { + "epoch": 0.34, + "grad_norm": 0.6171875, + "learning_rate": 0.00016566821322335992, + "loss": 0.9673, + "step": 24035 + }, + { + "epoch": 0.34, + "grad_norm": 0.609375, + "learning_rate": 0.00016564932967655933, + "loss": 0.9441, + "step": 24040 + }, + { + "epoch": 0.34, + "grad_norm": 0.65234375, + "learning_rate": 0.00016563044201478396, + "loss": 1.0899, + "step": 24045 + }, + { + "epoch": 0.34, + "grad_norm": 0.50390625, + "learning_rate": 0.0001656115502392178, + "loss": 0.8546, + "step": 24050 + }, + { + "epoch": 0.35, + "grad_norm": 0.58203125, + "learning_rate": 0.00016559265435104486, + "loss": 0.8485, + "step": 24055 + }, + { + "epoch": 0.35, + "grad_norm": 0.5859375, + "learning_rate": 0.00016557375435144968, + "loss": 0.9084, + "step": 24060 + }, + { + "epoch": 0.35, + "grad_norm": 0.5703125, + "learning_rate": 0.00016555485024161693, + "loss": 0.8803, + "step": 24065 + }, + { + "epoch": 0.35, + "grad_norm": 0.6328125, + "learning_rate": 0.00016553594202273146, + "loss": 0.949, + "step": 24070 + }, + { + "epoch": 0.35, + "grad_norm": 0.59765625, + "learning_rate": 0.00016551702969597854, + "loss": 1.1374, + "step": 24075 + }, + { + "epoch": 0.35, + "grad_norm": 0.51171875, + "learning_rate": 0.00016549811326254353, + "loss": 1.0132, + "step": 24080 + }, + { + "epoch": 0.35, + "grad_norm": 0.546875, + "learning_rate": 0.00016547919272361222, + "loss": 0.9281, + "step": 24085 + }, + { + "epoch": 0.35, + "grad_norm": 0.5234375, + "learning_rate": 0.00016546026808037054, + "loss": 0.8351, + "step": 24090 + }, + { + "epoch": 0.35, + "grad_norm": 0.58203125, + "learning_rate": 0.0001654413393340047, + "loss": 0.9989, + "step": 24095 + }, + { + "epoch": 0.35, + "grad_norm": 0.53515625, + "learning_rate": 0.00016542240648570114, + "loss": 0.9545, + "step": 24100 + }, + { + "epoch": 0.35, + "grad_norm": 0.51171875, + "learning_rate": 0.00016540346953664668, + "loss": 0.9323, + "step": 24105 + }, + { + "epoch": 0.35, + "grad_norm": 0.5859375, + "learning_rate": 0.00016538452848802824, + "loss": 1.0501, + "step": 24110 + }, + { + "epoch": 0.35, + "grad_norm": 0.5625, + "learning_rate": 0.0001653655833410331, + "loss": 0.9696, + "step": 24115 + }, + { + "epoch": 0.35, + "grad_norm": 0.609375, + "learning_rate": 0.00016534663409684876, + "loss": 0.9939, + "step": 24120 + }, + { + "epoch": 0.35, + "grad_norm": 0.5625, + "learning_rate": 0.00016532768075666295, + "loss": 1.0044, + "step": 24125 + }, + { + "epoch": 0.35, + "grad_norm": 0.5390625, + "learning_rate": 0.0001653087233216637, + "loss": 0.9425, + "step": 24130 + }, + { + "epoch": 0.35, + "grad_norm": 0.60546875, + "learning_rate": 0.00016528976179303932, + "loss": 0.9097, + "step": 24135 + }, + { + "epoch": 0.35, + "grad_norm": 0.66015625, + "learning_rate": 0.0001652707961719783, + "loss": 0.927, + "step": 24140 + }, + { + "epoch": 0.35, + "grad_norm": 0.63671875, + "learning_rate": 0.0001652518264596694, + "loss": 1.0928, + "step": 24145 + }, + { + "epoch": 0.35, + "grad_norm": 0.58203125, + "learning_rate": 0.00016523285265730177, + "loss": 1.136, + "step": 24150 + }, + { + "epoch": 0.35, + "grad_norm": 0.640625, + "learning_rate": 0.00016521387476606462, + "loss": 1.1036, + "step": 24155 + }, + { + "epoch": 0.35, + "grad_norm": 0.515625, + "learning_rate": 0.0001651948927871475, + "loss": 0.9845, + "step": 24160 + }, + { + "epoch": 0.35, + "grad_norm": 0.51171875, + "learning_rate": 0.00016517590672174027, + "loss": 0.8934, + "step": 24165 + }, + { + "epoch": 0.35, + "grad_norm": 0.51171875, + "learning_rate": 0.00016515691657103298, + "loss": 0.9262, + "step": 24170 + }, + { + "epoch": 0.35, + "grad_norm": 0.56640625, + "learning_rate": 0.00016513792233621594, + "loss": 0.9136, + "step": 24175 + }, + { + "epoch": 0.35, + "grad_norm": 0.53125, + "learning_rate": 0.00016511892401847975, + "loss": 0.9593, + "step": 24180 + }, + { + "epoch": 0.35, + "grad_norm": 0.5546875, + "learning_rate": 0.00016509992161901525, + "loss": 0.9288, + "step": 24185 + }, + { + "epoch": 0.35, + "grad_norm": 0.83203125, + "learning_rate": 0.0001650809151390135, + "loss": 0.9901, + "step": 24190 + }, + { + "epoch": 0.35, + "grad_norm": 0.5546875, + "learning_rate": 0.00016506190457966585, + "loss": 0.9776, + "step": 24195 + }, + { + "epoch": 0.35, + "grad_norm": 0.66796875, + "learning_rate": 0.00016504288994216397, + "loss": 0.9804, + "step": 24200 + }, + { + "epoch": 0.35, + "grad_norm": 0.5390625, + "learning_rate": 0.00016502387122769964, + "loss": 0.8687, + "step": 24205 + }, + { + "epoch": 0.35, + "grad_norm": 0.58203125, + "learning_rate": 0.000165004848437465, + "loss": 0.9509, + "step": 24210 + }, + { + "epoch": 0.35, + "grad_norm": 0.51171875, + "learning_rate": 0.00016498582157265245, + "loss": 0.9531, + "step": 24215 + }, + { + "epoch": 0.35, + "grad_norm": 0.5390625, + "learning_rate": 0.00016496679063445456, + "loss": 0.9361, + "step": 24220 + }, + { + "epoch": 0.35, + "grad_norm": 0.51953125, + "learning_rate": 0.00016494775562406425, + "loss": 0.841, + "step": 24225 + }, + { + "epoch": 0.35, + "grad_norm": 0.69140625, + "learning_rate": 0.0001649287165426747, + "loss": 1.1791, + "step": 24230 + }, + { + "epoch": 0.35, + "grad_norm": 0.578125, + "learning_rate": 0.0001649096733914792, + "loss": 0.9415, + "step": 24235 + }, + { + "epoch": 0.35, + "grad_norm": 0.5234375, + "learning_rate": 0.00016489062617167145, + "loss": 0.9468, + "step": 24240 + }, + { + "epoch": 0.35, + "grad_norm": 0.51171875, + "learning_rate": 0.00016487157488444536, + "loss": 0.9885, + "step": 24245 + }, + { + "epoch": 0.35, + "grad_norm": 0.68359375, + "learning_rate": 0.00016485251953099505, + "loss": 0.95, + "step": 24250 + }, + { + "epoch": 0.35, + "grad_norm": 0.69140625, + "learning_rate": 0.00016483346011251498, + "loss": 1.0973, + "step": 24255 + }, + { + "epoch": 0.35, + "grad_norm": 0.61328125, + "learning_rate": 0.0001648143966301998, + "loss": 0.9447, + "step": 24260 + }, + { + "epoch": 0.35, + "grad_norm": 0.5703125, + "learning_rate": 0.00016479532908524438, + "loss": 0.9326, + "step": 24265 + }, + { + "epoch": 0.35, + "grad_norm": 0.6171875, + "learning_rate": 0.000164776257478844, + "loss": 0.909, + "step": 24270 + }, + { + "epoch": 0.35, + "grad_norm": 0.625, + "learning_rate": 0.00016475718181219398, + "loss": 0.985, + "step": 24275 + }, + { + "epoch": 0.35, + "grad_norm": 0.546875, + "learning_rate": 0.00016473810208649003, + "loss": 0.8886, + "step": 24280 + }, + { + "epoch": 0.35, + "grad_norm": 0.578125, + "learning_rate": 0.00016471901830292815, + "loss": 1.0953, + "step": 24285 + }, + { + "epoch": 0.35, + "grad_norm": 0.61328125, + "learning_rate": 0.0001646999304627045, + "loss": 0.9343, + "step": 24290 + }, + { + "epoch": 0.35, + "grad_norm": 0.57421875, + "learning_rate": 0.0001646808385670155, + "loss": 0.9034, + "step": 24295 + }, + { + "epoch": 0.35, + "grad_norm": 0.625, + "learning_rate": 0.00016466174261705785, + "loss": 0.9041, + "step": 24300 + }, + { + "epoch": 0.35, + "grad_norm": 0.609375, + "learning_rate": 0.00016464264261402859, + "loss": 0.8977, + "step": 24305 + }, + { + "epoch": 0.35, + "grad_norm": 0.55078125, + "learning_rate": 0.0001646235385591248, + "loss": 1.0345, + "step": 24310 + }, + { + "epoch": 0.35, + "grad_norm": 0.494140625, + "learning_rate": 0.0001646044304535441, + "loss": 1.0311, + "step": 24315 + }, + { + "epoch": 0.35, + "grad_norm": 0.52734375, + "learning_rate": 0.00016458531829848406, + "loss": 1.018, + "step": 24320 + }, + { + "epoch": 0.35, + "grad_norm": 0.69921875, + "learning_rate": 0.0001645662020951427, + "loss": 0.8999, + "step": 24325 + }, + { + "epoch": 0.35, + "grad_norm": 0.59765625, + "learning_rate": 0.0001645470818447183, + "loss": 1.0338, + "step": 24330 + }, + { + "epoch": 0.35, + "grad_norm": 0.69921875, + "learning_rate": 0.00016452795754840928, + "loss": 0.9788, + "step": 24335 + }, + { + "epoch": 0.35, + "grad_norm": 0.56640625, + "learning_rate": 0.0001645088292074144, + "loss": 0.9003, + "step": 24340 + }, + { + "epoch": 0.35, + "grad_norm": 0.59765625, + "learning_rate": 0.0001644896968229326, + "loss": 0.9124, + "step": 24345 + }, + { + "epoch": 0.35, + "grad_norm": 0.5390625, + "learning_rate": 0.0001644705603961632, + "loss": 0.9349, + "step": 24350 + }, + { + "epoch": 0.35, + "grad_norm": 0.56640625, + "learning_rate": 0.00016445141992830562, + "loss": 0.9792, + "step": 24355 + }, + { + "epoch": 0.35, + "grad_norm": 0.58203125, + "learning_rate": 0.00016443227542055965, + "loss": 1.0906, + "step": 24360 + }, + { + "epoch": 0.35, + "grad_norm": 0.59375, + "learning_rate": 0.00016441312687412527, + "loss": 0.8854, + "step": 24365 + }, + { + "epoch": 0.35, + "grad_norm": 0.63671875, + "learning_rate": 0.00016439397429020272, + "loss": 1.1025, + "step": 24370 + }, + { + "epoch": 0.35, + "grad_norm": 0.52734375, + "learning_rate": 0.00016437481766999254, + "loss": 0.9103, + "step": 24375 + }, + { + "epoch": 0.35, + "grad_norm": 0.59765625, + "learning_rate": 0.00016435565701469548, + "loss": 1.1122, + "step": 24380 + }, + { + "epoch": 0.35, + "grad_norm": 0.5703125, + "learning_rate": 0.00016433649232551253, + "loss": 0.9879, + "step": 24385 + }, + { + "epoch": 0.35, + "grad_norm": 0.52734375, + "learning_rate": 0.00016431732360364497, + "loss": 0.9769, + "step": 24390 + }, + { + "epoch": 0.35, + "grad_norm": 0.52734375, + "learning_rate": 0.0001642981508502943, + "loss": 0.8533, + "step": 24395 + }, + { + "epoch": 0.35, + "grad_norm": 0.55859375, + "learning_rate": 0.00016427897406666233, + "loss": 1.0708, + "step": 24400 + }, + { + "epoch": 0.35, + "grad_norm": 0.5078125, + "learning_rate": 0.00016425979325395104, + "loss": 0.8953, + "step": 24405 + }, + { + "epoch": 0.35, + "grad_norm": 0.55859375, + "learning_rate": 0.00016424060841336275, + "loss": 0.9551, + "step": 24410 + }, + { + "epoch": 0.35, + "grad_norm": 0.56640625, + "learning_rate": 0.00016422141954609994, + "loss": 1.033, + "step": 24415 + }, + { + "epoch": 0.35, + "grad_norm": 0.58203125, + "learning_rate": 0.0001642022266533654, + "loss": 1.1218, + "step": 24420 + }, + { + "epoch": 0.35, + "grad_norm": 0.6015625, + "learning_rate": 0.00016418302973636223, + "loss": 0.8115, + "step": 24425 + }, + { + "epoch": 0.35, + "grad_norm": 0.65625, + "learning_rate": 0.0001641638287962936, + "loss": 1.0355, + "step": 24430 + }, + { + "epoch": 0.35, + "grad_norm": 0.5234375, + "learning_rate": 0.00016414462383436312, + "loss": 0.9497, + "step": 24435 + }, + { + "epoch": 0.35, + "grad_norm": 0.59765625, + "learning_rate": 0.00016412541485177456, + "loss": 0.9697, + "step": 24440 + }, + { + "epoch": 0.35, + "grad_norm": 0.703125, + "learning_rate": 0.00016410620184973196, + "loss": 1.0461, + "step": 24445 + }, + { + "epoch": 0.35, + "grad_norm": 0.6015625, + "learning_rate": 0.00016408698482943962, + "loss": 0.9266, + "step": 24450 + }, + { + "epoch": 0.35, + "grad_norm": 0.62890625, + "learning_rate": 0.0001640677637921021, + "loss": 0.9798, + "step": 24455 + }, + { + "epoch": 0.35, + "grad_norm": 0.56640625, + "learning_rate": 0.00016404853873892416, + "loss": 0.9772, + "step": 24460 + }, + { + "epoch": 0.35, + "grad_norm": 0.57421875, + "learning_rate": 0.00016402930967111088, + "loss": 0.8672, + "step": 24465 + }, + { + "epoch": 0.35, + "grad_norm": 0.62890625, + "learning_rate": 0.00016401007658986753, + "loss": 1.0654, + "step": 24470 + }, + { + "epoch": 0.35, + "grad_norm": 0.58203125, + "learning_rate": 0.00016399083949639968, + "loss": 0.9599, + "step": 24475 + }, + { + "epoch": 0.35, + "grad_norm": 0.58203125, + "learning_rate": 0.00016397159839191315, + "loss": 1.083, + "step": 24480 + }, + { + "epoch": 0.35, + "grad_norm": 0.5546875, + "learning_rate": 0.00016395235327761395, + "loss": 0.9926, + "step": 24485 + }, + { + "epoch": 0.35, + "grad_norm": 0.59765625, + "learning_rate": 0.00016393310415470844, + "loss": 1.0446, + "step": 24490 + }, + { + "epoch": 0.35, + "grad_norm": 0.58203125, + "learning_rate": 0.00016391385102440314, + "loss": 1.0361, + "step": 24495 + }, + { + "epoch": 0.35, + "grad_norm": 0.578125, + "learning_rate": 0.00016389459388790485, + "loss": 1.0226, + "step": 24500 + }, + { + "epoch": 0.35, + "grad_norm": 0.62109375, + "learning_rate": 0.00016387533274642065, + "loss": 0.9835, + "step": 24505 + }, + { + "epoch": 0.35, + "grad_norm": 0.60546875, + "learning_rate": 0.00016385606760115784, + "loss": 1.0376, + "step": 24510 + }, + { + "epoch": 0.35, + "grad_norm": 0.56640625, + "learning_rate": 0.00016383679845332401, + "loss": 0.9766, + "step": 24515 + }, + { + "epoch": 0.35, + "grad_norm": 0.546875, + "learning_rate": 0.00016381752530412693, + "loss": 0.9625, + "step": 24520 + }, + { + "epoch": 0.35, + "grad_norm": 0.5859375, + "learning_rate": 0.00016379824815477466, + "loss": 1.0135, + "step": 24525 + }, + { + "epoch": 0.35, + "grad_norm": 0.671875, + "learning_rate": 0.0001637789670064756, + "loss": 0.905, + "step": 24530 + }, + { + "epoch": 0.35, + "grad_norm": 0.53125, + "learning_rate": 0.0001637596818604382, + "loss": 0.963, + "step": 24535 + }, + { + "epoch": 0.35, + "grad_norm": 0.56640625, + "learning_rate": 0.00016374039271787133, + "loss": 0.9131, + "step": 24540 + }, + { + "epoch": 0.35, + "grad_norm": 0.75, + "learning_rate": 0.00016372109957998404, + "loss": 0.8397, + "step": 24545 + }, + { + "epoch": 0.35, + "grad_norm": 0.52734375, + "learning_rate": 0.00016370180244798567, + "loss": 0.7781, + "step": 24550 + }, + { + "epoch": 0.35, + "grad_norm": 0.5546875, + "learning_rate": 0.00016368250132308578, + "loss": 0.9317, + "step": 24555 + }, + { + "epoch": 0.35, + "grad_norm": 0.5234375, + "learning_rate": 0.0001636631962064942, + "loss": 0.9688, + "step": 24560 + }, + { + "epoch": 0.35, + "grad_norm": 0.91015625, + "learning_rate": 0.00016364388709942093, + "loss": 1.1305, + "step": 24565 + }, + { + "epoch": 0.35, + "grad_norm": 0.52734375, + "learning_rate": 0.00016362457400307637, + "loss": 0.9365, + "step": 24570 + }, + { + "epoch": 0.35, + "grad_norm": 0.58984375, + "learning_rate": 0.000163605256918671, + "loss": 1.0317, + "step": 24575 + }, + { + "epoch": 0.35, + "grad_norm": 1.0703125, + "learning_rate": 0.00016358593584741576, + "loss": 1.0129, + "step": 24580 + }, + { + "epoch": 0.35, + "grad_norm": 0.52734375, + "learning_rate": 0.00016356661079052157, + "loss": 0.882, + "step": 24585 + }, + { + "epoch": 0.35, + "grad_norm": 0.5625, + "learning_rate": 0.00016354728174919984, + "loss": 0.9529, + "step": 24590 + }, + { + "epoch": 0.35, + "grad_norm": 0.53515625, + "learning_rate": 0.00016352794872466215, + "loss": 0.9644, + "step": 24595 + }, + { + "epoch": 0.35, + "grad_norm": 0.6015625, + "learning_rate": 0.00016350861171812023, + "loss": 0.8416, + "step": 24600 + }, + { + "epoch": 0.35, + "grad_norm": 0.58203125, + "learning_rate": 0.00016348927073078624, + "loss": 0.9917, + "step": 24605 + }, + { + "epoch": 0.35, + "grad_norm": 0.57421875, + "learning_rate": 0.00016346992576387242, + "loss": 1.1237, + "step": 24610 + }, + { + "epoch": 0.35, + "grad_norm": 0.578125, + "learning_rate": 0.0001634505768185914, + "loss": 0.8601, + "step": 24615 + }, + { + "epoch": 0.35, + "grad_norm": 0.6328125, + "learning_rate": 0.00016343122389615594, + "loss": 1.0272, + "step": 24620 + }, + { + "epoch": 0.35, + "grad_norm": 0.5859375, + "learning_rate": 0.00016341186699777912, + "loss": 0.9921, + "step": 24625 + }, + { + "epoch": 0.35, + "grad_norm": 0.578125, + "learning_rate": 0.00016339250612467426, + "loss": 0.7782, + "step": 24630 + }, + { + "epoch": 0.35, + "grad_norm": 0.462890625, + "learning_rate": 0.00016337314127805495, + "loss": 0.9831, + "step": 24635 + }, + { + "epoch": 0.35, + "grad_norm": 0.53125, + "learning_rate": 0.0001633537724591349, + "loss": 0.8844, + "step": 24640 + }, + { + "epoch": 0.35, + "grad_norm": 0.6875, + "learning_rate": 0.00016333439966912828, + "loss": 0.9384, + "step": 24645 + }, + { + "epoch": 0.35, + "grad_norm": 0.55859375, + "learning_rate": 0.00016331502290924937, + "loss": 0.8616, + "step": 24650 + }, + { + "epoch": 0.35, + "grad_norm": 0.51953125, + "learning_rate": 0.00016329564218071273, + "loss": 0.7969, + "step": 24655 + }, + { + "epoch": 0.35, + "grad_norm": 0.56640625, + "learning_rate": 0.0001632762574847331, + "loss": 1.045, + "step": 24660 + }, + { + "epoch": 0.35, + "grad_norm": 0.53515625, + "learning_rate": 0.0001632568688225256, + "loss": 1.0361, + "step": 24665 + }, + { + "epoch": 0.35, + "grad_norm": 0.69921875, + "learning_rate": 0.00016323747619530554, + "loss": 0.9363, + "step": 24670 + }, + { + "epoch": 0.35, + "grad_norm": 0.609375, + "learning_rate": 0.00016321807960428843, + "loss": 0.9388, + "step": 24675 + }, + { + "epoch": 0.35, + "grad_norm": 0.5859375, + "learning_rate": 0.00016319867905069009, + "loss": 1.0529, + "step": 24680 + }, + { + "epoch": 0.35, + "grad_norm": 0.578125, + "learning_rate": 0.00016317927453572657, + "loss": 0.8853, + "step": 24685 + }, + { + "epoch": 0.35, + "grad_norm": 0.57421875, + "learning_rate": 0.00016315986606061416, + "loss": 0.9914, + "step": 24690 + }, + { + "epoch": 0.35, + "grad_norm": 0.53515625, + "learning_rate": 0.00016314045362656945, + "loss": 1.0792, + "step": 24695 + }, + { + "epoch": 0.35, + "grad_norm": 0.62890625, + "learning_rate": 0.0001631210372348092, + "loss": 0.9683, + "step": 24700 + }, + { + "epoch": 0.35, + "grad_norm": 0.56640625, + "learning_rate": 0.00016310161688655036, + "loss": 1.0693, + "step": 24705 + }, + { + "epoch": 0.35, + "grad_norm": 0.66796875, + "learning_rate": 0.00016308219258301038, + "loss": 1.0178, + "step": 24710 + }, + { + "epoch": 0.35, + "grad_norm": 0.5234375, + "learning_rate": 0.0001630627643254067, + "loss": 1.0458, + "step": 24715 + }, + { + "epoch": 0.35, + "grad_norm": 0.59765625, + "learning_rate": 0.00016304333211495715, + "loss": 0.9108, + "step": 24720 + }, + { + "epoch": 0.35, + "grad_norm": 0.54296875, + "learning_rate": 0.00016302389595287975, + "loss": 0.8966, + "step": 24725 + }, + { + "epoch": 0.35, + "grad_norm": 0.498046875, + "learning_rate": 0.00016300445584039274, + "loss": 0.8942, + "step": 24730 + }, + { + "epoch": 0.35, + "grad_norm": 0.58203125, + "learning_rate": 0.0001629850117787147, + "loss": 0.8664, + "step": 24735 + }, + { + "epoch": 0.35, + "grad_norm": 0.67578125, + "learning_rate": 0.0001629655637690644, + "loss": 0.9213, + "step": 24740 + }, + { + "epoch": 0.35, + "grad_norm": 0.515625, + "learning_rate": 0.00016294611181266082, + "loss": 0.9186, + "step": 24745 + }, + { + "epoch": 0.36, + "grad_norm": 0.61328125, + "learning_rate": 0.00016292665591072328, + "loss": 0.8974, + "step": 24750 + }, + { + "epoch": 0.36, + "grad_norm": 0.67578125, + "learning_rate": 0.0001629071960644713, + "loss": 0.9719, + "step": 24755 + }, + { + "epoch": 0.36, + "grad_norm": 0.5, + "learning_rate": 0.00016288773227512459, + "loss": 0.9197, + "step": 24760 + }, + { + "epoch": 0.36, + "grad_norm": 0.59765625, + "learning_rate": 0.0001628682645439032, + "loss": 0.9864, + "step": 24765 + }, + { + "epoch": 0.36, + "grad_norm": 0.5546875, + "learning_rate": 0.0001628487928720274, + "loss": 0.9193, + "step": 24770 + }, + { + "epoch": 0.36, + "grad_norm": 0.5703125, + "learning_rate": 0.0001628293172607177, + "loss": 0.8703, + "step": 24775 + }, + { + "epoch": 0.36, + "grad_norm": 0.474609375, + "learning_rate": 0.0001628098377111948, + "loss": 1.0337, + "step": 24780 + }, + { + "epoch": 0.36, + "grad_norm": 0.65234375, + "learning_rate": 0.00016279035422467976, + "loss": 0.8703, + "step": 24785 + }, + { + "epoch": 0.36, + "grad_norm": 0.61328125, + "learning_rate": 0.00016277086680239382, + "loss": 0.8611, + "step": 24790 + }, + { + "epoch": 0.36, + "grad_norm": 0.51171875, + "learning_rate": 0.00016275137544555842, + "loss": 1.1237, + "step": 24795 + }, + { + "epoch": 0.36, + "grad_norm": 0.63671875, + "learning_rate": 0.00016273188015539537, + "loss": 1.1163, + "step": 24800 + }, + { + "epoch": 0.36, + "grad_norm": 0.53515625, + "learning_rate": 0.00016271238093312662, + "loss": 0.9018, + "step": 24805 + }, + { + "epoch": 0.36, + "grad_norm": 0.55078125, + "learning_rate": 0.00016269287777997442, + "loss": 0.9558, + "step": 24810 + }, + { + "epoch": 0.36, + "grad_norm": 0.640625, + "learning_rate": 0.00016267337069716123, + "loss": 0.926, + "step": 24815 + }, + { + "epoch": 0.36, + "grad_norm": 0.5703125, + "learning_rate": 0.00016265385968590977, + "loss": 1.006, + "step": 24820 + }, + { + "epoch": 0.36, + "grad_norm": 0.57421875, + "learning_rate": 0.00016263434474744304, + "loss": 0.9435, + "step": 24825 + }, + { + "epoch": 0.36, + "grad_norm": 0.58984375, + "learning_rate": 0.00016261482588298426, + "loss": 0.9433, + "step": 24830 + }, + { + "epoch": 0.36, + "grad_norm": 0.57421875, + "learning_rate": 0.00016259530309375685, + "loss": 0.952, + "step": 24835 + }, + { + "epoch": 0.36, + "grad_norm": 0.5078125, + "learning_rate": 0.00016257577638098457, + "loss": 0.9448, + "step": 24840 + }, + { + "epoch": 0.36, + "grad_norm": 0.53125, + "learning_rate": 0.00016255624574589136, + "loss": 0.9726, + "step": 24845 + }, + { + "epoch": 0.36, + "grad_norm": 0.546875, + "learning_rate": 0.0001625367111897014, + "loss": 0.9088, + "step": 24850 + }, + { + "epoch": 0.36, + "grad_norm": 0.65625, + "learning_rate": 0.0001625171727136392, + "loss": 0.8722, + "step": 24855 + }, + { + "epoch": 0.36, + "grad_norm": 0.69921875, + "learning_rate": 0.0001624976303189294, + "loss": 1.1047, + "step": 24860 + }, + { + "epoch": 0.36, + "grad_norm": 0.61328125, + "learning_rate": 0.00016247808400679693, + "loss": 1.0619, + "step": 24865 + }, + { + "epoch": 0.36, + "grad_norm": 0.52734375, + "learning_rate": 0.00016245853377846702, + "loss": 1.0366, + "step": 24870 + }, + { + "epoch": 0.36, + "grad_norm": 0.59375, + "learning_rate": 0.00016243897963516508, + "loss": 1.0427, + "step": 24875 + }, + { + "epoch": 0.36, + "grad_norm": 0.5, + "learning_rate": 0.00016241942157811678, + "loss": 1.0127, + "step": 24880 + }, + { + "epoch": 0.36, + "grad_norm": 0.578125, + "learning_rate": 0.00016239985960854805, + "loss": 0.9216, + "step": 24885 + }, + { + "epoch": 0.36, + "grad_norm": 0.55859375, + "learning_rate": 0.00016238029372768505, + "loss": 0.8924, + "step": 24890 + }, + { + "epoch": 0.36, + "grad_norm": 0.5625, + "learning_rate": 0.00016236072393675417, + "loss": 0.9695, + "step": 24895 + }, + { + "epoch": 0.36, + "grad_norm": 0.5703125, + "learning_rate": 0.0001623411502369821, + "loss": 0.9501, + "step": 24900 + }, + { + "epoch": 0.36, + "grad_norm": 0.6015625, + "learning_rate": 0.00016232157262959573, + "loss": 1.0881, + "step": 24905 + }, + { + "epoch": 0.36, + "grad_norm": 0.7265625, + "learning_rate": 0.00016230199111582226, + "loss": 0.9827, + "step": 24910 + }, + { + "epoch": 0.36, + "grad_norm": 0.55859375, + "learning_rate": 0.00016228240569688898, + "loss": 0.9261, + "step": 24915 + }, + { + "epoch": 0.36, + "grad_norm": 0.55078125, + "learning_rate": 0.0001622628163740236, + "loss": 0.9561, + "step": 24920 + }, + { + "epoch": 0.36, + "grad_norm": 0.578125, + "learning_rate": 0.00016224322314845394, + "loss": 0.979, + "step": 24925 + }, + { + "epoch": 0.36, + "grad_norm": 0.59765625, + "learning_rate": 0.00016222362602140818, + "loss": 1.0244, + "step": 24930 + }, + { + "epoch": 0.36, + "grad_norm": 0.55078125, + "learning_rate": 0.0001622040249941147, + "loss": 0.9016, + "step": 24935 + }, + { + "epoch": 0.36, + "grad_norm": 0.578125, + "learning_rate": 0.00016218442006780208, + "loss": 0.928, + "step": 24940 + }, + { + "epoch": 0.36, + "grad_norm": 0.5859375, + "learning_rate": 0.00016216481124369918, + "loss": 1.0769, + "step": 24945 + }, + { + "epoch": 0.36, + "grad_norm": 0.5859375, + "learning_rate": 0.0001621451985230351, + "loss": 1.0409, + "step": 24950 + }, + { + "epoch": 0.36, + "grad_norm": 0.65234375, + "learning_rate": 0.00016212558190703923, + "loss": 0.987, + "step": 24955 + }, + { + "epoch": 0.36, + "grad_norm": 0.5546875, + "learning_rate": 0.00016210596139694112, + "loss": 1.0778, + "step": 24960 + }, + { + "epoch": 0.36, + "grad_norm": 0.54296875, + "learning_rate": 0.0001620863369939706, + "loss": 0.9534, + "step": 24965 + }, + { + "epoch": 0.36, + "grad_norm": 0.7421875, + "learning_rate": 0.0001620667086993578, + "loss": 1.0017, + "step": 24970 + }, + { + "epoch": 0.36, + "grad_norm": 0.52734375, + "learning_rate": 0.00016204707651433297, + "loss": 0.8229, + "step": 24975 + }, + { + "epoch": 0.36, + "grad_norm": 0.59375, + "learning_rate": 0.00016202744044012675, + "loss": 0.8823, + "step": 24980 + }, + { + "epoch": 0.36, + "grad_norm": 0.640625, + "learning_rate": 0.0001620078004779699, + "loss": 0.9721, + "step": 24985 + }, + { + "epoch": 0.36, + "grad_norm": 0.65234375, + "learning_rate": 0.0001619881566290935, + "loss": 1.0124, + "step": 24990 + }, + { + "epoch": 0.36, + "grad_norm": 0.5234375, + "learning_rate": 0.00016196850889472887, + "loss": 0.9635, + "step": 24995 + }, + { + "epoch": 0.36, + "grad_norm": 0.703125, + "learning_rate": 0.00016194885727610747, + "loss": 1.0222, + "step": 25000 + }, + { + "epoch": 0.36, + "grad_norm": 0.55078125, + "learning_rate": 0.00016192920177446118, + "loss": 0.9272, + "step": 25005 + }, + { + "epoch": 0.36, + "grad_norm": 0.57421875, + "learning_rate": 0.00016190954239102197, + "loss": 0.8514, + "step": 25010 + }, + { + "epoch": 0.36, + "grad_norm": 0.5546875, + "learning_rate": 0.00016188987912702215, + "loss": 0.9072, + "step": 25015 + }, + { + "epoch": 0.36, + "grad_norm": 0.578125, + "learning_rate": 0.00016187021198369426, + "loss": 1.005, + "step": 25020 + }, + { + "epoch": 0.36, + "grad_norm": 0.546875, + "learning_rate": 0.00016185054096227094, + "loss": 0.9324, + "step": 25025 + }, + { + "epoch": 0.36, + "grad_norm": 0.5, + "learning_rate": 0.00016183086606398533, + "loss": 0.8911, + "step": 25030 + }, + { + "epoch": 0.36, + "grad_norm": 0.61328125, + "learning_rate": 0.0001618111872900706, + "loss": 1.1515, + "step": 25035 + }, + { + "epoch": 0.36, + "grad_norm": 0.59765625, + "learning_rate": 0.00016179150464176023, + "loss": 1.0191, + "step": 25040 + }, + { + "epoch": 0.36, + "grad_norm": 0.58984375, + "learning_rate": 0.000161771818120288, + "loss": 0.8963, + "step": 25045 + }, + { + "epoch": 0.36, + "grad_norm": 0.53515625, + "learning_rate": 0.00016175212772688786, + "loss": 0.9288, + "step": 25050 + }, + { + "epoch": 0.36, + "grad_norm": 0.52734375, + "learning_rate": 0.00016173243346279402, + "loss": 0.9832, + "step": 25055 + }, + { + "epoch": 0.36, + "grad_norm": 0.5703125, + "learning_rate": 0.0001617127353292409, + "loss": 0.8714, + "step": 25060 + }, + { + "epoch": 0.36, + "grad_norm": 0.478515625, + "learning_rate": 0.0001616930333274633, + "loss": 1.0185, + "step": 25065 + }, + { + "epoch": 0.36, + "grad_norm": 0.5234375, + "learning_rate": 0.0001616733274586961, + "loss": 0.9872, + "step": 25070 + }, + { + "epoch": 0.36, + "grad_norm": 0.55078125, + "learning_rate": 0.00016165361772417448, + "loss": 0.9913, + "step": 25075 + }, + { + "epoch": 0.36, + "grad_norm": 0.5546875, + "learning_rate": 0.0001616339041251339, + "loss": 0.9422, + "step": 25080 + }, + { + "epoch": 0.36, + "grad_norm": 0.58203125, + "learning_rate": 0.00016161418666281, + "loss": 0.9239, + "step": 25085 + }, + { + "epoch": 0.36, + "grad_norm": 0.56640625, + "learning_rate": 0.0001615944653384387, + "loss": 0.8856, + "step": 25090 + }, + { + "epoch": 0.36, + "grad_norm": 0.54296875, + "learning_rate": 0.00016157474015325617, + "loss": 0.9683, + "step": 25095 + }, + { + "epoch": 0.36, + "grad_norm": 0.55078125, + "learning_rate": 0.0001615550111084988, + "loss": 0.9395, + "step": 25100 + }, + { + "epoch": 0.36, + "grad_norm": 0.60546875, + "learning_rate": 0.00016153527820540324, + "loss": 0.8642, + "step": 25105 + }, + { + "epoch": 0.36, + "grad_norm": 0.58203125, + "learning_rate": 0.00016151554144520637, + "loss": 1.0026, + "step": 25110 + }, + { + "epoch": 0.36, + "grad_norm": 0.5390625, + "learning_rate": 0.00016149580082914526, + "loss": 0.9313, + "step": 25115 + }, + { + "epoch": 0.36, + "grad_norm": 0.58203125, + "learning_rate": 0.00016147605635845734, + "loss": 1.0754, + "step": 25120 + }, + { + "epoch": 0.36, + "grad_norm": 0.6171875, + "learning_rate": 0.00016145630803438018, + "loss": 0.9872, + "step": 25125 + }, + { + "epoch": 0.36, + "grad_norm": 0.51953125, + "learning_rate": 0.00016143655585815165, + "loss": 0.9619, + "step": 25130 + }, + { + "epoch": 0.36, + "grad_norm": 0.5625, + "learning_rate": 0.00016141679983100983, + "loss": 0.965, + "step": 25135 + }, + { + "epoch": 0.36, + "grad_norm": 0.62890625, + "learning_rate": 0.00016139703995419303, + "loss": 0.9198, + "step": 25140 + }, + { + "epoch": 0.36, + "grad_norm": 0.55078125, + "learning_rate": 0.00016137727622893988, + "loss": 1.0207, + "step": 25145 + }, + { + "epoch": 0.36, + "grad_norm": 0.52734375, + "learning_rate": 0.00016135750865648913, + "loss": 0.8741, + "step": 25150 + }, + { + "epoch": 0.36, + "grad_norm": 0.69140625, + "learning_rate": 0.00016133773723807986, + "loss": 0.9791, + "step": 25155 + }, + { + "epoch": 0.36, + "grad_norm": 0.609375, + "learning_rate": 0.00016131796197495134, + "loss": 0.8629, + "step": 25160 + }, + { + "epoch": 0.36, + "grad_norm": 0.58203125, + "learning_rate": 0.00016129818286834315, + "loss": 0.875, + "step": 25165 + }, + { + "epoch": 0.36, + "grad_norm": 0.546875, + "learning_rate": 0.00016127839991949503, + "loss": 0.9423, + "step": 25170 + }, + { + "epoch": 0.36, + "grad_norm": 0.5546875, + "learning_rate": 0.00016125861312964705, + "loss": 0.9443, + "step": 25175 + }, + { + "epoch": 0.36, + "grad_norm": 0.58203125, + "learning_rate": 0.00016123882250003942, + "loss": 0.8957, + "step": 25180 + }, + { + "epoch": 0.36, + "grad_norm": 0.5234375, + "learning_rate": 0.0001612190280319126, + "loss": 1.0252, + "step": 25185 + }, + { + "epoch": 0.36, + "grad_norm": 0.54296875, + "learning_rate": 0.00016119922972650743, + "loss": 0.8536, + "step": 25190 + }, + { + "epoch": 0.36, + "grad_norm": 0.6015625, + "learning_rate": 0.00016117942758506483, + "loss": 0.9057, + "step": 25195 + }, + { + "epoch": 0.36, + "grad_norm": 0.5625, + "learning_rate": 0.00016115962160882604, + "loss": 0.917, + "step": 25200 + }, + { + "epoch": 0.36, + "grad_norm": 0.546875, + "learning_rate": 0.0001611398117990325, + "loss": 0.9083, + "step": 25205 + }, + { + "epoch": 0.36, + "grad_norm": 0.58984375, + "learning_rate": 0.00016111999815692594, + "loss": 1.0793, + "step": 25210 + }, + { + "epoch": 0.36, + "grad_norm": 0.58984375, + "learning_rate": 0.00016110018068374825, + "loss": 0.9917, + "step": 25215 + }, + { + "epoch": 0.36, + "grad_norm": 0.52734375, + "learning_rate": 0.0001610803593807417, + "loss": 1.0187, + "step": 25220 + }, + { + "epoch": 0.36, + "grad_norm": 0.6015625, + "learning_rate": 0.0001610605342491486, + "loss": 0.8417, + "step": 25225 + }, + { + "epoch": 0.36, + "grad_norm": 0.578125, + "learning_rate": 0.00016104070529021172, + "loss": 0.9645, + "step": 25230 + }, + { + "epoch": 0.36, + "grad_norm": 0.52734375, + "learning_rate": 0.00016102087250517388, + "loss": 0.882, + "step": 25235 + }, + { + "epoch": 0.36, + "grad_norm": 0.51953125, + "learning_rate": 0.00016100103589527826, + "loss": 1.0314, + "step": 25240 + }, + { + "epoch": 0.36, + "grad_norm": 0.640625, + "learning_rate": 0.00016098119546176825, + "loss": 1.0143, + "step": 25245 + }, + { + "epoch": 0.36, + "grad_norm": 0.61328125, + "learning_rate": 0.00016096135120588744, + "loss": 0.9423, + "step": 25250 + }, + { + "epoch": 0.36, + "grad_norm": 0.60546875, + "learning_rate": 0.00016094150312887973, + "loss": 1.1706, + "step": 25255 + }, + { + "epoch": 0.36, + "grad_norm": 0.62109375, + "learning_rate": 0.0001609216512319892, + "loss": 0.9264, + "step": 25260 + }, + { + "epoch": 0.36, + "grad_norm": 0.54296875, + "learning_rate": 0.00016090179551646013, + "loss": 0.8544, + "step": 25265 + }, + { + "epoch": 0.36, + "grad_norm": 0.78515625, + "learning_rate": 0.00016088193598353724, + "loss": 0.968, + "step": 25270 + }, + { + "epoch": 0.36, + "grad_norm": 0.5625, + "learning_rate": 0.00016086207263446518, + "loss": 0.9744, + "step": 25275 + }, + { + "epoch": 0.36, + "grad_norm": 0.62890625, + "learning_rate": 0.00016084220547048916, + "loss": 1.0384, + "step": 25280 + }, + { + "epoch": 0.36, + "grad_norm": 0.6015625, + "learning_rate": 0.00016082233449285437, + "loss": 1.0272, + "step": 25285 + }, + { + "epoch": 0.36, + "grad_norm": 0.54296875, + "learning_rate": 0.00016080245970280638, + "loss": 0.9706, + "step": 25290 + }, + { + "epoch": 0.36, + "grad_norm": 0.59375, + "learning_rate": 0.000160782581101591, + "loss": 1.0, + "step": 25295 + }, + { + "epoch": 0.36, + "grad_norm": 0.53125, + "learning_rate": 0.00016076269869045418, + "loss": 0.912, + "step": 25300 + }, + { + "epoch": 0.36, + "grad_norm": 0.54296875, + "learning_rate": 0.0001607428124706422, + "loss": 0.8741, + "step": 25305 + }, + { + "epoch": 0.36, + "grad_norm": 0.62890625, + "learning_rate": 0.00016072292244340158, + "loss": 0.8362, + "step": 25310 + }, + { + "epoch": 0.36, + "grad_norm": 0.55859375, + "learning_rate": 0.00016070302860997902, + "loss": 0.8294, + "step": 25315 + }, + { + "epoch": 0.36, + "grad_norm": 0.56640625, + "learning_rate": 0.00016068313097162147, + "loss": 0.9885, + "step": 25320 + }, + { + "epoch": 0.36, + "grad_norm": 0.58203125, + "learning_rate": 0.0001606632295295762, + "loss": 0.8356, + "step": 25325 + }, + { + "epoch": 0.36, + "grad_norm": 0.6953125, + "learning_rate": 0.00016064332428509056, + "loss": 1.0694, + "step": 25330 + }, + { + "epoch": 0.36, + "grad_norm": 0.58984375, + "learning_rate": 0.00016062341523941234, + "loss": 1.0154, + "step": 25335 + }, + { + "epoch": 0.36, + "grad_norm": 0.59375, + "learning_rate": 0.00016060350239378935, + "loss": 0.9468, + "step": 25340 + }, + { + "epoch": 0.36, + "grad_norm": 0.57421875, + "learning_rate": 0.00016058358574946985, + "loss": 0.9269, + "step": 25345 + }, + { + "epoch": 0.36, + "grad_norm": 0.57421875, + "learning_rate": 0.0001605636653077022, + "loss": 1.0031, + "step": 25350 + }, + { + "epoch": 0.36, + "grad_norm": 0.609375, + "learning_rate": 0.000160543741069735, + "loss": 0.9001, + "step": 25355 + }, + { + "epoch": 0.36, + "grad_norm": 0.59765625, + "learning_rate": 0.0001605238130368172, + "loss": 0.9295, + "step": 25360 + }, + { + "epoch": 0.36, + "grad_norm": 0.5234375, + "learning_rate": 0.00016050388121019782, + "loss": 0.8978, + "step": 25365 + }, + { + "epoch": 0.36, + "grad_norm": 0.56640625, + "learning_rate": 0.00016048394559112626, + "loss": 0.847, + "step": 25370 + }, + { + "epoch": 0.36, + "grad_norm": 0.765625, + "learning_rate": 0.00016046400618085214, + "loss": 0.9054, + "step": 25375 + }, + { + "epoch": 0.36, + "grad_norm": 0.5859375, + "learning_rate": 0.00016044406298062526, + "loss": 0.9974, + "step": 25380 + }, + { + "epoch": 0.36, + "grad_norm": 0.5625, + "learning_rate": 0.00016042411599169563, + "loss": 1.1149, + "step": 25385 + }, + { + "epoch": 0.36, + "grad_norm": 0.5859375, + "learning_rate": 0.0001604041652153136, + "loss": 0.9138, + "step": 25390 + }, + { + "epoch": 0.36, + "grad_norm": 0.53125, + "learning_rate": 0.0001603842106527297, + "loss": 0.8508, + "step": 25395 + }, + { + "epoch": 0.36, + "grad_norm": 0.65625, + "learning_rate": 0.00016036425230519475, + "loss": 0.9259, + "step": 25400 + }, + { + "epoch": 0.36, + "grad_norm": 0.55078125, + "learning_rate": 0.00016034429017395966, + "loss": 0.8668, + "step": 25405 + }, + { + "epoch": 0.36, + "grad_norm": 0.640625, + "learning_rate": 0.00016032432426027577, + "loss": 1.0463, + "step": 25410 + }, + { + "epoch": 0.36, + "grad_norm": 0.6015625, + "learning_rate": 0.00016030435456539452, + "loss": 0.9397, + "step": 25415 + }, + { + "epoch": 0.36, + "grad_norm": 0.58203125, + "learning_rate": 0.00016028438109056762, + "loss": 0.9873, + "step": 25420 + }, + { + "epoch": 0.36, + "grad_norm": 0.55859375, + "learning_rate": 0.00016026440383704708, + "loss": 1.0089, + "step": 25425 + }, + { + "epoch": 0.36, + "grad_norm": 0.51953125, + "learning_rate": 0.00016024442280608507, + "loss": 1.0219, + "step": 25430 + }, + { + "epoch": 0.36, + "grad_norm": 0.609375, + "learning_rate": 0.00016022443799893404, + "loss": 0.9601, + "step": 25435 + }, + { + "epoch": 0.36, + "grad_norm": 0.55078125, + "learning_rate": 0.00016020444941684662, + "loss": 1.0713, + "step": 25440 + }, + { + "epoch": 0.36, + "grad_norm": 0.52734375, + "learning_rate": 0.00016018445706107576, + "loss": 0.8197, + "step": 25445 + }, + { + "epoch": 0.37, + "grad_norm": 0.60546875, + "learning_rate": 0.00016016446093287457, + "loss": 0.9444, + "step": 25450 + }, + { + "epoch": 0.37, + "grad_norm": 0.58203125, + "learning_rate": 0.00016014446103349648, + "loss": 0.9886, + "step": 25455 + }, + { + "epoch": 0.37, + "grad_norm": 0.5234375, + "learning_rate": 0.00016012445736419503, + "loss": 0.8165, + "step": 25460 + }, + { + "epoch": 0.37, + "grad_norm": 0.5703125, + "learning_rate": 0.00016010444992622415, + "loss": 1.0564, + "step": 25465 + }, + { + "epoch": 0.37, + "grad_norm": 0.59375, + "learning_rate": 0.00016008443872083788, + "loss": 0.9417, + "step": 25470 + }, + { + "epoch": 0.37, + "grad_norm": 0.59765625, + "learning_rate": 0.00016006442374929058, + "loss": 0.9726, + "step": 25475 + }, + { + "epoch": 0.37, + "grad_norm": 0.5625, + "learning_rate": 0.00016004440501283677, + "loss": 0.9835, + "step": 25480 + }, + { + "epoch": 0.37, + "grad_norm": 0.5625, + "learning_rate": 0.00016002438251273127, + "loss": 0.9092, + "step": 25485 + }, + { + "epoch": 0.37, + "grad_norm": 0.53515625, + "learning_rate": 0.00016000435625022913, + "loss": 0.926, + "step": 25490 + }, + { + "epoch": 0.37, + "grad_norm": 0.53515625, + "learning_rate": 0.00015998432622658557, + "loss": 0.939, + "step": 25495 + }, + { + "epoch": 0.37, + "grad_norm": 0.55078125, + "learning_rate": 0.00015996429244305617, + "loss": 0.9014, + "step": 25500 + }, + { + "epoch": 0.37, + "grad_norm": 0.54296875, + "learning_rate": 0.00015994425490089659, + "loss": 0.9478, + "step": 25505 + }, + { + "epoch": 0.37, + "grad_norm": 0.6953125, + "learning_rate": 0.00015992421360136282, + "loss": 1.0787, + "step": 25510 + }, + { + "epoch": 0.37, + "grad_norm": 0.58203125, + "learning_rate": 0.00015990416854571115, + "loss": 0.8994, + "step": 25515 + }, + { + "epoch": 0.37, + "grad_norm": 0.578125, + "learning_rate": 0.00015988411973519794, + "loss": 0.9534, + "step": 25520 + }, + { + "epoch": 0.37, + "grad_norm": 0.58984375, + "learning_rate": 0.0001598640671710799, + "loss": 1.0001, + "step": 25525 + }, + { + "epoch": 0.37, + "grad_norm": 0.63671875, + "learning_rate": 0.00015984401085461397, + "loss": 0.9634, + "step": 25530 + }, + { + "epoch": 0.37, + "grad_norm": 0.6015625, + "learning_rate": 0.00015982395078705729, + "loss": 0.9582, + "step": 25535 + }, + { + "epoch": 0.37, + "grad_norm": 0.5625, + "learning_rate": 0.00015980388696966723, + "loss": 0.8748, + "step": 25540 + }, + { + "epoch": 0.37, + "grad_norm": 0.4765625, + "learning_rate": 0.0001597838194037014, + "loss": 0.9613, + "step": 25545 + }, + { + "epoch": 0.37, + "grad_norm": 0.53125, + "learning_rate": 0.0001597637480904177, + "loss": 1.0516, + "step": 25550 + }, + { + "epoch": 0.37, + "grad_norm": 0.84765625, + "learning_rate": 0.0001597436730310742, + "loss": 0.9361, + "step": 25555 + }, + { + "epoch": 0.37, + "grad_norm": 0.54296875, + "learning_rate": 0.00015972359422692923, + "loss": 0.9223, + "step": 25560 + }, + { + "epoch": 0.37, + "grad_norm": 0.53125, + "learning_rate": 0.00015970351167924138, + "loss": 0.98, + "step": 25565 + }, + { + "epoch": 0.37, + "grad_norm": 0.546875, + "learning_rate": 0.0001596834253892694, + "loss": 1.0118, + "step": 25570 + }, + { + "epoch": 0.37, + "grad_norm": 0.61328125, + "learning_rate": 0.00015966333535827234, + "loss": 1.0899, + "step": 25575 + }, + { + "epoch": 0.37, + "grad_norm": 0.51171875, + "learning_rate": 0.00015964324158750947, + "loss": 0.8592, + "step": 25580 + }, + { + "epoch": 0.37, + "grad_norm": 0.6171875, + "learning_rate": 0.0001596231440782403, + "loss": 0.9544, + "step": 25585 + }, + { + "epoch": 0.37, + "grad_norm": 0.53125, + "learning_rate": 0.00015960304283172452, + "loss": 1.0262, + "step": 25590 + }, + { + "epoch": 0.37, + "grad_norm": 0.59375, + "learning_rate": 0.00015958293784922218, + "loss": 0.9641, + "step": 25595 + }, + { + "epoch": 0.37, + "grad_norm": 0.58984375, + "learning_rate": 0.0001595628291319934, + "loss": 1.0267, + "step": 25600 + }, + { + "epoch": 0.37, + "grad_norm": 0.609375, + "learning_rate": 0.0001595427166812987, + "loss": 0.9957, + "step": 25605 + }, + { + "epoch": 0.37, + "grad_norm": 0.52734375, + "learning_rate": 0.00015952260049839864, + "loss": 1.0398, + "step": 25610 + }, + { + "epoch": 0.37, + "grad_norm": 0.51953125, + "learning_rate": 0.00015950248058455423, + "loss": 1.001, + "step": 25615 + }, + { + "epoch": 0.37, + "grad_norm": 0.55859375, + "learning_rate": 0.0001594823569410266, + "loss": 1.0848, + "step": 25620 + }, + { + "epoch": 0.37, + "grad_norm": 0.60546875, + "learning_rate": 0.00015946222956907704, + "loss": 0.9839, + "step": 25625 + }, + { + "epoch": 0.37, + "grad_norm": 0.54296875, + "learning_rate": 0.00015944209846996722, + "loss": 0.8242, + "step": 25630 + }, + { + "epoch": 0.37, + "grad_norm": 0.51953125, + "learning_rate": 0.00015942196364495897, + "loss": 0.8995, + "step": 25635 + }, + { + "epoch": 0.37, + "grad_norm": 0.63671875, + "learning_rate": 0.00015940182509531435, + "loss": 0.9686, + "step": 25640 + }, + { + "epoch": 0.37, + "grad_norm": 0.71875, + "learning_rate": 0.00015938168282229572, + "loss": 1.0651, + "step": 25645 + }, + { + "epoch": 0.37, + "grad_norm": 0.5859375, + "learning_rate": 0.00015936153682716557, + "loss": 1.0298, + "step": 25650 + }, + { + "epoch": 0.37, + "grad_norm": 0.62890625, + "learning_rate": 0.00015934138711118666, + "loss": 0.9333, + "step": 25655 + }, + { + "epoch": 0.37, + "grad_norm": 0.55859375, + "learning_rate": 0.000159321233675622, + "loss": 0.9606, + "step": 25660 + }, + { + "epoch": 0.37, + "grad_norm": 0.53515625, + "learning_rate": 0.00015930107652173492, + "loss": 0.8906, + "step": 25665 + }, + { + "epoch": 0.37, + "grad_norm": 0.58984375, + "learning_rate": 0.00015928091565078879, + "loss": 0.9417, + "step": 25670 + }, + { + "epoch": 0.37, + "grad_norm": 0.63671875, + "learning_rate": 0.00015926075106404737, + "loss": 0.9536, + "step": 25675 + }, + { + "epoch": 0.37, + "grad_norm": 0.55078125, + "learning_rate": 0.00015924058276277453, + "loss": 0.925, + "step": 25680 + }, + { + "epoch": 0.37, + "grad_norm": 0.61328125, + "learning_rate": 0.00015922041074823456, + "loss": 0.987, + "step": 25685 + }, + { + "epoch": 0.37, + "grad_norm": 0.52734375, + "learning_rate": 0.00015920023502169174, + "loss": 1.0728, + "step": 25690 + }, + { + "epoch": 0.37, + "grad_norm": 0.5859375, + "learning_rate": 0.00015918005558441078, + "loss": 0.9732, + "step": 25695 + }, + { + "epoch": 0.37, + "grad_norm": 0.6015625, + "learning_rate": 0.00015915987243765657, + "loss": 1.0096, + "step": 25700 + }, + { + "epoch": 0.37, + "grad_norm": 0.578125, + "learning_rate": 0.00015913968558269414, + "loss": 0.9989, + "step": 25705 + }, + { + "epoch": 0.37, + "grad_norm": 0.53125, + "learning_rate": 0.0001591194950207889, + "loss": 1.0489, + "step": 25710 + }, + { + "epoch": 0.37, + "grad_norm": 0.6484375, + "learning_rate": 0.00015909930075320633, + "loss": 0.9951, + "step": 25715 + }, + { + "epoch": 0.37, + "grad_norm": 0.54296875, + "learning_rate": 0.00015907910278121232, + "loss": 0.8418, + "step": 25720 + }, + { + "epoch": 0.37, + "grad_norm": 0.55078125, + "learning_rate": 0.00015905890110607285, + "loss": 1.0019, + "step": 25725 + }, + { + "epoch": 0.37, + "grad_norm": 0.5546875, + "learning_rate": 0.00015903869572905422, + "loss": 1.1367, + "step": 25730 + }, + { + "epoch": 0.37, + "grad_norm": 0.56640625, + "learning_rate": 0.00015901848665142288, + "loss": 0.9247, + "step": 25735 + }, + { + "epoch": 0.37, + "grad_norm": 0.54296875, + "learning_rate": 0.00015899827387444554, + "loss": 0.9569, + "step": 25740 + }, + { + "epoch": 0.37, + "grad_norm": 0.53515625, + "learning_rate": 0.00015897805739938927, + "loss": 0.8595, + "step": 25745 + }, + { + "epoch": 0.37, + "grad_norm": 0.515625, + "learning_rate": 0.00015895783722752116, + "loss": 0.9867, + "step": 25750 + }, + { + "epoch": 0.37, + "grad_norm": 0.84375, + "learning_rate": 0.00015893761336010866, + "loss": 1.0285, + "step": 25755 + }, + { + "epoch": 0.37, + "grad_norm": 0.578125, + "learning_rate": 0.00015891738579841943, + "loss": 1.0182, + "step": 25760 + }, + { + "epoch": 0.37, + "grad_norm": 0.61328125, + "learning_rate": 0.00015889715454372137, + "loss": 0.9272, + "step": 25765 + }, + { + "epoch": 0.37, + "grad_norm": 0.6328125, + "learning_rate": 0.00015887691959728256, + "loss": 1.0613, + "step": 25770 + }, + { + "epoch": 0.37, + "grad_norm": 0.65234375, + "learning_rate": 0.0001588566809603714, + "loss": 0.9419, + "step": 25775 + }, + { + "epoch": 0.37, + "grad_norm": 0.5859375, + "learning_rate": 0.00015883643863425645, + "loss": 0.8621, + "step": 25780 + }, + { + "epoch": 0.37, + "grad_norm": 0.51953125, + "learning_rate": 0.0001588161926202065, + "loss": 0.9175, + "step": 25785 + }, + { + "epoch": 0.37, + "grad_norm": 0.54296875, + "learning_rate": 0.0001587959429194906, + "loss": 0.9916, + "step": 25790 + }, + { + "epoch": 0.37, + "grad_norm": 0.6484375, + "learning_rate": 0.00015877568953337806, + "loss": 1.0484, + "step": 25795 + }, + { + "epoch": 0.37, + "grad_norm": 0.61328125, + "learning_rate": 0.00015875543246313836, + "loss": 1.063, + "step": 25800 + }, + { + "epoch": 0.37, + "grad_norm": 0.53515625, + "learning_rate": 0.00015873517171004125, + "loss": 1.0483, + "step": 25805 + }, + { + "epoch": 0.37, + "grad_norm": 0.59765625, + "learning_rate": 0.00015871490727535666, + "loss": 1.0646, + "step": 25810 + }, + { + "epoch": 0.37, + "grad_norm": 0.64453125, + "learning_rate": 0.0001586946391603548, + "loss": 0.9879, + "step": 25815 + }, + { + "epoch": 0.37, + "grad_norm": 0.60546875, + "learning_rate": 0.0001586743673663061, + "loss": 0.9607, + "step": 25820 + }, + { + "epoch": 0.37, + "grad_norm": 0.53125, + "learning_rate": 0.00015865409189448127, + "loss": 0.9211, + "step": 25825 + }, + { + "epoch": 0.37, + "grad_norm": 0.55078125, + "learning_rate": 0.00015863381274615115, + "loss": 1.0857, + "step": 25830 + }, + { + "epoch": 0.37, + "grad_norm": 0.49609375, + "learning_rate": 0.00015861352992258684, + "loss": 0.8483, + "step": 25835 + }, + { + "epoch": 0.37, + "grad_norm": 0.466796875, + "learning_rate": 0.00015859324342505974, + "loss": 1.0105, + "step": 25840 + }, + { + "epoch": 0.37, + "grad_norm": 0.5625, + "learning_rate": 0.0001585729532548414, + "loss": 0.9075, + "step": 25845 + }, + { + "epoch": 0.37, + "grad_norm": 0.6171875, + "learning_rate": 0.00015855265941320366, + "loss": 1.1377, + "step": 25850 + }, + { + "epoch": 0.37, + "grad_norm": 0.57421875, + "learning_rate": 0.00015853236190141855, + "loss": 0.8974, + "step": 25855 + }, + { + "epoch": 0.37, + "grad_norm": 0.54296875, + "learning_rate": 0.00015851206072075829, + "loss": 0.9576, + "step": 25860 + }, + { + "epoch": 0.37, + "grad_norm": 0.5234375, + "learning_rate": 0.00015849175587249545, + "loss": 1.062, + "step": 25865 + }, + { + "epoch": 0.37, + "grad_norm": 0.6328125, + "learning_rate": 0.0001584714473579027, + "loss": 0.9231, + "step": 25870 + }, + { + "epoch": 0.37, + "grad_norm": 0.5234375, + "learning_rate": 0.00015845113517825313, + "loss": 0.9307, + "step": 25875 + }, + { + "epoch": 0.37, + "grad_norm": 0.52734375, + "learning_rate": 0.00015843081933481976, + "loss": 0.942, + "step": 25880 + }, + { + "epoch": 0.37, + "grad_norm": 0.54296875, + "learning_rate": 0.0001584104998288761, + "loss": 1.075, + "step": 25885 + }, + { + "epoch": 0.37, + "grad_norm": 0.52734375, + "learning_rate": 0.0001583901766616958, + "loss": 0.9113, + "step": 25890 + }, + { + "epoch": 0.37, + "grad_norm": 0.5078125, + "learning_rate": 0.0001583698498345527, + "loss": 0.8825, + "step": 25895 + }, + { + "epoch": 0.37, + "grad_norm": 0.55078125, + "learning_rate": 0.00015834951934872094, + "loss": 0.9867, + "step": 25900 + }, + { + "epoch": 0.37, + "grad_norm": 0.52734375, + "learning_rate": 0.00015832918520547487, + "loss": 0.9442, + "step": 25905 + }, + { + "epoch": 0.37, + "grad_norm": 0.62890625, + "learning_rate": 0.00015830884740608906, + "loss": 1.0392, + "step": 25910 + }, + { + "epoch": 0.37, + "grad_norm": 0.60546875, + "learning_rate": 0.00015828850595183823, + "loss": 1.052, + "step": 25915 + }, + { + "epoch": 0.37, + "grad_norm": 0.546875, + "learning_rate": 0.0001582681608439975, + "loss": 0.8458, + "step": 25920 + }, + { + "epoch": 0.37, + "grad_norm": 0.58984375, + "learning_rate": 0.00015824781208384207, + "loss": 0.9396, + "step": 25925 + }, + { + "epoch": 0.37, + "grad_norm": 0.55078125, + "learning_rate": 0.00015822745967264745, + "loss": 1.1154, + "step": 25930 + }, + { + "epoch": 0.37, + "grad_norm": 0.5078125, + "learning_rate": 0.00015820710361168935, + "loss": 0.9944, + "step": 25935 + }, + { + "epoch": 0.37, + "grad_norm": 0.5859375, + "learning_rate": 0.0001581867439022437, + "loss": 0.8879, + "step": 25940 + }, + { + "epoch": 0.37, + "grad_norm": 0.55078125, + "learning_rate": 0.00015816638054558666, + "loss": 0.9119, + "step": 25945 + }, + { + "epoch": 0.37, + "grad_norm": 0.703125, + "learning_rate": 0.00015814601354299462, + "loss": 0.8787, + "step": 25950 + }, + { + "epoch": 0.37, + "grad_norm": 0.55078125, + "learning_rate": 0.00015812564289574427, + "loss": 1.0819, + "step": 25955 + }, + { + "epoch": 0.37, + "grad_norm": 0.578125, + "learning_rate": 0.00015810526860511243, + "loss": 0.9735, + "step": 25960 + }, + { + "epoch": 0.37, + "grad_norm": 0.65625, + "learning_rate": 0.00015808489067237614, + "loss": 1.1039, + "step": 25965 + }, + { + "epoch": 0.37, + "grad_norm": 0.578125, + "learning_rate": 0.00015806450909881277, + "loss": 0.9072, + "step": 25970 + }, + { + "epoch": 0.37, + "grad_norm": 0.490234375, + "learning_rate": 0.00015804412388569986, + "loss": 0.8738, + "step": 25975 + }, + { + "epoch": 0.37, + "grad_norm": 0.5703125, + "learning_rate": 0.00015802373503431513, + "loss": 0.9162, + "step": 25980 + }, + { + "epoch": 0.37, + "grad_norm": 0.58203125, + "learning_rate": 0.00015800334254593661, + "loss": 0.9719, + "step": 25985 + }, + { + "epoch": 0.37, + "grad_norm": 0.578125, + "learning_rate": 0.00015798294642184251, + "loss": 0.9771, + "step": 25990 + }, + { + "epoch": 0.37, + "grad_norm": 0.5703125, + "learning_rate": 0.00015796254666331131, + "loss": 1.0905, + "step": 25995 + }, + { + "epoch": 0.37, + "grad_norm": 0.578125, + "learning_rate": 0.00015794214327162167, + "loss": 0.8707, + "step": 26000 + }, + { + "epoch": 0.37, + "grad_norm": 0.609375, + "learning_rate": 0.00015792173624805245, + "loss": 0.8273, + "step": 26005 + }, + { + "epoch": 0.37, + "grad_norm": 0.5703125, + "learning_rate": 0.00015790132559388291, + "loss": 0.9846, + "step": 26010 + }, + { + "epoch": 0.37, + "grad_norm": 0.734375, + "learning_rate": 0.0001578809113103923, + "loss": 0.9594, + "step": 26015 + }, + { + "epoch": 0.37, + "grad_norm": 0.546875, + "learning_rate": 0.00015786049339886024, + "loss": 0.9845, + "step": 26020 + }, + { + "epoch": 0.37, + "grad_norm": 0.494140625, + "learning_rate": 0.00015784007186056656, + "loss": 0.9394, + "step": 26025 + }, + { + "epoch": 0.37, + "grad_norm": 0.57421875, + "learning_rate": 0.00015781964669679132, + "loss": 1.0991, + "step": 26030 + }, + { + "epoch": 0.37, + "grad_norm": 0.5703125, + "learning_rate": 0.00015779921790881474, + "loss": 1.0305, + "step": 26035 + }, + { + "epoch": 0.37, + "grad_norm": 0.5078125, + "learning_rate": 0.0001577787854979174, + "loss": 1.0352, + "step": 26040 + }, + { + "epoch": 0.37, + "grad_norm": 0.4921875, + "learning_rate": 0.00015775834946537995, + "loss": 0.9475, + "step": 26045 + }, + { + "epoch": 0.37, + "grad_norm": 0.6171875, + "learning_rate": 0.0001577379098124834, + "loss": 1.0782, + "step": 26050 + }, + { + "epoch": 0.37, + "grad_norm": 0.52734375, + "learning_rate": 0.00015771746654050887, + "loss": 0.9941, + "step": 26055 + }, + { + "epoch": 0.37, + "grad_norm": 0.58203125, + "learning_rate": 0.00015769701965073782, + "loss": 0.9338, + "step": 26060 + }, + { + "epoch": 0.37, + "grad_norm": 0.59375, + "learning_rate": 0.00015767656914445188, + "loss": 1.0494, + "step": 26065 + }, + { + "epoch": 0.37, + "grad_norm": 0.70703125, + "learning_rate": 0.0001576561150229329, + "loss": 0.9931, + "step": 26070 + }, + { + "epoch": 0.37, + "grad_norm": 0.58203125, + "learning_rate": 0.00015763565728746292, + "loss": 0.8672, + "step": 26075 + }, + { + "epoch": 0.37, + "grad_norm": 0.55859375, + "learning_rate": 0.00015761519593932434, + "loss": 0.6515, + "step": 26080 + }, + { + "epoch": 0.37, + "grad_norm": 0.5546875, + "learning_rate": 0.00015759473097979964, + "loss": 0.9082, + "step": 26085 + }, + { + "epoch": 0.37, + "grad_norm": 0.5859375, + "learning_rate": 0.00015757426241017161, + "loss": 0.9968, + "step": 26090 + }, + { + "epoch": 0.37, + "grad_norm": 0.5546875, + "learning_rate": 0.00015755379023172327, + "loss": 0.8977, + "step": 26095 + }, + { + "epoch": 0.37, + "grad_norm": 0.64453125, + "learning_rate": 0.00015753331444573777, + "loss": 1.0706, + "step": 26100 + }, + { + "epoch": 0.37, + "grad_norm": 0.58203125, + "learning_rate": 0.00015751283505349863, + "loss": 0.8743, + "step": 26105 + }, + { + "epoch": 0.37, + "grad_norm": 0.51953125, + "learning_rate": 0.00015749235205628946, + "loss": 1.0556, + "step": 26110 + }, + { + "epoch": 0.37, + "grad_norm": 0.5390625, + "learning_rate": 0.00015747186545539418, + "loss": 0.9144, + "step": 26115 + }, + { + "epoch": 0.37, + "grad_norm": 0.609375, + "learning_rate": 0.00015745137525209694, + "loss": 0.929, + "step": 26120 + }, + { + "epoch": 0.37, + "grad_norm": 0.5625, + "learning_rate": 0.00015743088144768209, + "loss": 0.8177, + "step": 26125 + }, + { + "epoch": 0.37, + "grad_norm": 0.55078125, + "learning_rate": 0.00015741038404343412, + "loss": 0.9324, + "step": 26130 + }, + { + "epoch": 0.37, + "grad_norm": 0.52734375, + "learning_rate": 0.00015738988304063792, + "loss": 0.805, + "step": 26135 + }, + { + "epoch": 0.37, + "grad_norm": 0.58984375, + "learning_rate": 0.00015736937844057852, + "loss": 0.9338, + "step": 26140 + }, + { + "epoch": 0.38, + "grad_norm": 0.765625, + "learning_rate": 0.00015734887024454111, + "loss": 1.0461, + "step": 26145 + }, + { + "epoch": 0.38, + "grad_norm": 0.66015625, + "learning_rate": 0.0001573283584538112, + "loss": 1.0292, + "step": 26150 + }, + { + "epoch": 0.38, + "grad_norm": 0.56640625, + "learning_rate": 0.0001573078430696745, + "loss": 0.8662, + "step": 26155 + }, + { + "epoch": 0.38, + "grad_norm": 0.53125, + "learning_rate": 0.0001572873240934169, + "loss": 0.9804, + "step": 26160 + }, + { + "epoch": 0.38, + "grad_norm": 0.5078125, + "learning_rate": 0.00015726680152632462, + "loss": 1.1208, + "step": 26165 + }, + { + "epoch": 0.38, + "grad_norm": 0.62109375, + "learning_rate": 0.000157246275369684, + "loss": 1.0409, + "step": 26170 + }, + { + "epoch": 0.38, + "grad_norm": 0.5703125, + "learning_rate": 0.0001572257456247816, + "loss": 0.8872, + "step": 26175 + }, + { + "epoch": 0.38, + "grad_norm": 0.59375, + "learning_rate": 0.00015720521229290434, + "loss": 0.8249, + "step": 26180 + }, + { + "epoch": 0.38, + "grad_norm": 0.58203125, + "learning_rate": 0.0001571846753753392, + "loss": 0.7681, + "step": 26185 + }, + { + "epoch": 0.38, + "grad_norm": 0.59765625, + "learning_rate": 0.00015716413487337346, + "loss": 0.9822, + "step": 26190 + }, + { + "epoch": 0.38, + "grad_norm": 0.5234375, + "learning_rate": 0.00015714359078829467, + "loss": 0.9559, + "step": 26195 + }, + { + "epoch": 0.38, + "grad_norm": 0.55859375, + "learning_rate": 0.0001571230431213905, + "loss": 0.9754, + "step": 26200 + }, + { + "epoch": 0.38, + "grad_norm": 0.71875, + "learning_rate": 0.00015710249187394896, + "loss": 0.8946, + "step": 26205 + }, + { + "epoch": 0.38, + "grad_norm": 0.62109375, + "learning_rate": 0.00015708193704725817, + "loss": 0.9902, + "step": 26210 + }, + { + "epoch": 0.38, + "grad_norm": 0.54296875, + "learning_rate": 0.0001570613786426066, + "loss": 0.9934, + "step": 26215 + }, + { + "epoch": 0.38, + "grad_norm": 0.56640625, + "learning_rate": 0.00015704081666128276, + "loss": 1.0759, + "step": 26220 + }, + { + "epoch": 0.38, + "grad_norm": 0.6171875, + "learning_rate": 0.00015702025110457562, + "loss": 0.9217, + "step": 26225 + }, + { + "epoch": 0.38, + "grad_norm": 0.5625, + "learning_rate": 0.0001569996819737742, + "loss": 0.9059, + "step": 26230 + }, + { + "epoch": 0.38, + "grad_norm": 0.45703125, + "learning_rate": 0.00015697910927016775, + "loss": 0.9285, + "step": 26235 + }, + { + "epoch": 0.38, + "grad_norm": 0.609375, + "learning_rate": 0.00015695853299504587, + "loss": 1.0308, + "step": 26240 + }, + { + "epoch": 0.38, + "grad_norm": 0.53515625, + "learning_rate": 0.00015693795314969825, + "loss": 0.9831, + "step": 26245 + }, + { + "epoch": 0.38, + "grad_norm": 0.55078125, + "learning_rate": 0.00015691736973541493, + "loss": 0.9238, + "step": 26250 + }, + { + "epoch": 0.38, + "grad_norm": 0.53515625, + "learning_rate": 0.000156896782753486, + "loss": 0.9768, + "step": 26255 + }, + { + "epoch": 0.38, + "grad_norm": 0.5859375, + "learning_rate": 0.00015687619220520194, + "loss": 0.9166, + "step": 26260 + }, + { + "epoch": 0.38, + "grad_norm": 0.54296875, + "learning_rate": 0.0001568555980918534, + "loss": 0.8379, + "step": 26265 + }, + { + "epoch": 0.38, + "grad_norm": 0.78125, + "learning_rate": 0.0001568350004147312, + "loss": 0.9155, + "step": 26270 + }, + { + "epoch": 0.38, + "grad_norm": 0.58984375, + "learning_rate": 0.00015681439917512646, + "loss": 0.8975, + "step": 26275 + }, + { + "epoch": 0.38, + "grad_norm": 0.61328125, + "learning_rate": 0.00015679379437433046, + "loss": 0.9756, + "step": 26280 + }, + { + "epoch": 0.38, + "grad_norm": 0.58203125, + "learning_rate": 0.00015677318601363472, + "loss": 1.0602, + "step": 26285 + }, + { + "epoch": 0.38, + "grad_norm": 0.57421875, + "learning_rate": 0.00015675257409433107, + "loss": 0.8572, + "step": 26290 + }, + { + "epoch": 0.38, + "grad_norm": 0.5390625, + "learning_rate": 0.00015673195861771143, + "loss": 0.849, + "step": 26295 + }, + { + "epoch": 0.38, + "grad_norm": 0.56640625, + "learning_rate": 0.000156711339585068, + "loss": 1.04, + "step": 26300 + }, + { + "epoch": 0.38, + "grad_norm": 0.53515625, + "learning_rate": 0.0001566907169976932, + "loss": 0.9551, + "step": 26305 + }, + { + "epoch": 0.38, + "grad_norm": 0.546875, + "learning_rate": 0.00015667009085687972, + "loss": 1.0188, + "step": 26310 + }, + { + "epoch": 0.38, + "grad_norm": 0.6796875, + "learning_rate": 0.0001566494611639204, + "loss": 0.8871, + "step": 26315 + }, + { + "epoch": 0.38, + "grad_norm": 0.5703125, + "learning_rate": 0.00015662882792010828, + "loss": 0.8827, + "step": 26320 + }, + { + "epoch": 0.38, + "grad_norm": 0.58984375, + "learning_rate": 0.00015660819112673678, + "loss": 1.01, + "step": 26325 + }, + { + "epoch": 0.38, + "grad_norm": 0.5625, + "learning_rate": 0.0001565875507850994, + "loss": 0.8856, + "step": 26330 + }, + { + "epoch": 0.38, + "grad_norm": 0.59375, + "learning_rate": 0.0001565669068964899, + "loss": 1.0583, + "step": 26335 + }, + { + "epoch": 0.38, + "grad_norm": 1.21875, + "learning_rate": 0.0001565462594622022, + "loss": 1.0725, + "step": 26340 + }, + { + "epoch": 0.38, + "grad_norm": 0.83984375, + "learning_rate": 0.00015652560848353057, + "loss": 1.0632, + "step": 26345 + }, + { + "epoch": 0.38, + "grad_norm": 0.5234375, + "learning_rate": 0.0001565049539617695, + "loss": 1.0049, + "step": 26350 + }, + { + "epoch": 0.38, + "grad_norm": 0.59375, + "learning_rate": 0.00015648429589821348, + "loss": 1.0311, + "step": 26355 + }, + { + "epoch": 0.38, + "grad_norm": 0.51953125, + "learning_rate": 0.00015646363429415748, + "loss": 0.9113, + "step": 26360 + }, + { + "epoch": 0.38, + "grad_norm": 0.578125, + "learning_rate": 0.00015644296915089657, + "loss": 0.9195, + "step": 26365 + }, + { + "epoch": 0.38, + "grad_norm": 0.703125, + "learning_rate": 0.00015642230046972606, + "loss": 0.891, + "step": 26370 + }, + { + "epoch": 0.38, + "grad_norm": 0.55078125, + "learning_rate": 0.00015640162825194151, + "loss": 1.0039, + "step": 26375 + }, + { + "epoch": 0.38, + "grad_norm": 0.70703125, + "learning_rate": 0.0001563809524988387, + "loss": 1.0073, + "step": 26380 + }, + { + "epoch": 0.38, + "grad_norm": 0.62109375, + "learning_rate": 0.00015636027321171353, + "loss": 1.0067, + "step": 26385 + }, + { + "epoch": 0.38, + "grad_norm": 0.56640625, + "learning_rate": 0.00015633959039186227, + "loss": 1.0397, + "step": 26390 + }, + { + "epoch": 0.38, + "grad_norm": 0.61328125, + "learning_rate": 0.0001563189040405813, + "loss": 0.9546, + "step": 26395 + }, + { + "epoch": 0.38, + "grad_norm": 0.921875, + "learning_rate": 0.0001562982141591673, + "loss": 0.8314, + "step": 26400 + }, + { + "epoch": 0.38, + "grad_norm": 0.49609375, + "learning_rate": 0.0001562775207489171, + "loss": 0.9185, + "step": 26405 + }, + { + "epoch": 0.38, + "grad_norm": 0.578125, + "learning_rate": 0.0001562568238111278, + "loss": 1.0778, + "step": 26410 + }, + { + "epoch": 0.38, + "grad_norm": 0.5859375, + "learning_rate": 0.00015623612334709675, + "loss": 1.086, + "step": 26415 + }, + { + "epoch": 0.38, + "grad_norm": 0.5, + "learning_rate": 0.0001562154193581214, + "loss": 1.0063, + "step": 26420 + }, + { + "epoch": 0.38, + "grad_norm": 0.66015625, + "learning_rate": 0.00015619471184549955, + "loss": 0.8892, + "step": 26425 + }, + { + "epoch": 0.38, + "grad_norm": 0.50390625, + "learning_rate": 0.0001561740008105292, + "loss": 0.8712, + "step": 26430 + }, + { + "epoch": 0.38, + "grad_norm": 0.59375, + "learning_rate": 0.00015615328625450848, + "loss": 1.0723, + "step": 26435 + }, + { + "epoch": 0.38, + "grad_norm": 0.546875, + "learning_rate": 0.0001561325681787358, + "loss": 0.9601, + "step": 26440 + }, + { + "epoch": 0.38, + "grad_norm": 0.73828125, + "learning_rate": 0.00015611184658450983, + "loss": 0.9405, + "step": 26445 + }, + { + "epoch": 0.38, + "grad_norm": 0.609375, + "learning_rate": 0.0001560911214731294, + "loss": 0.8906, + "step": 26450 + }, + { + "epoch": 0.38, + "grad_norm": 0.625, + "learning_rate": 0.0001560703928458936, + "loss": 1.0349, + "step": 26455 + }, + { + "epoch": 0.38, + "grad_norm": 0.5390625, + "learning_rate": 0.00015604966070410176, + "loss": 0.9879, + "step": 26460 + }, + { + "epoch": 0.38, + "grad_norm": 0.6875, + "learning_rate": 0.0001560289250490533, + "loss": 0.9159, + "step": 26465 + }, + { + "epoch": 0.38, + "grad_norm": 0.57421875, + "learning_rate": 0.00015600818588204805, + "loss": 0.9955, + "step": 26470 + }, + { + "epoch": 0.38, + "grad_norm": 0.74609375, + "learning_rate": 0.00015598744320438588, + "loss": 0.9993, + "step": 26475 + }, + { + "epoch": 0.38, + "grad_norm": 0.53515625, + "learning_rate": 0.000155966697017367, + "loss": 0.868, + "step": 26480 + }, + { + "epoch": 0.38, + "grad_norm": 0.57421875, + "learning_rate": 0.00015594594732229187, + "loss": 0.9074, + "step": 26485 + }, + { + "epoch": 0.38, + "grad_norm": 0.51171875, + "learning_rate": 0.00015592519412046098, + "loss": 0.9526, + "step": 26490 + }, + { + "epoch": 0.38, + "grad_norm": 0.55078125, + "learning_rate": 0.00015590443741317524, + "loss": 0.8759, + "step": 26495 + }, + { + "epoch": 0.38, + "grad_norm": 0.59765625, + "learning_rate": 0.0001558836772017357, + "loss": 0.9871, + "step": 26500 + }, + { + "epoch": 0.38, + "grad_norm": 0.78125, + "learning_rate": 0.00015586291348744364, + "loss": 1.1237, + "step": 26505 + }, + { + "epoch": 0.38, + "grad_norm": 0.53515625, + "learning_rate": 0.00015584214627160055, + "loss": 0.9317, + "step": 26510 + }, + { + "epoch": 0.38, + "grad_norm": 0.62109375, + "learning_rate": 0.0001558213755555081, + "loss": 0.8633, + "step": 26515 + }, + { + "epoch": 0.38, + "grad_norm": 0.6015625, + "learning_rate": 0.0001558006013404683, + "loss": 1.0021, + "step": 26520 + }, + { + "epoch": 0.38, + "grad_norm": 0.625, + "learning_rate": 0.0001557798236277832, + "loss": 0.9625, + "step": 26525 + }, + { + "epoch": 0.38, + "grad_norm": 0.55859375, + "learning_rate": 0.00015575904241875525, + "loss": 0.8798, + "step": 26530 + }, + { + "epoch": 0.38, + "grad_norm": 0.72265625, + "learning_rate": 0.00015573825771468704, + "loss": 1.0856, + "step": 26535 + }, + { + "epoch": 0.38, + "grad_norm": 0.58984375, + "learning_rate": 0.00015571746951688136, + "loss": 0.9614, + "step": 26540 + }, + { + "epoch": 0.38, + "grad_norm": 0.51171875, + "learning_rate": 0.00015569667782664118, + "loss": 0.8792, + "step": 26545 + }, + { + "epoch": 0.38, + "grad_norm": 0.5625, + "learning_rate": 0.00015567588264526984, + "loss": 1.0623, + "step": 26550 + }, + { + "epoch": 0.38, + "grad_norm": 0.546875, + "learning_rate": 0.0001556550839740708, + "loss": 1.0637, + "step": 26555 + }, + { + "epoch": 0.38, + "grad_norm": 0.5546875, + "learning_rate": 0.00015563428181434764, + "loss": 1.0017, + "step": 26560 + }, + { + "epoch": 0.38, + "grad_norm": 0.51171875, + "learning_rate": 0.00015561347616740436, + "loss": 1.1512, + "step": 26565 + }, + { + "epoch": 0.38, + "grad_norm": 0.59765625, + "learning_rate": 0.00015559266703454508, + "loss": 0.9056, + "step": 26570 + }, + { + "epoch": 0.38, + "grad_norm": 0.5625, + "learning_rate": 0.00015557185441707406, + "loss": 0.9349, + "step": 26575 + }, + { + "epoch": 0.38, + "grad_norm": 0.490234375, + "learning_rate": 0.00015555103831629597, + "loss": 0.8511, + "step": 26580 + }, + { + "epoch": 0.38, + "grad_norm": 0.57421875, + "learning_rate": 0.0001555302187335155, + "loss": 0.9237, + "step": 26585 + }, + { + "epoch": 0.38, + "grad_norm": 0.4921875, + "learning_rate": 0.00015550939567003771, + "loss": 0.9405, + "step": 26590 + }, + { + "epoch": 0.38, + "grad_norm": 0.5703125, + "learning_rate": 0.00015548856912716774, + "loss": 0.9342, + "step": 26595 + }, + { + "epoch": 0.38, + "grad_norm": 0.58203125, + "learning_rate": 0.00015546773910621106, + "loss": 0.9176, + "step": 26600 + }, + { + "epoch": 0.38, + "grad_norm": 0.54296875, + "learning_rate": 0.00015544690560847336, + "loss": 1.0353, + "step": 26605 + }, + { + "epoch": 0.38, + "grad_norm": 0.80078125, + "learning_rate": 0.0001554260686352604, + "loss": 0.9468, + "step": 26610 + }, + { + "epoch": 0.38, + "grad_norm": 0.515625, + "learning_rate": 0.0001554052281878784, + "loss": 0.8511, + "step": 26615 + }, + { + "epoch": 0.38, + "grad_norm": 0.546875, + "learning_rate": 0.0001553843842676336, + "loss": 0.9141, + "step": 26620 + }, + { + "epoch": 0.38, + "grad_norm": 0.55078125, + "learning_rate": 0.00015536353687583247, + "loss": 1.1151, + "step": 26625 + }, + { + "epoch": 0.38, + "grad_norm": 0.61328125, + "learning_rate": 0.0001553426860137818, + "loss": 1.0005, + "step": 26630 + }, + { + "epoch": 0.38, + "grad_norm": 0.59765625, + "learning_rate": 0.00015532183168278854, + "loss": 1.0243, + "step": 26635 + }, + { + "epoch": 0.38, + "grad_norm": 0.5078125, + "learning_rate": 0.0001553009738841599, + "loss": 0.9182, + "step": 26640 + }, + { + "epoch": 0.38, + "grad_norm": 0.53125, + "learning_rate": 0.0001552801126192032, + "loss": 0.9837, + "step": 26645 + }, + { + "epoch": 0.38, + "grad_norm": 0.482421875, + "learning_rate": 0.0001552592478892261, + "loss": 0.8765, + "step": 26650 + }, + { + "epoch": 0.38, + "grad_norm": 0.76171875, + "learning_rate": 0.00015523837969553644, + "loss": 1.1424, + "step": 26655 + }, + { + "epoch": 0.38, + "grad_norm": 0.66796875, + "learning_rate": 0.00015521750803944214, + "loss": 0.9813, + "step": 26660 + }, + { + "epoch": 0.38, + "grad_norm": 0.5546875, + "learning_rate": 0.0001551966329222516, + "loss": 0.903, + "step": 26665 + }, + { + "epoch": 0.38, + "grad_norm": 0.625, + "learning_rate": 0.0001551757543452733, + "loss": 0.9145, + "step": 26670 + }, + { + "epoch": 0.38, + "grad_norm": 0.5703125, + "learning_rate": 0.0001551548723098158, + "loss": 1.0128, + "step": 26675 + }, + { + "epoch": 0.38, + "grad_norm": 0.57421875, + "learning_rate": 0.0001551339868171881, + "loss": 0.8879, + "step": 26680 + }, + { + "epoch": 0.38, + "grad_norm": 0.54296875, + "learning_rate": 0.00015511309786869935, + "loss": 1.0045, + "step": 26685 + }, + { + "epoch": 0.38, + "grad_norm": 0.578125, + "learning_rate": 0.00015509220546565882, + "loss": 0.9818, + "step": 26690 + }, + { + "epoch": 0.38, + "grad_norm": 0.56640625, + "learning_rate": 0.00015507130960937612, + "loss": 0.9332, + "step": 26695 + }, + { + "epoch": 0.38, + "grad_norm": 0.5703125, + "learning_rate": 0.00015505041030116102, + "loss": 0.9466, + "step": 26700 + }, + { + "epoch": 0.38, + "grad_norm": 0.625, + "learning_rate": 0.00015502950754232349, + "loss": 1.0231, + "step": 26705 + }, + { + "epoch": 0.38, + "grad_norm": 0.59375, + "learning_rate": 0.00015500860133417374, + "loss": 0.9721, + "step": 26710 + }, + { + "epoch": 0.38, + "grad_norm": 0.609375, + "learning_rate": 0.00015498769167802222, + "loss": 0.9158, + "step": 26715 + }, + { + "epoch": 0.38, + "grad_norm": 0.5390625, + "learning_rate": 0.0001549667785751796, + "loss": 0.9728, + "step": 26720 + }, + { + "epoch": 0.38, + "grad_norm": 0.50390625, + "learning_rate": 0.00015494586202695665, + "loss": 0.9821, + "step": 26725 + }, + { + "epoch": 0.38, + "grad_norm": 0.54296875, + "learning_rate": 0.00015492494203466452, + "loss": 1.0645, + "step": 26730 + }, + { + "epoch": 0.38, + "grad_norm": 0.51953125, + "learning_rate": 0.00015490401859961445, + "loss": 0.8899, + "step": 26735 + }, + { + "epoch": 0.38, + "grad_norm": 0.59375, + "learning_rate": 0.00015488309172311798, + "loss": 0.9968, + "step": 26740 + }, + { + "epoch": 0.38, + "grad_norm": 0.51171875, + "learning_rate": 0.00015486216140648678, + "loss": 0.873, + "step": 26745 + }, + { + "epoch": 0.38, + "grad_norm": 0.69921875, + "learning_rate": 0.00015484122765103286, + "loss": 1.048, + "step": 26750 + }, + { + "epoch": 0.38, + "grad_norm": 0.453125, + "learning_rate": 0.0001548202904580683, + "loss": 0.8578, + "step": 26755 + }, + { + "epoch": 0.38, + "grad_norm": 0.76953125, + "learning_rate": 0.00015479934982890551, + "loss": 0.9117, + "step": 26760 + }, + { + "epoch": 0.38, + "grad_norm": 0.53125, + "learning_rate": 0.0001547784057648571, + "loss": 0.9955, + "step": 26765 + }, + { + "epoch": 0.38, + "grad_norm": 0.52734375, + "learning_rate": 0.00015475745826723576, + "loss": 0.9677, + "step": 26770 + }, + { + "epoch": 0.38, + "grad_norm": 0.53515625, + "learning_rate": 0.00015473650733735463, + "loss": 0.9753, + "step": 26775 + }, + { + "epoch": 0.38, + "grad_norm": 0.54296875, + "learning_rate": 0.00015471555297652686, + "loss": 0.9088, + "step": 26780 + }, + { + "epoch": 0.38, + "grad_norm": 0.5625, + "learning_rate": 0.0001546945951860659, + "loss": 0.9215, + "step": 26785 + }, + { + "epoch": 0.38, + "grad_norm": 0.5, + "learning_rate": 0.00015467363396728543, + "loss": 0.8744, + "step": 26790 + }, + { + "epoch": 0.38, + "grad_norm": 0.5625, + "learning_rate": 0.00015465266932149932, + "loss": 0.9558, + "step": 26795 + }, + { + "epoch": 0.38, + "grad_norm": 0.5625, + "learning_rate": 0.00015463170125002166, + "loss": 0.9333, + "step": 26800 + }, + { + "epoch": 0.38, + "grad_norm": 0.5546875, + "learning_rate": 0.00015461072975416675, + "loss": 0.951, + "step": 26805 + }, + { + "epoch": 0.38, + "grad_norm": 0.61328125, + "learning_rate": 0.00015458975483524905, + "loss": 0.8719, + "step": 26810 + }, + { + "epoch": 0.38, + "grad_norm": 0.52734375, + "learning_rate": 0.0001545687764945834, + "loss": 0.8961, + "step": 26815 + }, + { + "epoch": 0.38, + "grad_norm": 0.51953125, + "learning_rate": 0.00015454779473348467, + "loss": 0.9622, + "step": 26820 + }, + { + "epoch": 0.38, + "grad_norm": 0.61328125, + "learning_rate": 0.00015452680955326803, + "loss": 0.9103, + "step": 26825 + }, + { + "epoch": 0.38, + "grad_norm": 0.50390625, + "learning_rate": 0.00015450582095524892, + "loss": 0.9518, + "step": 26830 + }, + { + "epoch": 0.38, + "grad_norm": 0.5625, + "learning_rate": 0.00015448482894074282, + "loss": 0.9383, + "step": 26835 + }, + { + "epoch": 0.39, + "grad_norm": 0.6015625, + "learning_rate": 0.00015446383351106562, + "loss": 1.0018, + "step": 26840 + }, + { + "epoch": 0.39, + "grad_norm": 0.515625, + "learning_rate": 0.0001544428346675333, + "loss": 0.9988, + "step": 26845 + }, + { + "epoch": 0.39, + "grad_norm": 0.5703125, + "learning_rate": 0.00015442183241146207, + "loss": 0.96, + "step": 26850 + }, + { + "epoch": 0.39, + "grad_norm": 0.58984375, + "learning_rate": 0.00015440082674416842, + "loss": 0.9955, + "step": 26855 + }, + { + "epoch": 0.39, + "grad_norm": 0.5390625, + "learning_rate": 0.000154379817666969, + "loss": 0.9501, + "step": 26860 + }, + { + "epoch": 0.39, + "grad_norm": 0.55078125, + "learning_rate": 0.00015435880518118066, + "loss": 1.0952, + "step": 26865 + }, + { + "epoch": 0.39, + "grad_norm": 0.5703125, + "learning_rate": 0.0001543377892881205, + "loss": 0.866, + "step": 26870 + }, + { + "epoch": 0.39, + "grad_norm": 0.59765625, + "learning_rate": 0.00015431676998910586, + "loss": 0.9214, + "step": 26875 + }, + { + "epoch": 0.39, + "grad_norm": 0.5625, + "learning_rate": 0.00015429574728545418, + "loss": 1.0176, + "step": 26880 + }, + { + "epoch": 0.39, + "grad_norm": 0.625, + "learning_rate": 0.00015427472117848323, + "loss": 0.9703, + "step": 26885 + }, + { + "epoch": 0.39, + "grad_norm": 0.56640625, + "learning_rate": 0.00015425369166951095, + "loss": 0.9798, + "step": 26890 + }, + { + "epoch": 0.39, + "grad_norm": 0.65234375, + "learning_rate": 0.0001542326587598555, + "loss": 0.9244, + "step": 26895 + }, + { + "epoch": 0.39, + "grad_norm": 0.53515625, + "learning_rate": 0.0001542116224508352, + "loss": 1.0506, + "step": 26900 + }, + { + "epoch": 0.39, + "grad_norm": 0.58984375, + "learning_rate": 0.00015419058274376867, + "loss": 0.8334, + "step": 26905 + }, + { + "epoch": 0.39, + "grad_norm": 0.62890625, + "learning_rate": 0.00015416953963997472, + "loss": 1.0585, + "step": 26910 + }, + { + "epoch": 0.39, + "grad_norm": 0.8125, + "learning_rate": 0.0001541484931407723, + "loss": 1.1043, + "step": 26915 + }, + { + "epoch": 0.39, + "grad_norm": 0.5625, + "learning_rate": 0.00015412744324748068, + "loss": 0.9439, + "step": 26920 + }, + { + "epoch": 0.39, + "grad_norm": 0.494140625, + "learning_rate": 0.00015410638996141927, + "loss": 0.8591, + "step": 26925 + }, + { + "epoch": 0.39, + "grad_norm": 0.515625, + "learning_rate": 0.0001540853332839077, + "loss": 0.9574, + "step": 26930 + }, + { + "epoch": 0.39, + "grad_norm": 0.54296875, + "learning_rate": 0.00015406427321626586, + "loss": 1.0304, + "step": 26935 + }, + { + "epoch": 0.39, + "grad_norm": 0.53125, + "learning_rate": 0.00015404320975981382, + "loss": 0.8397, + "step": 26940 + }, + { + "epoch": 0.39, + "grad_norm": 0.67578125, + "learning_rate": 0.0001540221429158718, + "loss": 1.142, + "step": 26945 + }, + { + "epoch": 0.39, + "grad_norm": 0.5859375, + "learning_rate": 0.00015400107268576037, + "loss": 1.2038, + "step": 26950 + }, + { + "epoch": 0.39, + "grad_norm": 0.5625, + "learning_rate": 0.00015397999907080015, + "loss": 1.0749, + "step": 26955 + }, + { + "epoch": 0.39, + "grad_norm": 0.53515625, + "learning_rate": 0.00015395892207231216, + "loss": 0.9764, + "step": 26960 + }, + { + "epoch": 0.39, + "grad_norm": 0.51171875, + "learning_rate": 0.00015393784169161746, + "loss": 0.8951, + "step": 26965 + }, + { + "epoch": 0.39, + "grad_norm": 0.59765625, + "learning_rate": 0.00015391675793003742, + "loss": 1.0228, + "step": 26970 + }, + { + "epoch": 0.39, + "grad_norm": 0.5625, + "learning_rate": 0.00015389567078889356, + "loss": 0.9191, + "step": 26975 + }, + { + "epoch": 0.39, + "grad_norm": 0.6796875, + "learning_rate": 0.0001538745802695077, + "loss": 1.1133, + "step": 26980 + }, + { + "epoch": 0.39, + "grad_norm": 0.5859375, + "learning_rate": 0.00015385348637320182, + "loss": 0.9428, + "step": 26985 + }, + { + "epoch": 0.39, + "grad_norm": 0.62890625, + "learning_rate": 0.00015383238910129804, + "loss": 0.8891, + "step": 26990 + }, + { + "epoch": 0.39, + "grad_norm": 0.6875, + "learning_rate": 0.0001538112884551188, + "loss": 0.9961, + "step": 26995 + }, + { + "epoch": 0.39, + "grad_norm": 0.58984375, + "learning_rate": 0.00015379018443598672, + "loss": 0.9033, + "step": 27000 + }, + { + "epoch": 0.39, + "grad_norm": 0.6171875, + "learning_rate": 0.00015376907704522464, + "loss": 0.9199, + "step": 27005 + }, + { + "epoch": 0.39, + "grad_norm": 0.5546875, + "learning_rate": 0.00015374796628415556, + "loss": 1.0566, + "step": 27010 + }, + { + "epoch": 0.39, + "grad_norm": 0.5234375, + "learning_rate": 0.00015372685215410273, + "loss": 1.057, + "step": 27015 + }, + { + "epoch": 0.39, + "grad_norm": 0.57421875, + "learning_rate": 0.00015370573465638962, + "loss": 1.0201, + "step": 27020 + }, + { + "epoch": 0.39, + "grad_norm": 0.53515625, + "learning_rate": 0.00015368461379233986, + "loss": 0.9872, + "step": 27025 + }, + { + "epoch": 0.39, + "grad_norm": 0.703125, + "learning_rate": 0.00015366348956327743, + "loss": 0.9782, + "step": 27030 + }, + { + "epoch": 0.39, + "grad_norm": 0.52734375, + "learning_rate": 0.00015364236197052634, + "loss": 0.9006, + "step": 27035 + }, + { + "epoch": 0.39, + "grad_norm": 0.57421875, + "learning_rate": 0.0001536212310154109, + "loss": 0.9762, + "step": 27040 + }, + { + "epoch": 0.39, + "grad_norm": 0.53125, + "learning_rate": 0.00015360009669925564, + "loss": 0.8898, + "step": 27045 + }, + { + "epoch": 0.39, + "grad_norm": 0.54296875, + "learning_rate": 0.00015357895902338523, + "loss": 0.9673, + "step": 27050 + }, + { + "epoch": 0.39, + "grad_norm": 0.62109375, + "learning_rate": 0.00015355781798912467, + "loss": 1.0248, + "step": 27055 + }, + { + "epoch": 0.39, + "grad_norm": 0.6640625, + "learning_rate": 0.00015353667359779908, + "loss": 1.0253, + "step": 27060 + }, + { + "epoch": 0.39, + "grad_norm": 0.6328125, + "learning_rate": 0.00015351552585073384, + "loss": 0.9211, + "step": 27065 + }, + { + "epoch": 0.39, + "grad_norm": 0.55078125, + "learning_rate": 0.0001534943747492545, + "loss": 0.7949, + "step": 27070 + }, + { + "epoch": 0.39, + "grad_norm": 0.55859375, + "learning_rate": 0.0001534732202946868, + "loss": 0.9069, + "step": 27075 + }, + { + "epoch": 0.39, + "grad_norm": 0.5078125, + "learning_rate": 0.00015345206248835674, + "loss": 1.0244, + "step": 27080 + }, + { + "epoch": 0.39, + "grad_norm": 0.5703125, + "learning_rate": 0.00015343090133159053, + "loss": 0.9724, + "step": 27085 + }, + { + "epoch": 0.39, + "grad_norm": 0.5546875, + "learning_rate": 0.00015340973682571459, + "loss": 0.9931, + "step": 27090 + }, + { + "epoch": 0.39, + "grad_norm": 0.52734375, + "learning_rate": 0.00015338856897205552, + "loss": 0.9437, + "step": 27095 + }, + { + "epoch": 0.39, + "grad_norm": 0.609375, + "learning_rate": 0.00015336739777194013, + "loss": 0.9188, + "step": 27100 + }, + { + "epoch": 0.39, + "grad_norm": 0.70703125, + "learning_rate": 0.00015334622322669543, + "loss": 1.1199, + "step": 27105 + }, + { + "epoch": 0.39, + "grad_norm": 0.515625, + "learning_rate": 0.00015332504533764876, + "loss": 0.9538, + "step": 27110 + }, + { + "epoch": 0.39, + "grad_norm": 0.48828125, + "learning_rate": 0.0001533038641061275, + "loss": 0.9649, + "step": 27115 + }, + { + "epoch": 0.39, + "grad_norm": 0.66015625, + "learning_rate": 0.00015328267953345934, + "loss": 1.0045, + "step": 27120 + }, + { + "epoch": 0.39, + "grad_norm": 0.5390625, + "learning_rate": 0.00015326149162097212, + "loss": 1.0143, + "step": 27125 + }, + { + "epoch": 0.39, + "grad_norm": 0.4921875, + "learning_rate": 0.00015324030036999395, + "loss": 0.969, + "step": 27130 + }, + { + "epoch": 0.39, + "grad_norm": 0.455078125, + "learning_rate": 0.0001532191057818531, + "loss": 0.9579, + "step": 27135 + }, + { + "epoch": 0.39, + "grad_norm": 0.5078125, + "learning_rate": 0.0001531979078578781, + "loss": 0.8333, + "step": 27140 + }, + { + "epoch": 0.39, + "grad_norm": 0.5078125, + "learning_rate": 0.00015317670659939768, + "loss": 0.941, + "step": 27145 + }, + { + "epoch": 0.39, + "grad_norm": 0.59375, + "learning_rate": 0.00015315550200774072, + "loss": 0.9879, + "step": 27150 + }, + { + "epoch": 0.39, + "grad_norm": 0.52734375, + "learning_rate": 0.00015313429408423632, + "loss": 0.8847, + "step": 27155 + }, + { + "epoch": 0.39, + "grad_norm": 0.5703125, + "learning_rate": 0.00015311308283021385, + "loss": 0.9423, + "step": 27160 + }, + { + "epoch": 0.39, + "grad_norm": 0.54296875, + "learning_rate": 0.0001530918682470029, + "loss": 0.9994, + "step": 27165 + }, + { + "epoch": 0.39, + "grad_norm": 0.578125, + "learning_rate": 0.00015307065033593316, + "loss": 0.9144, + "step": 27170 + }, + { + "epoch": 0.39, + "grad_norm": 0.458984375, + "learning_rate": 0.00015304942909833463, + "loss": 0.9032, + "step": 27175 + }, + { + "epoch": 0.39, + "grad_norm": 0.53515625, + "learning_rate": 0.00015302820453553745, + "loss": 0.9647, + "step": 27180 + }, + { + "epoch": 0.39, + "grad_norm": 0.59375, + "learning_rate": 0.000153006976648872, + "loss": 0.9738, + "step": 27185 + }, + { + "epoch": 0.39, + "grad_norm": 0.50390625, + "learning_rate": 0.0001529857454396689, + "loss": 0.9856, + "step": 27190 + }, + { + "epoch": 0.39, + "grad_norm": 0.671875, + "learning_rate": 0.00015296451090925897, + "loss": 0.8408, + "step": 27195 + }, + { + "epoch": 0.39, + "grad_norm": 0.57421875, + "learning_rate": 0.0001529432730589731, + "loss": 0.9164, + "step": 27200 + }, + { + "epoch": 0.39, + "grad_norm": 0.6015625, + "learning_rate": 0.00015292203189014262, + "loss": 1.1075, + "step": 27205 + }, + { + "epoch": 0.39, + "grad_norm": 0.59375, + "learning_rate": 0.0001529007874040989, + "loss": 0.8321, + "step": 27210 + }, + { + "epoch": 0.39, + "grad_norm": 0.5859375, + "learning_rate": 0.00015287953960217357, + "loss": 0.9266, + "step": 27215 + }, + { + "epoch": 0.39, + "grad_norm": 0.51171875, + "learning_rate": 0.0001528582884856985, + "loss": 1.0955, + "step": 27220 + }, + { + "epoch": 0.39, + "grad_norm": 0.53515625, + "learning_rate": 0.0001528370340560057, + "loss": 0.8284, + "step": 27225 + }, + { + "epoch": 0.39, + "grad_norm": 0.5703125, + "learning_rate": 0.0001528157763144274, + "loss": 0.9035, + "step": 27230 + }, + { + "epoch": 0.39, + "grad_norm": 0.51953125, + "learning_rate": 0.0001527945152622961, + "loss": 0.7278, + "step": 27235 + }, + { + "epoch": 0.39, + "grad_norm": 0.65234375, + "learning_rate": 0.00015277325090094443, + "loss": 0.9253, + "step": 27240 + }, + { + "epoch": 0.39, + "grad_norm": 0.49609375, + "learning_rate": 0.00015275198323170535, + "loss": 0.8693, + "step": 27245 + }, + { + "epoch": 0.39, + "grad_norm": 0.58203125, + "learning_rate": 0.00015273071225591187, + "loss": 0.9612, + "step": 27250 + }, + { + "epoch": 0.39, + "grad_norm": 0.54296875, + "learning_rate": 0.00015270943797489724, + "loss": 0.9629, + "step": 27255 + }, + { + "epoch": 0.39, + "grad_norm": 0.54296875, + "learning_rate": 0.00015268816038999504, + "loss": 0.9402, + "step": 27260 + }, + { + "epoch": 0.39, + "grad_norm": 0.53515625, + "learning_rate": 0.00015266687950253894, + "loss": 0.9483, + "step": 27265 + }, + { + "epoch": 0.39, + "grad_norm": 0.57421875, + "learning_rate": 0.00015264559531386285, + "loss": 0.9645, + "step": 27270 + }, + { + "epoch": 0.39, + "grad_norm": 0.53125, + "learning_rate": 0.00015262430782530092, + "loss": 1.0064, + "step": 27275 + }, + { + "epoch": 0.39, + "grad_norm": 0.58203125, + "learning_rate": 0.0001526030170381874, + "loss": 0.9229, + "step": 27280 + }, + { + "epoch": 0.39, + "grad_norm": 0.6015625, + "learning_rate": 0.00015258172295385688, + "loss": 1.114, + "step": 27285 + }, + { + "epoch": 0.39, + "grad_norm": 0.55859375, + "learning_rate": 0.00015256042557364405, + "loss": 1.021, + "step": 27290 + }, + { + "epoch": 0.39, + "grad_norm": 0.6015625, + "learning_rate": 0.0001525391248988839, + "loss": 1.0266, + "step": 27295 + }, + { + "epoch": 0.39, + "grad_norm": 0.51953125, + "learning_rate": 0.0001525178209309116, + "loss": 1.1505, + "step": 27300 + }, + { + "epoch": 0.39, + "grad_norm": 0.5859375, + "learning_rate": 0.0001524965136710624, + "loss": 0.9701, + "step": 27305 + }, + { + "epoch": 0.39, + "grad_norm": 0.64453125, + "learning_rate": 0.00015247520312067198, + "loss": 1.1152, + "step": 27310 + }, + { + "epoch": 0.39, + "grad_norm": 0.5, + "learning_rate": 0.00015245388928107606, + "loss": 0.8663, + "step": 27315 + }, + { + "epoch": 0.39, + "grad_norm": 0.55078125, + "learning_rate": 0.00015243257215361063, + "loss": 0.8906, + "step": 27320 + }, + { + "epoch": 0.39, + "grad_norm": 0.4921875, + "learning_rate": 0.00015241125173961185, + "loss": 0.9771, + "step": 27325 + }, + { + "epoch": 0.39, + "grad_norm": 0.52734375, + "learning_rate": 0.00015238992804041612, + "loss": 0.9428, + "step": 27330 + }, + { + "epoch": 0.39, + "grad_norm": 0.6015625, + "learning_rate": 0.00015236860105736003, + "loss": 0.9696, + "step": 27335 + }, + { + "epoch": 0.39, + "grad_norm": 0.578125, + "learning_rate": 0.00015234727079178038, + "loss": 0.9693, + "step": 27340 + }, + { + "epoch": 0.39, + "grad_norm": 0.61328125, + "learning_rate": 0.00015232593724501419, + "loss": 0.8225, + "step": 27345 + }, + { + "epoch": 0.39, + "grad_norm": 0.59375, + "learning_rate": 0.00015230460041839862, + "loss": 1.0572, + "step": 27350 + }, + { + "epoch": 0.39, + "grad_norm": 0.546875, + "learning_rate": 0.0001522832603132712, + "loss": 0.923, + "step": 27355 + }, + { + "epoch": 0.39, + "grad_norm": 0.53515625, + "learning_rate": 0.00015226191693096944, + "loss": 0.9713, + "step": 27360 + }, + { + "epoch": 0.39, + "grad_norm": 0.578125, + "learning_rate": 0.00015224057027283117, + "loss": 1.0176, + "step": 27365 + }, + { + "epoch": 0.39, + "grad_norm": 0.62109375, + "learning_rate": 0.0001522192203401945, + "loss": 0.9373, + "step": 27370 + }, + { + "epoch": 0.39, + "grad_norm": 0.5625, + "learning_rate": 0.00015219786713439762, + "loss": 1.1225, + "step": 27375 + }, + { + "epoch": 0.39, + "grad_norm": 0.5625, + "learning_rate": 0.00015217651065677898, + "loss": 0.8477, + "step": 27380 + }, + { + "epoch": 0.39, + "grad_norm": 0.53515625, + "learning_rate": 0.00015215515090867722, + "loss": 0.9262, + "step": 27385 + }, + { + "epoch": 0.39, + "grad_norm": 0.6015625, + "learning_rate": 0.0001521337878914312, + "loss": 0.8049, + "step": 27390 + }, + { + "epoch": 0.39, + "grad_norm": 0.5703125, + "learning_rate": 0.00015211242160637997, + "loss": 0.942, + "step": 27395 + }, + { + "epoch": 0.39, + "grad_norm": 0.59375, + "learning_rate": 0.00015209105205486284, + "loss": 0.9257, + "step": 27400 + }, + { + "epoch": 0.39, + "grad_norm": 0.54296875, + "learning_rate": 0.00015206967923821923, + "loss": 0.9311, + "step": 27405 + }, + { + "epoch": 0.39, + "grad_norm": 0.62109375, + "learning_rate": 0.0001520483031577888, + "loss": 0.9391, + "step": 27410 + }, + { + "epoch": 0.39, + "grad_norm": 0.6875, + "learning_rate": 0.00015202692381491146, + "loss": 0.9197, + "step": 27415 + }, + { + "epoch": 0.39, + "grad_norm": 0.6640625, + "learning_rate": 0.00015200554121092726, + "loss": 1.159, + "step": 27420 + }, + { + "epoch": 0.39, + "grad_norm": 0.58984375, + "learning_rate": 0.00015198415534717653, + "loss": 0.9105, + "step": 27425 + }, + { + "epoch": 0.39, + "grad_norm": 0.66015625, + "learning_rate": 0.00015196276622499977, + "loss": 0.933, + "step": 27430 + }, + { + "epoch": 0.39, + "grad_norm": 0.46484375, + "learning_rate": 0.0001519413738457376, + "loss": 0.8771, + "step": 27435 + }, + { + "epoch": 0.39, + "grad_norm": 0.55859375, + "learning_rate": 0.00015191997821073098, + "loss": 0.9563, + "step": 27440 + }, + { + "epoch": 0.39, + "grad_norm": 0.59375, + "learning_rate": 0.00015189857932132098, + "loss": 0.9751, + "step": 27445 + }, + { + "epoch": 0.39, + "grad_norm": 0.53125, + "learning_rate": 0.00015187717717884893, + "loss": 0.8955, + "step": 27450 + }, + { + "epoch": 0.39, + "grad_norm": 0.5234375, + "learning_rate": 0.00015185577178465631, + "loss": 0.8752, + "step": 27455 + }, + { + "epoch": 0.39, + "grad_norm": 0.51171875, + "learning_rate": 0.00015183436314008487, + "loss": 0.8825, + "step": 27460 + }, + { + "epoch": 0.39, + "grad_norm": 0.6328125, + "learning_rate": 0.00015181295124647653, + "loss": 0.9836, + "step": 27465 + }, + { + "epoch": 0.39, + "grad_norm": 0.51953125, + "learning_rate": 0.00015179153610517338, + "loss": 1.0022, + "step": 27470 + }, + { + "epoch": 0.39, + "grad_norm": 0.625, + "learning_rate": 0.00015177011771751777, + "loss": 0.8779, + "step": 27475 + }, + { + "epoch": 0.39, + "grad_norm": 0.640625, + "learning_rate": 0.0001517486960848522, + "loss": 0.9912, + "step": 27480 + }, + { + "epoch": 0.39, + "grad_norm": 0.53125, + "learning_rate": 0.00015172727120851947, + "loss": 0.9619, + "step": 27485 + }, + { + "epoch": 0.39, + "grad_norm": 0.6171875, + "learning_rate": 0.0001517058430898624, + "loss": 1.092, + "step": 27490 + }, + { + "epoch": 0.39, + "grad_norm": 0.609375, + "learning_rate": 0.00015168441173022426, + "loss": 0.928, + "step": 27495 + }, + { + "epoch": 0.39, + "grad_norm": 0.53125, + "learning_rate": 0.00015166297713094828, + "loss": 0.8879, + "step": 27500 + }, + { + "epoch": 0.39, + "grad_norm": 0.5703125, + "learning_rate": 0.0001516415392933781, + "loss": 1.0288, + "step": 27505 + }, + { + "epoch": 0.39, + "grad_norm": 0.5234375, + "learning_rate": 0.00015162009821885738, + "loss": 0.9421, + "step": 27510 + }, + { + "epoch": 0.39, + "grad_norm": 0.57421875, + "learning_rate": 0.00015159865390873014, + "loss": 0.9683, + "step": 27515 + }, + { + "epoch": 0.39, + "grad_norm": 0.5703125, + "learning_rate": 0.0001515772063643405, + "loss": 0.8493, + "step": 27520 + }, + { + "epoch": 0.39, + "grad_norm": 0.498046875, + "learning_rate": 0.00015155575558703282, + "loss": 1.0029, + "step": 27525 + }, + { + "epoch": 0.39, + "grad_norm": 0.5625, + "learning_rate": 0.00015153430157815168, + "loss": 0.9784, + "step": 27530 + }, + { + "epoch": 0.39, + "grad_norm": 0.5859375, + "learning_rate": 0.0001515128443390418, + "loss": 0.9032, + "step": 27535 + }, + { + "epoch": 0.4, + "grad_norm": 0.6328125, + "learning_rate": 0.00015149138387104817, + "loss": 0.9295, + "step": 27540 + }, + { + "epoch": 0.4, + "grad_norm": 0.578125, + "learning_rate": 0.00015146992017551597, + "loss": 0.9388, + "step": 27545 + }, + { + "epoch": 0.4, + "grad_norm": 0.56640625, + "learning_rate": 0.00015144845325379053, + "loss": 0.9346, + "step": 27550 + }, + { + "epoch": 0.4, + "grad_norm": 0.59375, + "learning_rate": 0.00015142698310721748, + "loss": 1.0505, + "step": 27555 + }, + { + "epoch": 0.4, + "grad_norm": 0.5625, + "learning_rate": 0.00015140550973714251, + "loss": 1.0346, + "step": 27560 + }, + { + "epoch": 0.4, + "grad_norm": 0.62109375, + "learning_rate": 0.0001513840331449117, + "loss": 0.9626, + "step": 27565 + }, + { + "epoch": 0.4, + "grad_norm": 0.4765625, + "learning_rate": 0.00015136255333187115, + "loss": 1.002, + "step": 27570 + }, + { + "epoch": 0.4, + "grad_norm": 0.46484375, + "learning_rate": 0.00015134107029936725, + "loss": 0.8865, + "step": 27575 + }, + { + "epoch": 0.4, + "grad_norm": 0.578125, + "learning_rate": 0.0001513195840487466, + "loss": 0.9616, + "step": 27580 + }, + { + "epoch": 0.4, + "grad_norm": 0.57421875, + "learning_rate": 0.00015129809458135597, + "loss": 0.9437, + "step": 27585 + }, + { + "epoch": 0.4, + "grad_norm": 0.54296875, + "learning_rate": 0.00015127660189854237, + "loss": 1.066, + "step": 27590 + }, + { + "epoch": 0.4, + "grad_norm": 0.5625, + "learning_rate": 0.00015125510600165295, + "loss": 0.8881, + "step": 27595 + }, + { + "epoch": 0.4, + "grad_norm": 0.478515625, + "learning_rate": 0.00015123360689203507, + "loss": 0.8513, + "step": 27600 + }, + { + "epoch": 0.4, + "grad_norm": 0.55078125, + "learning_rate": 0.00015121210457103642, + "loss": 0.9547, + "step": 27605 + }, + { + "epoch": 0.4, + "grad_norm": 0.5859375, + "learning_rate": 0.00015119059904000466, + "loss": 0.9742, + "step": 27610 + }, + { + "epoch": 0.4, + "grad_norm": 0.58984375, + "learning_rate": 0.00015116909030028793, + "loss": 0.8072, + "step": 27615 + }, + { + "epoch": 0.4, + "grad_norm": 0.56640625, + "learning_rate": 0.0001511475783532343, + "loss": 0.8026, + "step": 27620 + }, + { + "epoch": 0.4, + "grad_norm": 0.52734375, + "learning_rate": 0.0001511260632001922, + "loss": 0.9882, + "step": 27625 + }, + { + "epoch": 0.4, + "grad_norm": 0.54296875, + "learning_rate": 0.00015110454484251027, + "loss": 0.8986, + "step": 27630 + }, + { + "epoch": 0.4, + "grad_norm": 0.61328125, + "learning_rate": 0.0001510830232815372, + "loss": 0.9226, + "step": 27635 + }, + { + "epoch": 0.4, + "grad_norm": 0.53125, + "learning_rate": 0.00015106149851862213, + "loss": 0.954, + "step": 27640 + }, + { + "epoch": 0.4, + "grad_norm": 0.52734375, + "learning_rate": 0.00015103997055511414, + "loss": 0.9185, + "step": 27645 + }, + { + "epoch": 0.4, + "grad_norm": 0.5703125, + "learning_rate": 0.00015101843939236263, + "loss": 0.8623, + "step": 27650 + }, + { + "epoch": 0.4, + "grad_norm": 0.515625, + "learning_rate": 0.0001509969050317173, + "loss": 1.0706, + "step": 27655 + }, + { + "epoch": 0.4, + "grad_norm": 0.5625, + "learning_rate": 0.00015097536747452785, + "loss": 1.0242, + "step": 27660 + }, + { + "epoch": 0.4, + "grad_norm": 0.498046875, + "learning_rate": 0.00015095382672214428, + "loss": 0.8957, + "step": 27665 + }, + { + "epoch": 0.4, + "grad_norm": 0.60546875, + "learning_rate": 0.00015093228277591688, + "loss": 0.8766, + "step": 27670 + }, + { + "epoch": 0.4, + "grad_norm": 0.5078125, + "learning_rate": 0.00015091073563719596, + "loss": 0.874, + "step": 27675 + }, + { + "epoch": 0.4, + "grad_norm": 0.53125, + "learning_rate": 0.00015088918530733217, + "loss": 0.8828, + "step": 27680 + }, + { + "epoch": 0.4, + "grad_norm": 0.58984375, + "learning_rate": 0.00015086763178767627, + "loss": 0.8816, + "step": 27685 + }, + { + "epoch": 0.4, + "grad_norm": 0.5, + "learning_rate": 0.00015084607507957924, + "loss": 0.8533, + "step": 27690 + }, + { + "epoch": 0.4, + "grad_norm": 0.54296875, + "learning_rate": 0.00015082451518439238, + "loss": 0.9149, + "step": 27695 + }, + { + "epoch": 0.4, + "grad_norm": 0.58984375, + "learning_rate": 0.000150802952103467, + "loss": 1.0048, + "step": 27700 + }, + { + "epoch": 0.4, + "grad_norm": 0.58203125, + "learning_rate": 0.0001507813858381547, + "loss": 0.9428, + "step": 27705 + }, + { + "epoch": 0.4, + "grad_norm": 0.53125, + "learning_rate": 0.00015075981638980733, + "loss": 0.8444, + "step": 27710 + }, + { + "epoch": 0.4, + "grad_norm": 0.578125, + "learning_rate": 0.0001507382437597768, + "loss": 0.9639, + "step": 27715 + }, + { + "epoch": 0.4, + "grad_norm": 0.58984375, + "learning_rate": 0.00015071666794941544, + "loss": 0.9995, + "step": 27720 + }, + { + "epoch": 0.4, + "grad_norm": 0.515625, + "learning_rate": 0.00015069508896007553, + "loss": 0.9617, + "step": 27725 + }, + { + "epoch": 0.4, + "grad_norm": 0.55859375, + "learning_rate": 0.00015067350679310971, + "loss": 0.901, + "step": 27730 + }, + { + "epoch": 0.4, + "grad_norm": 0.6953125, + "learning_rate": 0.00015065192144987077, + "loss": 1.0665, + "step": 27735 + }, + { + "epoch": 0.4, + "grad_norm": 0.52734375, + "learning_rate": 0.00015063033293171173, + "loss": 1.0087, + "step": 27740 + }, + { + "epoch": 0.4, + "grad_norm": 0.58984375, + "learning_rate": 0.00015060874123998575, + "loss": 1.0189, + "step": 27745 + }, + { + "epoch": 0.4, + "grad_norm": 0.55078125, + "learning_rate": 0.00015058714637604623, + "loss": 0.858, + "step": 27750 + }, + { + "epoch": 0.4, + "grad_norm": 0.55859375, + "learning_rate": 0.00015056554834124675, + "loss": 0.9466, + "step": 27755 + }, + { + "epoch": 0.4, + "grad_norm": 0.57421875, + "learning_rate": 0.0001505439471369411, + "loss": 0.9885, + "step": 27760 + }, + { + "epoch": 0.4, + "grad_norm": 0.55078125, + "learning_rate": 0.0001505223427644833, + "loss": 0.9319, + "step": 27765 + }, + { + "epoch": 0.4, + "grad_norm": 0.76171875, + "learning_rate": 0.00015050073522522751, + "loss": 1.2127, + "step": 27770 + }, + { + "epoch": 0.4, + "grad_norm": 0.5546875, + "learning_rate": 0.00015047912452052813, + "loss": 1.0039, + "step": 27775 + }, + { + "epoch": 0.4, + "grad_norm": 0.51953125, + "learning_rate": 0.00015045751065173972, + "loss": 0.9694, + "step": 27780 + }, + { + "epoch": 0.4, + "grad_norm": 0.5859375, + "learning_rate": 0.00015043589362021708, + "loss": 1.0176, + "step": 27785 + }, + { + "epoch": 0.4, + "grad_norm": 0.57421875, + "learning_rate": 0.0001504142734273152, + "loss": 1.0114, + "step": 27790 + }, + { + "epoch": 0.4, + "grad_norm": 0.55078125, + "learning_rate": 0.0001503926500743892, + "loss": 0.8299, + "step": 27795 + }, + { + "epoch": 0.4, + "grad_norm": 0.5546875, + "learning_rate": 0.00015037102356279457, + "loss": 0.961, + "step": 27800 + }, + { + "epoch": 0.4, + "grad_norm": 0.67578125, + "learning_rate": 0.00015034939389388678, + "loss": 1.0191, + "step": 27805 + }, + { + "epoch": 0.4, + "grad_norm": 0.67578125, + "learning_rate": 0.0001503277610690216, + "loss": 0.8792, + "step": 27810 + }, + { + "epoch": 0.4, + "grad_norm": 0.56640625, + "learning_rate": 0.0001503061250895551, + "loss": 0.9465, + "step": 27815 + }, + { + "epoch": 0.4, + "grad_norm": 0.55859375, + "learning_rate": 0.00015028448595684336, + "loss": 1.059, + "step": 27820 + }, + { + "epoch": 0.4, + "grad_norm": 0.6171875, + "learning_rate": 0.00015026284367224276, + "loss": 0.9291, + "step": 27825 + }, + { + "epoch": 0.4, + "grad_norm": 0.58203125, + "learning_rate": 0.00015024119823710987, + "loss": 0.9444, + "step": 27830 + }, + { + "epoch": 0.4, + "grad_norm": 0.447265625, + "learning_rate": 0.00015021954965280148, + "loss": 0.8182, + "step": 27835 + }, + { + "epoch": 0.4, + "grad_norm": 0.5546875, + "learning_rate": 0.0001501978979206745, + "loss": 0.9046, + "step": 27840 + }, + { + "epoch": 0.4, + "grad_norm": 0.54296875, + "learning_rate": 0.0001501762430420861, + "loss": 1.1551, + "step": 27845 + }, + { + "epoch": 0.4, + "grad_norm": 0.53515625, + "learning_rate": 0.00015015458501839367, + "loss": 0.9656, + "step": 27850 + }, + { + "epoch": 0.4, + "grad_norm": 0.5625, + "learning_rate": 0.00015013292385095475, + "loss": 0.9725, + "step": 27855 + }, + { + "epoch": 0.4, + "grad_norm": 0.484375, + "learning_rate": 0.000150111259541127, + "loss": 0.9957, + "step": 27860 + }, + { + "epoch": 0.4, + "grad_norm": 0.55078125, + "learning_rate": 0.00015008959209026848, + "loss": 1.0478, + "step": 27865 + }, + { + "epoch": 0.4, + "grad_norm": 0.5546875, + "learning_rate": 0.0001500679214997373, + "loss": 0.9713, + "step": 27870 + }, + { + "epoch": 0.4, + "grad_norm": 0.56640625, + "learning_rate": 0.0001500462477708917, + "loss": 0.948, + "step": 27875 + }, + { + "epoch": 0.4, + "grad_norm": 0.5234375, + "learning_rate": 0.00015002457090509033, + "loss": 1.055, + "step": 27880 + }, + { + "epoch": 0.4, + "grad_norm": 0.5078125, + "learning_rate": 0.0001500028909036919, + "loss": 0.9306, + "step": 27885 + }, + { + "epoch": 0.4, + "grad_norm": 0.5625, + "learning_rate": 0.0001499812077680553, + "loss": 0.8554, + "step": 27890 + }, + { + "epoch": 0.4, + "grad_norm": 0.5703125, + "learning_rate": 0.0001499595214995397, + "loss": 0.9094, + "step": 27895 + }, + { + "epoch": 0.4, + "grad_norm": 0.50390625, + "learning_rate": 0.00014993783209950437, + "loss": 0.9488, + "step": 27900 + }, + { + "epoch": 0.4, + "grad_norm": 0.5234375, + "learning_rate": 0.00014991613956930885, + "loss": 0.8167, + "step": 27905 + }, + { + "epoch": 0.4, + "grad_norm": 0.466796875, + "learning_rate": 0.00014989444391031283, + "loss": 0.8708, + "step": 27910 + }, + { + "epoch": 0.4, + "grad_norm": 0.55859375, + "learning_rate": 0.00014987274512387631, + "loss": 0.9122, + "step": 27915 + }, + { + "epoch": 0.4, + "grad_norm": 0.60546875, + "learning_rate": 0.00014985104321135927, + "loss": 1.0521, + "step": 27920 + }, + { + "epoch": 0.4, + "grad_norm": 0.6015625, + "learning_rate": 0.00014982933817412207, + "loss": 1.0315, + "step": 27925 + }, + { + "epoch": 0.4, + "grad_norm": 0.62890625, + "learning_rate": 0.00014980763001352522, + "loss": 1.0003, + "step": 27930 + }, + { + "epoch": 0.4, + "grad_norm": 0.5703125, + "learning_rate": 0.00014978591873092938, + "loss": 0.9905, + "step": 27935 + }, + { + "epoch": 0.4, + "grad_norm": 0.56640625, + "learning_rate": 0.00014976420432769545, + "loss": 0.9145, + "step": 27940 + }, + { + "epoch": 0.4, + "grad_norm": 0.54296875, + "learning_rate": 0.00014974248680518453, + "loss": 0.8594, + "step": 27945 + }, + { + "epoch": 0.4, + "grad_norm": 0.625, + "learning_rate": 0.00014972076616475785, + "loss": 0.9862, + "step": 27950 + }, + { + "epoch": 0.4, + "grad_norm": 0.5703125, + "learning_rate": 0.00014969904240777696, + "loss": 1.0253, + "step": 27955 + }, + { + "epoch": 0.4, + "grad_norm": 0.6328125, + "learning_rate": 0.0001496773155356035, + "loss": 0.9986, + "step": 27960 + }, + { + "epoch": 0.4, + "grad_norm": 0.5859375, + "learning_rate": 0.0001496555855495993, + "loss": 1.0846, + "step": 27965 + }, + { + "epoch": 0.4, + "grad_norm": 0.6875, + "learning_rate": 0.00014963385245112643, + "loss": 0.9269, + "step": 27970 + }, + { + "epoch": 0.4, + "grad_norm": 0.53515625, + "learning_rate": 0.00014961211624154715, + "loss": 1.0928, + "step": 27975 + }, + { + "epoch": 0.4, + "grad_norm": 0.5625, + "learning_rate": 0.00014959037692222396, + "loss": 0.9256, + "step": 27980 + }, + { + "epoch": 0.4, + "grad_norm": 0.5546875, + "learning_rate": 0.00014956863449451947, + "loss": 0.8836, + "step": 27985 + }, + { + "epoch": 0.4, + "grad_norm": 0.53515625, + "learning_rate": 0.0001495468889597965, + "loss": 0.9389, + "step": 27990 + }, + { + "epoch": 0.4, + "grad_norm": 0.50390625, + "learning_rate": 0.0001495251403194181, + "loss": 0.932, + "step": 27995 + }, + { + "epoch": 0.4, + "grad_norm": 0.55859375, + "learning_rate": 0.00014950338857474751, + "loss": 1.0042, + "step": 28000 + }, + { + "epoch": 0.4, + "grad_norm": 0.5390625, + "learning_rate": 0.00014948163372714812, + "loss": 0.9303, + "step": 28005 + }, + { + "epoch": 0.4, + "grad_norm": 0.59765625, + "learning_rate": 0.0001494598757779836, + "loss": 1.0224, + "step": 28010 + }, + { + "epoch": 0.4, + "grad_norm": 0.53515625, + "learning_rate": 0.0001494381147286177, + "loss": 0.9624, + "step": 28015 + }, + { + "epoch": 0.4, + "grad_norm": 0.5625, + "learning_rate": 0.0001494163505804145, + "loss": 1.1779, + "step": 28020 + }, + { + "epoch": 0.4, + "grad_norm": 0.54296875, + "learning_rate": 0.00014939458333473814, + "loss": 0.8101, + "step": 28025 + }, + { + "epoch": 0.4, + "grad_norm": 0.5234375, + "learning_rate": 0.00014937281299295306, + "loss": 0.8563, + "step": 28030 + }, + { + "epoch": 0.4, + "grad_norm": 0.515625, + "learning_rate": 0.00014935103955642385, + "loss": 0.9414, + "step": 28035 + }, + { + "epoch": 0.4, + "grad_norm": 0.5625, + "learning_rate": 0.00014932926302651525, + "loss": 1.0134, + "step": 28040 + }, + { + "epoch": 0.4, + "grad_norm": 0.53125, + "learning_rate": 0.00014930748340459223, + "loss": 1.0407, + "step": 28045 + }, + { + "epoch": 0.4, + "grad_norm": 0.53515625, + "learning_rate": 0.00014928570069202004, + "loss": 0.9893, + "step": 28050 + }, + { + "epoch": 0.4, + "grad_norm": 0.59765625, + "learning_rate": 0.000149263914890164, + "loss": 0.8799, + "step": 28055 + }, + { + "epoch": 0.4, + "grad_norm": 0.69140625, + "learning_rate": 0.00014924212600038962, + "loss": 1.0822, + "step": 28060 + }, + { + "epoch": 0.4, + "grad_norm": 0.53515625, + "learning_rate": 0.00014922033402406273, + "loss": 0.9435, + "step": 28065 + }, + { + "epoch": 0.4, + "grad_norm": 0.5390625, + "learning_rate": 0.00014919853896254925, + "loss": 0.9677, + "step": 28070 + }, + { + "epoch": 0.4, + "grad_norm": 0.58203125, + "learning_rate": 0.00014917674081721532, + "loss": 0.9603, + "step": 28075 + }, + { + "epoch": 0.4, + "grad_norm": 0.66015625, + "learning_rate": 0.00014915493958942726, + "loss": 0.8813, + "step": 28080 + }, + { + "epoch": 0.4, + "grad_norm": 0.6171875, + "learning_rate": 0.0001491331352805516, + "loss": 0.9499, + "step": 28085 + }, + { + "epoch": 0.4, + "grad_norm": 0.5390625, + "learning_rate": 0.00014911132789195507, + "loss": 1.0815, + "step": 28090 + }, + { + "epoch": 0.4, + "grad_norm": 0.6015625, + "learning_rate": 0.0001490895174250046, + "loss": 0.9011, + "step": 28095 + }, + { + "epoch": 0.4, + "grad_norm": 0.53515625, + "learning_rate": 0.00014906770388106722, + "loss": 0.9064, + "step": 28100 + }, + { + "epoch": 0.4, + "grad_norm": 0.58203125, + "learning_rate": 0.0001490458872615103, + "loss": 0.936, + "step": 28105 + }, + { + "epoch": 0.4, + "grad_norm": 0.58984375, + "learning_rate": 0.00014902406756770131, + "loss": 1.0428, + "step": 28110 + }, + { + "epoch": 0.4, + "grad_norm": 0.640625, + "learning_rate": 0.00014900224480100794, + "loss": 1.0521, + "step": 28115 + }, + { + "epoch": 0.4, + "grad_norm": 0.466796875, + "learning_rate": 0.00014898041896279805, + "loss": 0.8189, + "step": 28120 + }, + { + "epoch": 0.4, + "grad_norm": 0.5859375, + "learning_rate": 0.0001489585900544397, + "loss": 0.8784, + "step": 28125 + }, + { + "epoch": 0.4, + "grad_norm": 0.59765625, + "learning_rate": 0.00014893675807730117, + "loss": 0.9528, + "step": 28130 + }, + { + "epoch": 0.4, + "grad_norm": 0.4765625, + "learning_rate": 0.0001489149230327509, + "loss": 1.0401, + "step": 28135 + }, + { + "epoch": 0.4, + "grad_norm": 0.6015625, + "learning_rate": 0.00014889308492215756, + "loss": 0.9957, + "step": 28140 + }, + { + "epoch": 0.4, + "grad_norm": 0.62109375, + "learning_rate": 0.00014887124374688999, + "loss": 1.0581, + "step": 28145 + }, + { + "epoch": 0.4, + "grad_norm": 0.63671875, + "learning_rate": 0.00014884939950831716, + "loss": 0.9572, + "step": 28150 + }, + { + "epoch": 0.4, + "grad_norm": 0.62109375, + "learning_rate": 0.00014882755220780837, + "loss": 0.8592, + "step": 28155 + }, + { + "epoch": 0.4, + "grad_norm": 0.515625, + "learning_rate": 0.000148805701846733, + "loss": 0.8136, + "step": 28160 + }, + { + "epoch": 0.4, + "grad_norm": 0.6171875, + "learning_rate": 0.0001487838484264606, + "loss": 0.9009, + "step": 28165 + }, + { + "epoch": 0.4, + "grad_norm": 0.5625, + "learning_rate": 0.0001487619919483611, + "loss": 0.9096, + "step": 28170 + }, + { + "epoch": 0.4, + "grad_norm": 0.68359375, + "learning_rate": 0.00014874013241380436, + "loss": 0.9505, + "step": 28175 + }, + { + "epoch": 0.4, + "grad_norm": 0.625, + "learning_rate": 0.00014871826982416062, + "loss": 0.9748, + "step": 28180 + }, + { + "epoch": 0.4, + "grad_norm": 0.69140625, + "learning_rate": 0.00014869640418080024, + "loss": 1.0135, + "step": 28185 + }, + { + "epoch": 0.4, + "grad_norm": 0.515625, + "learning_rate": 0.00014867453548509377, + "loss": 0.9244, + "step": 28190 + }, + { + "epoch": 0.4, + "grad_norm": 0.578125, + "learning_rate": 0.00014865266373841204, + "loss": 1.0503, + "step": 28195 + }, + { + "epoch": 0.4, + "grad_norm": 0.66015625, + "learning_rate": 0.00014863078894212587, + "loss": 0.9883, + "step": 28200 + }, + { + "epoch": 0.4, + "grad_norm": 0.5703125, + "learning_rate": 0.00014860891109760646, + "loss": 1.0211, + "step": 28205 + }, + { + "epoch": 0.4, + "grad_norm": 0.458984375, + "learning_rate": 0.0001485870302062252, + "loss": 0.9979, + "step": 28210 + }, + { + "epoch": 0.4, + "grad_norm": 0.55078125, + "learning_rate": 0.0001485651462693535, + "loss": 0.9707, + "step": 28215 + }, + { + "epoch": 0.4, + "grad_norm": 0.51953125, + "learning_rate": 0.00014854325928836314, + "loss": 0.8423, + "step": 28220 + }, + { + "epoch": 0.4, + "grad_norm": 0.5234375, + "learning_rate": 0.00014852136926462602, + "loss": 0.878, + "step": 28225 + }, + { + "epoch": 0.4, + "grad_norm": 0.5234375, + "learning_rate": 0.0001484994761995142, + "loss": 0.9854, + "step": 28230 + }, + { + "epoch": 0.41, + "grad_norm": 0.6171875, + "learning_rate": 0.00014847758009439995, + "loss": 1.0014, + "step": 28235 + }, + { + "epoch": 0.41, + "grad_norm": 0.57421875, + "learning_rate": 0.00014845568095065578, + "loss": 0.9161, + "step": 28240 + }, + { + "epoch": 0.41, + "grad_norm": 0.5546875, + "learning_rate": 0.00014843377876965437, + "loss": 0.9832, + "step": 28245 + }, + { + "epoch": 0.41, + "grad_norm": 0.54296875, + "learning_rate": 0.00014841187355276852, + "loss": 1.0405, + "step": 28250 + }, + { + "epoch": 0.41, + "grad_norm": 0.54296875, + "learning_rate": 0.0001483899653013713, + "loss": 0.9107, + "step": 28255 + }, + { + "epoch": 0.41, + "grad_norm": 0.54296875, + "learning_rate": 0.00014836805401683597, + "loss": 0.843, + "step": 28260 + }, + { + "epoch": 0.41, + "grad_norm": 0.498046875, + "learning_rate": 0.00014834613970053588, + "loss": 0.9375, + "step": 28265 + }, + { + "epoch": 0.41, + "grad_norm": 0.62890625, + "learning_rate": 0.0001483242223538447, + "loss": 1.0245, + "step": 28270 + }, + { + "epoch": 0.41, + "grad_norm": 0.58984375, + "learning_rate": 0.00014830230197813627, + "loss": 0.9011, + "step": 28275 + }, + { + "epoch": 0.41, + "grad_norm": 0.58203125, + "learning_rate": 0.00014828037857478451, + "loss": 0.8082, + "step": 28280 + }, + { + "epoch": 0.41, + "grad_norm": 0.51171875, + "learning_rate": 0.00014825845214516364, + "loss": 0.9006, + "step": 28285 + }, + { + "epoch": 0.41, + "grad_norm": 0.58984375, + "learning_rate": 0.00014823652269064803, + "loss": 0.9635, + "step": 28290 + }, + { + "epoch": 0.41, + "grad_norm": 0.59375, + "learning_rate": 0.00014821459021261224, + "loss": 0.9608, + "step": 28295 + }, + { + "epoch": 0.41, + "grad_norm": 1.0625, + "learning_rate": 0.00014819265471243103, + "loss": 1.0338, + "step": 28300 + }, + { + "epoch": 0.41, + "grad_norm": 0.57421875, + "learning_rate": 0.00014817071619147932, + "loss": 1.0203, + "step": 28305 + }, + { + "epoch": 0.41, + "grad_norm": 0.55078125, + "learning_rate": 0.00014814877465113227, + "loss": 1.1001, + "step": 28310 + }, + { + "epoch": 0.41, + "grad_norm": 0.5390625, + "learning_rate": 0.00014812683009276517, + "loss": 0.944, + "step": 28315 + }, + { + "epoch": 0.41, + "grad_norm": 0.59765625, + "learning_rate": 0.00014810488251775357, + "loss": 1.0509, + "step": 28320 + }, + { + "epoch": 0.41, + "grad_norm": 0.51171875, + "learning_rate": 0.00014808293192747313, + "loss": 0.9797, + "step": 28325 + }, + { + "epoch": 0.41, + "grad_norm": 0.66796875, + "learning_rate": 0.00014806097832329975, + "loss": 0.88, + "step": 28330 + }, + { + "epoch": 0.41, + "grad_norm": 0.62890625, + "learning_rate": 0.00014803902170660953, + "loss": 1.0054, + "step": 28335 + }, + { + "epoch": 0.41, + "grad_norm": 0.5859375, + "learning_rate": 0.0001480170620787787, + "loss": 1.0066, + "step": 28340 + }, + { + "epoch": 0.41, + "grad_norm": 0.54296875, + "learning_rate": 0.00014799509944118374, + "loss": 1.0437, + "step": 28345 + }, + { + "epoch": 0.41, + "grad_norm": 0.58984375, + "learning_rate": 0.00014797313379520132, + "loss": 0.9357, + "step": 28350 + }, + { + "epoch": 0.41, + "grad_norm": 0.55078125, + "learning_rate": 0.00014795116514220818, + "loss": 0.9221, + "step": 28355 + }, + { + "epoch": 0.41, + "grad_norm": 0.56640625, + "learning_rate": 0.00014792919348358144, + "loss": 0.913, + "step": 28360 + }, + { + "epoch": 0.41, + "grad_norm": 0.5078125, + "learning_rate": 0.00014790721882069823, + "loss": 0.8986, + "step": 28365 + }, + { + "epoch": 0.41, + "grad_norm": 0.66796875, + "learning_rate": 0.000147885241154936, + "loss": 0.9824, + "step": 28370 + }, + { + "epoch": 0.41, + "grad_norm": 0.62890625, + "learning_rate": 0.00014786326048767232, + "loss": 0.9492, + "step": 28375 + }, + { + "epoch": 0.41, + "grad_norm": 0.5546875, + "learning_rate": 0.000147841276820285, + "loss": 0.865, + "step": 28380 + }, + { + "epoch": 0.41, + "grad_norm": 0.546875, + "learning_rate": 0.0001478192901541519, + "loss": 1.1203, + "step": 28385 + }, + { + "epoch": 0.41, + "grad_norm": 0.55859375, + "learning_rate": 0.00014779730049065124, + "loss": 0.8508, + "step": 28390 + }, + { + "epoch": 0.41, + "grad_norm": 0.55078125, + "learning_rate": 0.00014777530783116136, + "loss": 0.9426, + "step": 28395 + }, + { + "epoch": 0.41, + "grad_norm": 0.5, + "learning_rate": 0.00014775331217706077, + "loss": 1.1074, + "step": 28400 + }, + { + "epoch": 0.41, + "grad_norm": 0.60546875, + "learning_rate": 0.0001477313135297282, + "loss": 0.9565, + "step": 28405 + }, + { + "epoch": 0.41, + "grad_norm": 0.53125, + "learning_rate": 0.00014770931189054252, + "loss": 0.9497, + "step": 28410 + }, + { + "epoch": 0.41, + "grad_norm": 0.5625, + "learning_rate": 0.00014768730726088286, + "loss": 0.8835, + "step": 28415 + }, + { + "epoch": 0.41, + "grad_norm": 0.59375, + "learning_rate": 0.00014766529964212844, + "loss": 1.0526, + "step": 28420 + }, + { + "epoch": 0.41, + "grad_norm": 0.484375, + "learning_rate": 0.00014764328903565875, + "loss": 0.9905, + "step": 28425 + }, + { + "epoch": 0.41, + "grad_norm": 0.50390625, + "learning_rate": 0.0001476212754428535, + "loss": 0.9627, + "step": 28430 + }, + { + "epoch": 0.41, + "grad_norm": 0.6171875, + "learning_rate": 0.00014759925886509241, + "loss": 0.8762, + "step": 28435 + }, + { + "epoch": 0.41, + "grad_norm": 0.63671875, + "learning_rate": 0.00014757723930375555, + "loss": 0.9423, + "step": 28440 + }, + { + "epoch": 0.41, + "grad_norm": 0.5546875, + "learning_rate": 0.0001475552167602232, + "loss": 1.1222, + "step": 28445 + }, + { + "epoch": 0.41, + "grad_norm": 0.64453125, + "learning_rate": 0.00014753319123587567, + "loss": 1.0916, + "step": 28450 + }, + { + "epoch": 0.41, + "grad_norm": 0.50390625, + "learning_rate": 0.00014751116273209358, + "loss": 0.9933, + "step": 28455 + }, + { + "epoch": 0.41, + "grad_norm": 0.55859375, + "learning_rate": 0.00014748913125025773, + "loss": 0.9168, + "step": 28460 + }, + { + "epoch": 0.41, + "grad_norm": 0.56640625, + "learning_rate": 0.000147467096791749, + "loss": 0.9554, + "step": 28465 + }, + { + "epoch": 0.41, + "grad_norm": 0.5078125, + "learning_rate": 0.00014744505935794858, + "loss": 0.8522, + "step": 28470 + }, + { + "epoch": 0.41, + "grad_norm": 0.5625, + "learning_rate": 0.00014742301895023785, + "loss": 1.1012, + "step": 28475 + }, + { + "epoch": 0.41, + "grad_norm": 0.51171875, + "learning_rate": 0.00014740097556999824, + "loss": 0.9742, + "step": 28480 + }, + { + "epoch": 0.41, + "grad_norm": 0.5625, + "learning_rate": 0.00014737892921861155, + "loss": 1.1805, + "step": 28485 + }, + { + "epoch": 0.41, + "grad_norm": 0.56640625, + "learning_rate": 0.00014735687989745957, + "loss": 0.9158, + "step": 28490 + }, + { + "epoch": 0.41, + "grad_norm": 0.59765625, + "learning_rate": 0.0001473348276079244, + "loss": 1.0106, + "step": 28495 + }, + { + "epoch": 0.41, + "grad_norm": 0.56640625, + "learning_rate": 0.00014731277235138839, + "loss": 1.0168, + "step": 28500 + }, + { + "epoch": 0.41, + "grad_norm": 0.60546875, + "learning_rate": 0.0001472907141292339, + "loss": 0.8944, + "step": 28505 + }, + { + "epoch": 0.41, + "grad_norm": 0.5390625, + "learning_rate": 0.00014726865294284358, + "loss": 0.8638, + "step": 28510 + }, + { + "epoch": 0.41, + "grad_norm": 0.52734375, + "learning_rate": 0.00014724658879360027, + "loss": 0.7973, + "step": 28515 + }, + { + "epoch": 0.41, + "grad_norm": 0.486328125, + "learning_rate": 0.00014722452168288694, + "loss": 0.7767, + "step": 28520 + }, + { + "epoch": 0.41, + "grad_norm": 0.5859375, + "learning_rate": 0.00014720245161208682, + "loss": 0.9628, + "step": 28525 + }, + { + "epoch": 0.41, + "grad_norm": 0.53515625, + "learning_rate": 0.0001471803785825833, + "loss": 0.9037, + "step": 28530 + }, + { + "epoch": 0.41, + "grad_norm": 0.5703125, + "learning_rate": 0.00014715830259575988, + "loss": 1.0658, + "step": 28535 + }, + { + "epoch": 0.41, + "grad_norm": 0.8046875, + "learning_rate": 0.00014713622365300038, + "loss": 0.971, + "step": 28540 + }, + { + "epoch": 0.41, + "grad_norm": 0.51171875, + "learning_rate": 0.00014711414175568865, + "loss": 1.1279, + "step": 28545 + }, + { + "epoch": 0.41, + "grad_norm": 0.58984375, + "learning_rate": 0.00014709205690520888, + "loss": 0.9409, + "step": 28550 + }, + { + "epoch": 0.41, + "grad_norm": 0.51171875, + "learning_rate": 0.00014706996910294535, + "loss": 1.0696, + "step": 28555 + }, + { + "epoch": 0.41, + "grad_norm": 0.62890625, + "learning_rate": 0.00014704787835028257, + "loss": 0.8597, + "step": 28560 + }, + { + "epoch": 0.41, + "grad_norm": 0.5859375, + "learning_rate": 0.00014702578464860516, + "loss": 0.9773, + "step": 28565 + }, + { + "epoch": 0.41, + "grad_norm": 0.6171875, + "learning_rate": 0.00014700368799929804, + "loss": 0.9193, + "step": 28570 + }, + { + "epoch": 0.41, + "grad_norm": 0.54296875, + "learning_rate": 0.00014698158840374619, + "loss": 0.9545, + "step": 28575 + }, + { + "epoch": 0.41, + "grad_norm": 0.5390625, + "learning_rate": 0.0001469594858633349, + "loss": 0.9189, + "step": 28580 + }, + { + "epoch": 0.41, + "grad_norm": 0.63671875, + "learning_rate": 0.00014693738037944954, + "loss": 0.9593, + "step": 28585 + }, + { + "epoch": 0.41, + "grad_norm": 0.6015625, + "learning_rate": 0.00014691527195347573, + "loss": 0.9682, + "step": 28590 + }, + { + "epoch": 0.41, + "grad_norm": 0.5, + "learning_rate": 0.00014689316058679922, + "loss": 0.8859, + "step": 28595 + }, + { + "epoch": 0.41, + "grad_norm": 0.55078125, + "learning_rate": 0.000146871046280806, + "loss": 1.1101, + "step": 28600 + }, + { + "epoch": 0.41, + "grad_norm": 0.58203125, + "learning_rate": 0.00014684892903688224, + "loss": 0.9146, + "step": 28605 + }, + { + "epoch": 0.41, + "grad_norm": 0.490234375, + "learning_rate": 0.00014682680885641424, + "loss": 0.989, + "step": 28610 + }, + { + "epoch": 0.41, + "grad_norm": 0.5703125, + "learning_rate": 0.00014680468574078853, + "loss": 0.9693, + "step": 28615 + }, + { + "epoch": 0.41, + "grad_norm": 0.55078125, + "learning_rate": 0.00014678255969139184, + "loss": 0.9098, + "step": 28620 + }, + { + "epoch": 0.41, + "grad_norm": 0.53125, + "learning_rate": 0.00014676043070961097, + "loss": 0.9663, + "step": 28625 + }, + { + "epoch": 0.41, + "grad_norm": 0.494140625, + "learning_rate": 0.0001467382987968331, + "loss": 0.8683, + "step": 28630 + }, + { + "epoch": 0.41, + "grad_norm": 0.5546875, + "learning_rate": 0.0001467161639544454, + "loss": 0.9827, + "step": 28635 + }, + { + "epoch": 0.41, + "grad_norm": 0.56640625, + "learning_rate": 0.00014669402618383535, + "loss": 1.0172, + "step": 28640 + }, + { + "epoch": 0.41, + "grad_norm": 0.5703125, + "learning_rate": 0.00014667188548639056, + "loss": 1.003, + "step": 28645 + }, + { + "epoch": 0.41, + "grad_norm": 0.65234375, + "learning_rate": 0.00014664974186349883, + "loss": 1.0974, + "step": 28650 + }, + { + "epoch": 0.41, + "grad_norm": 0.51171875, + "learning_rate": 0.00014662759531654812, + "loss": 0.8167, + "step": 28655 + }, + { + "epoch": 0.41, + "grad_norm": 0.51953125, + "learning_rate": 0.0001466054458469267, + "loss": 0.959, + "step": 28660 + }, + { + "epoch": 0.41, + "grad_norm": 0.6484375, + "learning_rate": 0.00014658329345602282, + "loss": 1.1323, + "step": 28665 + }, + { + "epoch": 0.41, + "grad_norm": 0.5546875, + "learning_rate": 0.00014656113814522502, + "loss": 0.9222, + "step": 28670 + }, + { + "epoch": 0.41, + "grad_norm": 0.48828125, + "learning_rate": 0.0001465389799159221, + "loss": 0.926, + "step": 28675 + }, + { + "epoch": 0.41, + "grad_norm": 0.52734375, + "learning_rate": 0.00014651681876950287, + "loss": 1.0413, + "step": 28680 + }, + { + "epoch": 0.41, + "grad_norm": 0.6328125, + "learning_rate": 0.0001464946547073565, + "loss": 0.9231, + "step": 28685 + }, + { + "epoch": 0.41, + "grad_norm": 0.5625, + "learning_rate": 0.00014647248773087219, + "loss": 0.9352, + "step": 28690 + }, + { + "epoch": 0.41, + "grad_norm": 0.546875, + "learning_rate": 0.00014645031784143946, + "loss": 0.8001, + "step": 28695 + }, + { + "epoch": 0.41, + "grad_norm": 0.54296875, + "learning_rate": 0.00014642814504044787, + "loss": 0.9051, + "step": 28700 + }, + { + "epoch": 0.41, + "grad_norm": 0.53515625, + "learning_rate": 0.0001464059693292873, + "loss": 0.9376, + "step": 28705 + }, + { + "epoch": 0.41, + "grad_norm": 0.60546875, + "learning_rate": 0.00014638379070934767, + "loss": 1.1241, + "step": 28710 + }, + { + "epoch": 0.41, + "grad_norm": 0.5390625, + "learning_rate": 0.00014636160918201927, + "loss": 0.9326, + "step": 28715 + }, + { + "epoch": 0.41, + "grad_norm": 0.59375, + "learning_rate": 0.0001463394247486924, + "loss": 0.9006, + "step": 28720 + }, + { + "epoch": 0.41, + "grad_norm": 0.484375, + "learning_rate": 0.00014631723741075759, + "loss": 1.0671, + "step": 28725 + }, + { + "epoch": 0.41, + "grad_norm": 0.53515625, + "learning_rate": 0.00014629504716960558, + "loss": 1.1036, + "step": 28730 + }, + { + "epoch": 0.41, + "grad_norm": 0.6875, + "learning_rate": 0.0001462728540266273, + "loss": 1.0322, + "step": 28735 + }, + { + "epoch": 0.41, + "grad_norm": 0.62109375, + "learning_rate": 0.00014625065798321382, + "loss": 0.9795, + "step": 28740 + }, + { + "epoch": 0.41, + "grad_norm": 0.57421875, + "learning_rate": 0.00014622845904075643, + "loss": 0.9386, + "step": 28745 + }, + { + "epoch": 0.41, + "grad_norm": 0.6171875, + "learning_rate": 0.00014620625720064657, + "loss": 1.0119, + "step": 28750 + }, + { + "epoch": 0.41, + "grad_norm": 0.640625, + "learning_rate": 0.00014618405246427592, + "loss": 1.0182, + "step": 28755 + }, + { + "epoch": 0.41, + "grad_norm": 0.6328125, + "learning_rate": 0.00014616184483303622, + "loss": 0.9796, + "step": 28760 + }, + { + "epoch": 0.41, + "grad_norm": 0.5078125, + "learning_rate": 0.00014613963430831948, + "loss": 0.9254, + "step": 28765 + }, + { + "epoch": 0.41, + "grad_norm": 0.5234375, + "learning_rate": 0.000146117420891518, + "loss": 0.8252, + "step": 28770 + }, + { + "epoch": 0.41, + "grad_norm": 0.62890625, + "learning_rate": 0.00014609520458402404, + "loss": 0.8693, + "step": 28775 + }, + { + "epoch": 0.41, + "grad_norm": 0.478515625, + "learning_rate": 0.0001460729853872301, + "loss": 0.9317, + "step": 28780 + }, + { + "epoch": 0.41, + "grad_norm": 0.53125, + "learning_rate": 0.000146050763302529, + "loss": 0.8887, + "step": 28785 + }, + { + "epoch": 0.41, + "grad_norm": 0.5859375, + "learning_rate": 0.00014602853833131361, + "loss": 0.9777, + "step": 28790 + }, + { + "epoch": 0.41, + "grad_norm": 0.57421875, + "learning_rate": 0.00014600631047497698, + "loss": 0.9747, + "step": 28795 + }, + { + "epoch": 0.41, + "grad_norm": 0.55859375, + "learning_rate": 0.00014598407973491248, + "loss": 1.0291, + "step": 28800 + }, + { + "epoch": 0.41, + "grad_norm": 0.6953125, + "learning_rate": 0.00014596184611251345, + "loss": 0.9252, + "step": 28805 + }, + { + "epoch": 0.41, + "grad_norm": 0.5234375, + "learning_rate": 0.00014593960960917354, + "loss": 0.8716, + "step": 28810 + }, + { + "epoch": 0.41, + "grad_norm": 0.64453125, + "learning_rate": 0.00014591737022628663, + "loss": 1.051, + "step": 28815 + }, + { + "epoch": 0.41, + "grad_norm": 0.5625, + "learning_rate": 0.0001458951279652466, + "loss": 0.9231, + "step": 28820 + }, + { + "epoch": 0.41, + "grad_norm": 0.63671875, + "learning_rate": 0.00014587288282744774, + "loss": 0.9758, + "step": 28825 + }, + { + "epoch": 0.41, + "grad_norm": 0.61328125, + "learning_rate": 0.0001458506348142843, + "loss": 0.9687, + "step": 28830 + }, + { + "epoch": 0.41, + "grad_norm": 0.451171875, + "learning_rate": 0.00014582838392715087, + "loss": 0.8625, + "step": 28835 + }, + { + "epoch": 0.41, + "grad_norm": 0.56640625, + "learning_rate": 0.00014580613016744213, + "loss": 0.9746, + "step": 28840 + }, + { + "epoch": 0.41, + "grad_norm": 0.51953125, + "learning_rate": 0.00014578387353655296, + "loss": 0.9406, + "step": 28845 + }, + { + "epoch": 0.41, + "grad_norm": 0.53515625, + "learning_rate": 0.0001457616140358785, + "loss": 0.8996, + "step": 28850 + }, + { + "epoch": 0.41, + "grad_norm": 0.578125, + "learning_rate": 0.00014573935166681392, + "loss": 0.9932, + "step": 28855 + }, + { + "epoch": 0.41, + "grad_norm": 0.5859375, + "learning_rate": 0.00014571708643075468, + "loss": 0.8784, + "step": 28860 + }, + { + "epoch": 0.41, + "grad_norm": 0.5625, + "learning_rate": 0.0001456948183290964, + "loss": 1.002, + "step": 28865 + }, + { + "epoch": 0.41, + "grad_norm": 0.55859375, + "learning_rate": 0.0001456725473632349, + "loss": 0.9869, + "step": 28870 + }, + { + "epoch": 0.41, + "grad_norm": 0.609375, + "learning_rate": 0.00014565027353456608, + "loss": 0.9852, + "step": 28875 + }, + { + "epoch": 0.41, + "grad_norm": 0.5625, + "learning_rate": 0.00014562799684448617, + "loss": 0.8989, + "step": 28880 + }, + { + "epoch": 0.41, + "grad_norm": 0.5703125, + "learning_rate": 0.00014560571729439138, + "loss": 0.9583, + "step": 28885 + }, + { + "epoch": 0.41, + "grad_norm": 0.55859375, + "learning_rate": 0.00014558343488567835, + "loss": 0.9679, + "step": 28890 + }, + { + "epoch": 0.41, + "grad_norm": 0.63671875, + "learning_rate": 0.00014556114961974368, + "loss": 1.1131, + "step": 28895 + }, + { + "epoch": 0.41, + "grad_norm": 0.56640625, + "learning_rate": 0.00014553886149798424, + "loss": 1.0294, + "step": 28900 + }, + { + "epoch": 0.41, + "grad_norm": 0.60546875, + "learning_rate": 0.00014551657052179712, + "loss": 0.971, + "step": 28905 + }, + { + "epoch": 0.41, + "grad_norm": 0.6796875, + "learning_rate": 0.00014549427669257955, + "loss": 0.9676, + "step": 28910 + }, + { + "epoch": 0.41, + "grad_norm": 0.6171875, + "learning_rate": 0.00014547198001172885, + "loss": 0.9163, + "step": 28915 + }, + { + "epoch": 0.41, + "grad_norm": 0.53125, + "learning_rate": 0.00014544968048064267, + "loss": 1.026, + "step": 28920 + }, + { + "epoch": 0.41, + "grad_norm": 0.58984375, + "learning_rate": 0.00014542737810071879, + "loss": 0.8377, + "step": 28925 + }, + { + "epoch": 0.41, + "grad_norm": 0.54296875, + "learning_rate": 0.00014540507287335506, + "loss": 0.9355, + "step": 28930 + }, + { + "epoch": 0.42, + "grad_norm": 0.6015625, + "learning_rate": 0.0001453827647999497, + "loss": 0.8847, + "step": 28935 + }, + { + "epoch": 0.42, + "grad_norm": 0.484375, + "learning_rate": 0.00014536045388190093, + "loss": 1.0317, + "step": 28940 + }, + { + "epoch": 0.42, + "grad_norm": 0.578125, + "learning_rate": 0.0001453381401206072, + "loss": 0.8708, + "step": 28945 + }, + { + "epoch": 0.42, + "grad_norm": 0.55859375, + "learning_rate": 0.0001453158235174673, + "loss": 0.9982, + "step": 28950 + }, + { + "epoch": 0.42, + "grad_norm": 0.64453125, + "learning_rate": 0.00014529350407387995, + "loss": 0.9613, + "step": 28955 + }, + { + "epoch": 0.42, + "grad_norm": 0.5390625, + "learning_rate": 0.00014527118179124415, + "loss": 0.8559, + "step": 28960 + }, + { + "epoch": 0.42, + "grad_norm": 0.478515625, + "learning_rate": 0.00014524885667095914, + "loss": 1.0255, + "step": 28965 + }, + { + "epoch": 0.42, + "grad_norm": 0.58203125, + "learning_rate": 0.00014522652871442425, + "loss": 0.9109, + "step": 28970 + }, + { + "epoch": 0.42, + "grad_norm": 0.54296875, + "learning_rate": 0.000145204197923039, + "loss": 0.8796, + "step": 28975 + }, + { + "epoch": 0.42, + "grad_norm": 0.65625, + "learning_rate": 0.0001451818642982032, + "loss": 1.0481, + "step": 28980 + }, + { + "epoch": 0.42, + "grad_norm": 0.5703125, + "learning_rate": 0.00014515952784131665, + "loss": 0.9254, + "step": 28985 + }, + { + "epoch": 0.42, + "grad_norm": 0.578125, + "learning_rate": 0.00014513718855377949, + "loss": 1.0509, + "step": 28990 + }, + { + "epoch": 0.42, + "grad_norm": 0.490234375, + "learning_rate": 0.0001451148464369919, + "loss": 0.9491, + "step": 28995 + }, + { + "epoch": 0.42, + "grad_norm": 0.5390625, + "learning_rate": 0.00014509250149235438, + "loss": 0.9705, + "step": 29000 + }, + { + "epoch": 0.42, + "grad_norm": 0.50390625, + "learning_rate": 0.00014507015372126753, + "loss": 0.9703, + "step": 29005 + }, + { + "epoch": 0.42, + "grad_norm": 0.640625, + "learning_rate": 0.00014504780312513208, + "loss": 1.0045, + "step": 29010 + }, + { + "epoch": 0.42, + "grad_norm": 0.5, + "learning_rate": 0.00014502544970534906, + "loss": 0.9571, + "step": 29015 + }, + { + "epoch": 0.42, + "grad_norm": 0.671875, + "learning_rate": 0.00014500309346331954, + "loss": 1.0786, + "step": 29020 + }, + { + "epoch": 0.42, + "grad_norm": 0.59765625, + "learning_rate": 0.00014498073440044487, + "loss": 0.9729, + "step": 29025 + }, + { + "epoch": 0.42, + "grad_norm": 0.640625, + "learning_rate": 0.00014495837251812655, + "loss": 0.9124, + "step": 29030 + }, + { + "epoch": 0.42, + "grad_norm": 0.5625, + "learning_rate": 0.00014493600781776626, + "loss": 0.8797, + "step": 29035 + }, + { + "epoch": 0.42, + "grad_norm": 0.51171875, + "learning_rate": 0.0001449136403007658, + "loss": 0.8734, + "step": 29040 + }, + { + "epoch": 0.42, + "grad_norm": 0.5625, + "learning_rate": 0.0001448912699685272, + "loss": 1.0155, + "step": 29045 + }, + { + "epoch": 0.42, + "grad_norm": 0.59375, + "learning_rate": 0.00014486889682245268, + "loss": 0.9959, + "step": 29050 + }, + { + "epoch": 0.42, + "grad_norm": 0.48046875, + "learning_rate": 0.0001448465208639446, + "loss": 0.9737, + "step": 29055 + }, + { + "epoch": 0.42, + "grad_norm": 0.5234375, + "learning_rate": 0.00014482414209440555, + "loss": 0.7726, + "step": 29060 + }, + { + "epoch": 0.42, + "grad_norm": 0.486328125, + "learning_rate": 0.0001448017605152382, + "loss": 0.8841, + "step": 29065 + }, + { + "epoch": 0.42, + "grad_norm": 0.53515625, + "learning_rate": 0.0001447793761278455, + "loss": 0.8884, + "step": 29070 + }, + { + "epoch": 0.42, + "grad_norm": 0.53515625, + "learning_rate": 0.00014475698893363044, + "loss": 0.896, + "step": 29075 + }, + { + "epoch": 0.42, + "grad_norm": 0.5625, + "learning_rate": 0.00014473459893399638, + "loss": 1.1103, + "step": 29080 + }, + { + "epoch": 0.42, + "grad_norm": 0.59375, + "learning_rate": 0.00014471220613034675, + "loss": 1.0756, + "step": 29085 + }, + { + "epoch": 0.42, + "grad_norm": 0.65234375, + "learning_rate": 0.00014468981052408506, + "loss": 1.0535, + "step": 29090 + }, + { + "epoch": 0.42, + "grad_norm": 0.515625, + "learning_rate": 0.00014466741211661516, + "loss": 0.8998, + "step": 29095 + }, + { + "epoch": 0.42, + "grad_norm": 0.458984375, + "learning_rate": 0.000144645010909341, + "loss": 0.9208, + "step": 29100 + }, + { + "epoch": 0.42, + "grad_norm": 1.0078125, + "learning_rate": 0.00014462260690366668, + "loss": 1.0528, + "step": 29105 + }, + { + "epoch": 0.42, + "grad_norm": 0.61328125, + "learning_rate": 0.0001446002001009966, + "loss": 1.0644, + "step": 29110 + }, + { + "epoch": 0.42, + "grad_norm": 0.578125, + "learning_rate": 0.0001445777905027352, + "loss": 0.9569, + "step": 29115 + }, + { + "epoch": 0.42, + "grad_norm": 0.55859375, + "learning_rate": 0.00014455537811028702, + "loss": 0.9952, + "step": 29120 + }, + { + "epoch": 0.42, + "grad_norm": 0.6484375, + "learning_rate": 0.00014453296292505708, + "loss": 1.0159, + "step": 29125 + }, + { + "epoch": 0.42, + "grad_norm": 0.61328125, + "learning_rate": 0.00014451054494845025, + "loss": 0.8709, + "step": 29130 + }, + { + "epoch": 0.42, + "grad_norm": 0.59375, + "learning_rate": 0.0001444881241818718, + "loss": 0.92, + "step": 29135 + }, + { + "epoch": 0.42, + "grad_norm": 0.6640625, + "learning_rate": 0.00014446570062672705, + "loss": 1.0585, + "step": 29140 + }, + { + "epoch": 0.42, + "grad_norm": 0.53515625, + "learning_rate": 0.00014444327428442155, + "loss": 1.0007, + "step": 29145 + }, + { + "epoch": 0.42, + "grad_norm": 0.60546875, + "learning_rate": 0.000144420845156361, + "loss": 0.9254, + "step": 29150 + }, + { + "epoch": 0.42, + "grad_norm": 0.6640625, + "learning_rate": 0.0001443984132439513, + "loss": 0.9686, + "step": 29155 + }, + { + "epoch": 0.42, + "grad_norm": 0.57421875, + "learning_rate": 0.00014437597854859846, + "loss": 0.9258, + "step": 29160 + }, + { + "epoch": 0.42, + "grad_norm": 0.5234375, + "learning_rate": 0.00014435354107170876, + "loss": 0.8689, + "step": 29165 + }, + { + "epoch": 0.42, + "grad_norm": 0.6328125, + "learning_rate": 0.00014433110081468859, + "loss": 0.9698, + "step": 29170 + }, + { + "epoch": 0.42, + "grad_norm": 0.65625, + "learning_rate": 0.00014430865777894454, + "loss": 1.0518, + "step": 29175 + }, + { + "epoch": 0.42, + "grad_norm": 0.58203125, + "learning_rate": 0.00014428621196588336, + "loss": 0.9741, + "step": 29180 + }, + { + "epoch": 0.42, + "grad_norm": 0.609375, + "learning_rate": 0.00014426376337691195, + "loss": 0.8741, + "step": 29185 + }, + { + "epoch": 0.42, + "grad_norm": 0.5859375, + "learning_rate": 0.00014424131201343747, + "loss": 0.9883, + "step": 29190 + }, + { + "epoch": 0.42, + "grad_norm": 0.60546875, + "learning_rate": 0.00014421885787686714, + "loss": 0.9213, + "step": 29195 + }, + { + "epoch": 0.42, + "grad_norm": 0.58984375, + "learning_rate": 0.00014419640096860844, + "loss": 0.9058, + "step": 29200 + }, + { + "epoch": 0.42, + "grad_norm": 0.54296875, + "learning_rate": 0.000144173941290069, + "loss": 0.892, + "step": 29205 + }, + { + "epoch": 0.42, + "grad_norm": 0.6796875, + "learning_rate": 0.0001441514788426566, + "loss": 0.9142, + "step": 29210 + }, + { + "epoch": 0.42, + "grad_norm": 0.6171875, + "learning_rate": 0.00014412901362777922, + "loss": 1.0063, + "step": 29215 + }, + { + "epoch": 0.42, + "grad_norm": 0.60546875, + "learning_rate": 0.00014410654564684505, + "loss": 1.0145, + "step": 29220 + }, + { + "epoch": 0.42, + "grad_norm": 0.57421875, + "learning_rate": 0.00014408407490126233, + "loss": 0.9077, + "step": 29225 + }, + { + "epoch": 0.42, + "grad_norm": 0.5546875, + "learning_rate": 0.00014406160139243958, + "loss": 1.0044, + "step": 29230 + }, + { + "epoch": 0.42, + "grad_norm": 0.69921875, + "learning_rate": 0.0001440391251217855, + "loss": 1.1243, + "step": 29235 + }, + { + "epoch": 0.42, + "grad_norm": 0.5859375, + "learning_rate": 0.00014401664609070889, + "loss": 1.1431, + "step": 29240 + }, + { + "epoch": 0.42, + "grad_norm": 0.69140625, + "learning_rate": 0.00014399416430061875, + "loss": 0.789, + "step": 29245 + }, + { + "epoch": 0.42, + "grad_norm": 0.5703125, + "learning_rate": 0.0001439716797529243, + "loss": 0.8925, + "step": 29250 + }, + { + "epoch": 0.42, + "grad_norm": 0.53515625, + "learning_rate": 0.0001439491924490349, + "loss": 0.9749, + "step": 29255 + }, + { + "epoch": 0.42, + "grad_norm": 0.6640625, + "learning_rate": 0.00014392670239036004, + "loss": 1.0446, + "step": 29260 + }, + { + "epoch": 0.42, + "grad_norm": 0.60546875, + "learning_rate": 0.00014390420957830947, + "loss": 0.8864, + "step": 29265 + }, + { + "epoch": 0.42, + "grad_norm": 0.61328125, + "learning_rate": 0.00014388171401429306, + "loss": 0.9329, + "step": 29270 + }, + { + "epoch": 0.42, + "grad_norm": 0.59375, + "learning_rate": 0.00014385921569972079, + "loss": 0.843, + "step": 29275 + }, + { + "epoch": 0.42, + "grad_norm": 0.62890625, + "learning_rate": 0.00014383671463600293, + "loss": 0.8605, + "step": 29280 + }, + { + "epoch": 0.42, + "grad_norm": 0.67578125, + "learning_rate": 0.00014381421082454988, + "loss": 0.9682, + "step": 29285 + }, + { + "epoch": 0.42, + "grad_norm": 0.5390625, + "learning_rate": 0.0001437917042667722, + "loss": 0.8813, + "step": 29290 + }, + { + "epoch": 0.42, + "grad_norm": 0.578125, + "learning_rate": 0.00014376919496408063, + "loss": 1.0273, + "step": 29295 + }, + { + "epoch": 0.42, + "grad_norm": 0.59375, + "learning_rate": 0.00014374668291788608, + "loss": 1.1245, + "step": 29300 + }, + { + "epoch": 0.42, + "grad_norm": 0.55859375, + "learning_rate": 0.0001437241681295996, + "loss": 0.9684, + "step": 29305 + }, + { + "epoch": 0.42, + "grad_norm": 0.75, + "learning_rate": 0.00014370165060063246, + "loss": 0.858, + "step": 29310 + }, + { + "epoch": 0.42, + "grad_norm": 0.55078125, + "learning_rate": 0.0001436791303323961, + "loss": 0.9644, + "step": 29315 + }, + { + "epoch": 0.42, + "grad_norm": 0.5546875, + "learning_rate": 0.0001436566073263021, + "loss": 1.017, + "step": 29320 + }, + { + "epoch": 0.42, + "grad_norm": 0.5390625, + "learning_rate": 0.00014363408158376225, + "loss": 0.9339, + "step": 29325 + }, + { + "epoch": 0.42, + "grad_norm": 0.6015625, + "learning_rate": 0.00014361155310618846, + "loss": 1.0671, + "step": 29330 + }, + { + "epoch": 0.42, + "grad_norm": 0.6171875, + "learning_rate": 0.00014358902189499283, + "loss": 0.9671, + "step": 29335 + }, + { + "epoch": 0.42, + "grad_norm": 0.57421875, + "learning_rate": 0.00014356648795158766, + "loss": 0.9803, + "step": 29340 + }, + { + "epoch": 0.42, + "grad_norm": 0.609375, + "learning_rate": 0.00014354395127738544, + "loss": 1.1102, + "step": 29345 + }, + { + "epoch": 0.42, + "grad_norm": 0.7109375, + "learning_rate": 0.00014352141187379876, + "loss": 1.1862, + "step": 29350 + }, + { + "epoch": 0.42, + "grad_norm": 0.5625, + "learning_rate": 0.0001434988697422404, + "loss": 0.9587, + "step": 29355 + }, + { + "epoch": 0.42, + "grad_norm": 0.546875, + "learning_rate": 0.00014347632488412337, + "loss": 1.0016, + "step": 29360 + }, + { + "epoch": 0.42, + "grad_norm": 0.7578125, + "learning_rate": 0.00014345377730086076, + "loss": 0.9902, + "step": 29365 + }, + { + "epoch": 0.42, + "grad_norm": 0.578125, + "learning_rate": 0.00014343122699386589, + "loss": 0.9437, + "step": 29370 + }, + { + "epoch": 0.42, + "grad_norm": 0.60546875, + "learning_rate": 0.00014340867396455228, + "loss": 0.9887, + "step": 29375 + }, + { + "epoch": 0.42, + "grad_norm": 0.6171875, + "learning_rate": 0.0001433861182143335, + "loss": 0.8093, + "step": 29380 + }, + { + "epoch": 0.42, + "grad_norm": 0.57421875, + "learning_rate": 0.00014336355974462346, + "loss": 1.0003, + "step": 29385 + }, + { + "epoch": 0.42, + "grad_norm": 0.5, + "learning_rate": 0.00014334099855683606, + "loss": 0.871, + "step": 29390 + }, + { + "epoch": 0.42, + "grad_norm": 0.53125, + "learning_rate": 0.00014331843465238556, + "loss": 1.0591, + "step": 29395 + }, + { + "epoch": 0.42, + "grad_norm": 0.5703125, + "learning_rate": 0.00014329586803268625, + "loss": 0.9311, + "step": 29400 + }, + { + "epoch": 0.42, + "grad_norm": 0.54296875, + "learning_rate": 0.0001432732986991526, + "loss": 0.9105, + "step": 29405 + }, + { + "epoch": 0.42, + "grad_norm": 0.55859375, + "learning_rate": 0.00014325072665319927, + "loss": 0.9017, + "step": 29410 + }, + { + "epoch": 0.42, + "grad_norm": 0.68359375, + "learning_rate": 0.00014322815189624118, + "loss": 1.0109, + "step": 29415 + }, + { + "epoch": 0.42, + "grad_norm": 0.58203125, + "learning_rate": 0.00014320557442969325, + "loss": 0.9553, + "step": 29420 + }, + { + "epoch": 0.42, + "grad_norm": 0.546875, + "learning_rate": 0.00014318299425497075, + "loss": 0.9934, + "step": 29425 + }, + { + "epoch": 0.42, + "grad_norm": 0.65625, + "learning_rate": 0.00014316041137348896, + "loss": 1.0326, + "step": 29430 + }, + { + "epoch": 0.42, + "grad_norm": 0.51953125, + "learning_rate": 0.00014313782578666346, + "loss": 0.8656, + "step": 29435 + }, + { + "epoch": 0.42, + "grad_norm": 0.59765625, + "learning_rate": 0.0001431152374959099, + "loss": 1.0418, + "step": 29440 + }, + { + "epoch": 0.42, + "grad_norm": 0.59765625, + "learning_rate": 0.00014309264650264414, + "loss": 1.0044, + "step": 29445 + }, + { + "epoch": 0.42, + "grad_norm": 0.546875, + "learning_rate": 0.0001430700528082822, + "loss": 0.8477, + "step": 29450 + }, + { + "epoch": 0.42, + "grad_norm": 0.515625, + "learning_rate": 0.00014304745641424035, + "loss": 1.0616, + "step": 29455 + }, + { + "epoch": 0.42, + "grad_norm": 0.59375, + "learning_rate": 0.00014302485732193486, + "loss": 0.9678, + "step": 29460 + }, + { + "epoch": 0.42, + "grad_norm": 0.50390625, + "learning_rate": 0.00014300225553278234, + "loss": 0.8745, + "step": 29465 + }, + { + "epoch": 0.42, + "grad_norm": 0.5390625, + "learning_rate": 0.00014297965104819946, + "loss": 0.8087, + "step": 29470 + }, + { + "epoch": 0.42, + "grad_norm": 0.6875, + "learning_rate": 0.00014295704386960308, + "loss": 1.0838, + "step": 29475 + }, + { + "epoch": 0.42, + "grad_norm": 0.5859375, + "learning_rate": 0.0001429344339984103, + "loss": 0.9963, + "step": 29480 + }, + { + "epoch": 0.42, + "grad_norm": 0.59765625, + "learning_rate": 0.00014291182143603826, + "loss": 0.8762, + "step": 29485 + }, + { + "epoch": 0.42, + "grad_norm": 0.54296875, + "learning_rate": 0.0001428892061839044, + "loss": 0.839, + "step": 29490 + }, + { + "epoch": 0.42, + "grad_norm": 0.59375, + "learning_rate": 0.00014286658824342626, + "loss": 0.8925, + "step": 29495 + }, + { + "epoch": 0.42, + "grad_norm": 0.412109375, + "learning_rate": 0.00014284396761602152, + "loss": 0.8431, + "step": 29500 + }, + { + "epoch": 0.42, + "grad_norm": 0.490234375, + "learning_rate": 0.00014282134430310814, + "loss": 0.9033, + "step": 29505 + }, + { + "epoch": 0.42, + "grad_norm": 0.59375, + "learning_rate": 0.00014279871830610413, + "loss": 0.9588, + "step": 29510 + }, + { + "epoch": 0.42, + "grad_norm": 0.59765625, + "learning_rate": 0.00014277608962642765, + "loss": 0.9106, + "step": 29515 + }, + { + "epoch": 0.42, + "grad_norm": 0.53515625, + "learning_rate": 0.00014275345826549718, + "loss": 0.951, + "step": 29520 + }, + { + "epoch": 0.42, + "grad_norm": 0.51171875, + "learning_rate": 0.0001427308242247313, + "loss": 0.8358, + "step": 29525 + }, + { + "epoch": 0.42, + "grad_norm": 0.609375, + "learning_rate": 0.00014270818750554862, + "loss": 1.019, + "step": 29530 + }, + { + "epoch": 0.42, + "grad_norm": 0.578125, + "learning_rate": 0.00014268554810936818, + "loss": 1.0055, + "step": 29535 + }, + { + "epoch": 0.42, + "grad_norm": 0.498046875, + "learning_rate": 0.00014266290603760892, + "loss": 0.8804, + "step": 29540 + }, + { + "epoch": 0.42, + "grad_norm": 0.5234375, + "learning_rate": 0.00014264026129169014, + "loss": 0.9749, + "step": 29545 + }, + { + "epoch": 0.42, + "grad_norm": 0.52734375, + "learning_rate": 0.0001426176138730312, + "loss": 0.8841, + "step": 29550 + }, + { + "epoch": 0.42, + "grad_norm": 0.490234375, + "learning_rate": 0.0001425949637830517, + "loss": 0.943, + "step": 29555 + }, + { + "epoch": 0.42, + "grad_norm": 0.6796875, + "learning_rate": 0.00014257231102317137, + "loss": 1.0354, + "step": 29560 + }, + { + "epoch": 0.42, + "grad_norm": 0.5546875, + "learning_rate": 0.00014254965559481005, + "loss": 0.8461, + "step": 29565 + }, + { + "epoch": 0.42, + "grad_norm": 0.58203125, + "learning_rate": 0.0001425269974993879, + "loss": 0.959, + "step": 29570 + }, + { + "epoch": 0.42, + "grad_norm": 0.58203125, + "learning_rate": 0.00014250433673832513, + "loss": 1.0894, + "step": 29575 + }, + { + "epoch": 0.42, + "grad_norm": 0.5234375, + "learning_rate": 0.0001424816733130421, + "loss": 0.9041, + "step": 29580 + }, + { + "epoch": 0.42, + "grad_norm": 0.59765625, + "learning_rate": 0.0001424590072249594, + "loss": 0.9017, + "step": 29585 + }, + { + "epoch": 0.42, + "grad_norm": 0.5, + "learning_rate": 0.00014243633847549778, + "loss": 0.9902, + "step": 29590 + }, + { + "epoch": 0.42, + "grad_norm": 0.59375, + "learning_rate": 0.0001424136670660781, + "loss": 1.0025, + "step": 29595 + }, + { + "epoch": 0.42, + "grad_norm": 0.5859375, + "learning_rate": 0.0001423909929981215, + "loss": 0.8774, + "step": 29600 + }, + { + "epoch": 0.42, + "grad_norm": 0.61328125, + "learning_rate": 0.00014236831627304918, + "loss": 0.9068, + "step": 29605 + }, + { + "epoch": 0.42, + "grad_norm": 0.59375, + "learning_rate": 0.00014234563689228253, + "loss": 0.8929, + "step": 29610 + }, + { + "epoch": 0.42, + "grad_norm": 0.52734375, + "learning_rate": 0.00014232295485724314, + "loss": 0.9433, + "step": 29615 + }, + { + "epoch": 0.42, + "grad_norm": 0.53515625, + "learning_rate": 0.00014230027016935273, + "loss": 0.8688, + "step": 29620 + }, + { + "epoch": 0.42, + "grad_norm": 0.59765625, + "learning_rate": 0.0001422775828300332, + "loss": 0.9854, + "step": 29625 + }, + { + "epoch": 0.43, + "grad_norm": 0.5078125, + "learning_rate": 0.00014225489284070667, + "loss": 0.9152, + "step": 29630 + }, + { + "epoch": 0.43, + "grad_norm": 0.5078125, + "learning_rate": 0.0001422322002027953, + "loss": 0.9432, + "step": 29635 + }, + { + "epoch": 0.43, + "grad_norm": 0.60546875, + "learning_rate": 0.0001422095049177215, + "loss": 1.1606, + "step": 29640 + }, + { + "epoch": 0.43, + "grad_norm": 0.58984375, + "learning_rate": 0.00014218680698690792, + "loss": 0.9514, + "step": 29645 + }, + { + "epoch": 0.43, + "grad_norm": 0.58203125, + "learning_rate": 0.00014216410641177717, + "loss": 0.87, + "step": 29650 + }, + { + "epoch": 0.43, + "grad_norm": 0.50390625, + "learning_rate": 0.00014214140319375224, + "loss": 0.9848, + "step": 29655 + }, + { + "epoch": 0.43, + "grad_norm": 0.5625, + "learning_rate": 0.00014211869733425615, + "loss": 0.904, + "step": 29660 + }, + { + "epoch": 0.43, + "grad_norm": 0.69140625, + "learning_rate": 0.00014209598883471215, + "loss": 0.9153, + "step": 29665 + }, + { + "epoch": 0.43, + "grad_norm": 0.55859375, + "learning_rate": 0.00014207327769654365, + "loss": 0.9631, + "step": 29670 + }, + { + "epoch": 0.43, + "grad_norm": 0.6328125, + "learning_rate": 0.00014205056392117414, + "loss": 1.0254, + "step": 29675 + }, + { + "epoch": 0.43, + "grad_norm": 0.59765625, + "learning_rate": 0.00014202784751002744, + "loss": 0.898, + "step": 29680 + }, + { + "epoch": 0.43, + "grad_norm": 0.60546875, + "learning_rate": 0.00014200512846452735, + "loss": 1.1391, + "step": 29685 + }, + { + "epoch": 0.43, + "grad_norm": 0.59375, + "learning_rate": 0.000141982406786098, + "loss": 0.9663, + "step": 29690 + }, + { + "epoch": 0.43, + "grad_norm": 0.50390625, + "learning_rate": 0.00014195968247616357, + "loss": 0.9682, + "step": 29695 + }, + { + "epoch": 0.43, + "grad_norm": 0.63671875, + "learning_rate": 0.00014193695553614845, + "loss": 1.0978, + "step": 29700 + }, + { + "epoch": 0.43, + "grad_norm": 0.56640625, + "learning_rate": 0.00014191422596747716, + "loss": 0.8681, + "step": 29705 + }, + { + "epoch": 0.43, + "grad_norm": 0.57421875, + "learning_rate": 0.00014189149377157452, + "loss": 0.8885, + "step": 29710 + }, + { + "epoch": 0.43, + "grad_norm": 0.55078125, + "learning_rate": 0.0001418687589498653, + "loss": 1.0428, + "step": 29715 + }, + { + "epoch": 0.43, + "grad_norm": 0.62109375, + "learning_rate": 0.00014184602150377456, + "loss": 0.814, + "step": 29720 + }, + { + "epoch": 0.43, + "grad_norm": 0.5703125, + "learning_rate": 0.0001418232814347276, + "loss": 0.9806, + "step": 29725 + }, + { + "epoch": 0.43, + "grad_norm": 0.546875, + "learning_rate": 0.00014180053874414967, + "loss": 0.907, + "step": 29730 + }, + { + "epoch": 0.43, + "grad_norm": 0.5234375, + "learning_rate": 0.00014177779343346638, + "loss": 0.9666, + "step": 29735 + }, + { + "epoch": 0.43, + "grad_norm": 0.578125, + "learning_rate": 0.00014175504550410342, + "loss": 1.0781, + "step": 29740 + }, + { + "epoch": 0.43, + "grad_norm": 0.578125, + "learning_rate": 0.00014173229495748665, + "loss": 0.9353, + "step": 29745 + }, + { + "epoch": 0.43, + "grad_norm": 0.546875, + "learning_rate": 0.00014170954179504213, + "loss": 0.982, + "step": 29750 + }, + { + "epoch": 0.43, + "grad_norm": 0.6484375, + "learning_rate": 0.00014168678601819598, + "loss": 0.9841, + "step": 29755 + }, + { + "epoch": 0.43, + "grad_norm": 0.5625, + "learning_rate": 0.00014166402762837464, + "loss": 0.9569, + "step": 29760 + }, + { + "epoch": 0.43, + "grad_norm": 0.54296875, + "learning_rate": 0.00014164126662700457, + "loss": 1.0546, + "step": 29765 + }, + { + "epoch": 0.43, + "grad_norm": 0.57421875, + "learning_rate": 0.0001416185030155125, + "loss": 0.869, + "step": 29770 + }, + { + "epoch": 0.43, + "grad_norm": 0.55078125, + "learning_rate": 0.00014159573679532525, + "loss": 0.9614, + "step": 29775 + }, + { + "epoch": 0.43, + "grad_norm": 0.55859375, + "learning_rate": 0.00014157296796786989, + "loss": 0.9047, + "step": 29780 + }, + { + "epoch": 0.43, + "grad_norm": 0.5234375, + "learning_rate": 0.0001415501965345735, + "loss": 0.9159, + "step": 29785 + }, + { + "epoch": 0.43, + "grad_norm": 0.65234375, + "learning_rate": 0.00014152742249686347, + "loss": 0.896, + "step": 29790 + }, + { + "epoch": 0.43, + "grad_norm": 0.5, + "learning_rate": 0.00014150464585616733, + "loss": 0.9605, + "step": 29795 + }, + { + "epoch": 0.43, + "grad_norm": 0.5390625, + "learning_rate": 0.0001414818666139127, + "loss": 0.9356, + "step": 29800 + }, + { + "epoch": 0.43, + "grad_norm": 0.55859375, + "learning_rate": 0.00014145908477152742, + "loss": 0.9055, + "step": 29805 + }, + { + "epoch": 0.43, + "grad_norm": 0.671875, + "learning_rate": 0.00014143630033043952, + "loss": 1.1045, + "step": 29810 + }, + { + "epoch": 0.43, + "grad_norm": 0.58984375, + "learning_rate": 0.00014141351329207706, + "loss": 1.0658, + "step": 29815 + }, + { + "epoch": 0.43, + "grad_norm": 0.58984375, + "learning_rate": 0.0001413907236578685, + "loss": 0.9089, + "step": 29820 + }, + { + "epoch": 0.43, + "grad_norm": 0.59765625, + "learning_rate": 0.00014136793142924218, + "loss": 1.1593, + "step": 29825 + }, + { + "epoch": 0.43, + "grad_norm": 0.5703125, + "learning_rate": 0.00014134513660762682, + "loss": 0.8836, + "step": 29830 + }, + { + "epoch": 0.43, + "grad_norm": 0.515625, + "learning_rate": 0.00014132233919445124, + "loss": 0.8684, + "step": 29835 + }, + { + "epoch": 0.43, + "grad_norm": 0.6328125, + "learning_rate": 0.00014129953919114435, + "loss": 0.9364, + "step": 29840 + }, + { + "epoch": 0.43, + "grad_norm": 0.640625, + "learning_rate": 0.0001412767365991353, + "loss": 1.017, + "step": 29845 + }, + { + "epoch": 0.43, + "grad_norm": 0.53515625, + "learning_rate": 0.00014125393141985342, + "loss": 0.9966, + "step": 29850 + }, + { + "epoch": 0.43, + "grad_norm": 0.52734375, + "learning_rate": 0.00014123112365472808, + "loss": 1.0908, + "step": 29855 + }, + { + "epoch": 0.43, + "grad_norm": 0.59375, + "learning_rate": 0.000141208313305189, + "loss": 0.9723, + "step": 29860 + }, + { + "epoch": 0.43, + "grad_norm": 0.5859375, + "learning_rate": 0.0001411855003726659, + "loss": 0.927, + "step": 29865 + }, + { + "epoch": 0.43, + "grad_norm": 0.62890625, + "learning_rate": 0.0001411626848585887, + "loss": 0.9745, + "step": 29870 + }, + { + "epoch": 0.43, + "grad_norm": 0.4921875, + "learning_rate": 0.0001411398667643876, + "loss": 1.08, + "step": 29875 + }, + { + "epoch": 0.43, + "grad_norm": 0.5234375, + "learning_rate": 0.00014111704609149275, + "loss": 0.7581, + "step": 29880 + }, + { + "epoch": 0.43, + "grad_norm": 0.51171875, + "learning_rate": 0.0001410942228413346, + "loss": 0.9267, + "step": 29885 + }, + { + "epoch": 0.43, + "grad_norm": 0.59765625, + "learning_rate": 0.0001410713970153438, + "loss": 0.961, + "step": 29890 + }, + { + "epoch": 0.43, + "grad_norm": 0.5703125, + "learning_rate": 0.00014104856861495103, + "loss": 0.9943, + "step": 29895 + }, + { + "epoch": 0.43, + "grad_norm": 0.58203125, + "learning_rate": 0.0001410257376415873, + "loss": 0.9378, + "step": 29900 + }, + { + "epoch": 0.43, + "grad_norm": 0.5703125, + "learning_rate": 0.00014100290409668352, + "loss": 0.9556, + "step": 29905 + }, + { + "epoch": 0.43, + "grad_norm": 0.5703125, + "learning_rate": 0.00014098006798167104, + "loss": 1.1233, + "step": 29910 + }, + { + "epoch": 0.43, + "grad_norm": 0.451171875, + "learning_rate": 0.00014095722929798122, + "loss": 0.7322, + "step": 29915 + }, + { + "epoch": 0.43, + "grad_norm": 0.60546875, + "learning_rate": 0.00014093438804704567, + "loss": 0.925, + "step": 29920 + }, + { + "epoch": 0.43, + "grad_norm": 0.703125, + "learning_rate": 0.00014091154423029602, + "loss": 0.9481, + "step": 29925 + }, + { + "epoch": 0.43, + "grad_norm": 0.6015625, + "learning_rate": 0.00014088869784916418, + "loss": 0.9175, + "step": 29930 + }, + { + "epoch": 0.43, + "grad_norm": 0.53515625, + "learning_rate": 0.0001408658489050822, + "loss": 0.9337, + "step": 29935 + }, + { + "epoch": 0.43, + "grad_norm": 0.56640625, + "learning_rate": 0.00014084299739948225, + "loss": 0.8176, + "step": 29940 + }, + { + "epoch": 0.43, + "grad_norm": 0.62109375, + "learning_rate": 0.00014082014333379673, + "loss": 0.9413, + "step": 29945 + }, + { + "epoch": 0.43, + "grad_norm": 0.54296875, + "learning_rate": 0.0001407972867094581, + "loss": 0.8769, + "step": 29950 + }, + { + "epoch": 0.43, + "grad_norm": 0.5234375, + "learning_rate": 0.00014077442752789913, + "loss": 0.8575, + "step": 29955 + }, + { + "epoch": 0.43, + "grad_norm": 0.578125, + "learning_rate": 0.0001407515657905526, + "loss": 0.9906, + "step": 29960 + }, + { + "epoch": 0.43, + "grad_norm": 0.609375, + "learning_rate": 0.00014072870149885148, + "loss": 0.9162, + "step": 29965 + }, + { + "epoch": 0.43, + "grad_norm": 0.57421875, + "learning_rate": 0.00014070583465422898, + "loss": 0.9681, + "step": 29970 + }, + { + "epoch": 0.43, + "grad_norm": 0.69921875, + "learning_rate": 0.00014068296525811838, + "loss": 0.9826, + "step": 29975 + }, + { + "epoch": 0.43, + "grad_norm": 0.64453125, + "learning_rate": 0.0001406600933119532, + "loss": 0.9846, + "step": 29980 + }, + { + "epoch": 0.43, + "grad_norm": 0.5703125, + "learning_rate": 0.00014063721881716707, + "loss": 0.9939, + "step": 29985 + }, + { + "epoch": 0.43, + "grad_norm": 0.52734375, + "learning_rate": 0.00014061434177519376, + "loss": 0.9893, + "step": 29990 + }, + { + "epoch": 0.43, + "grad_norm": 0.66796875, + "learning_rate": 0.00014059146218746728, + "loss": 0.979, + "step": 29995 + }, + { + "epoch": 0.43, + "grad_norm": 0.53125, + "learning_rate": 0.00014056858005542173, + "loss": 0.9693, + "step": 30000 + }, + { + "epoch": 0.43, + "grad_norm": 0.57421875, + "learning_rate": 0.00014054569538049134, + "loss": 1.0135, + "step": 30005 + }, + { + "epoch": 0.43, + "grad_norm": 0.5703125, + "learning_rate": 0.0001405228081641106, + "loss": 0.9971, + "step": 30010 + }, + { + "epoch": 0.43, + "grad_norm": 0.65625, + "learning_rate": 0.0001404999184077141, + "loss": 1.0018, + "step": 30015 + }, + { + "epoch": 0.43, + "grad_norm": 0.6328125, + "learning_rate": 0.00014047702611273658, + "loss": 0.9322, + "step": 30020 + }, + { + "epoch": 0.43, + "grad_norm": 0.66796875, + "learning_rate": 0.000140454131280613, + "loss": 0.9966, + "step": 30025 + }, + { + "epoch": 0.43, + "grad_norm": 0.58984375, + "learning_rate": 0.00014043123391277836, + "loss": 1.0644, + "step": 30030 + }, + { + "epoch": 0.43, + "grad_norm": 0.5546875, + "learning_rate": 0.00014040833401066793, + "loss": 1.1402, + "step": 30035 + }, + { + "epoch": 0.43, + "grad_norm": 0.60546875, + "learning_rate": 0.00014038543157571712, + "loss": 0.9642, + "step": 30040 + }, + { + "epoch": 0.43, + "grad_norm": 0.62890625, + "learning_rate": 0.00014036252660936142, + "loss": 0.9608, + "step": 30045 + }, + { + "epoch": 0.43, + "grad_norm": 0.53515625, + "learning_rate": 0.00014033961911303665, + "loss": 0.9696, + "step": 30050 + }, + { + "epoch": 0.43, + "grad_norm": 0.51953125, + "learning_rate": 0.00014031670908817856, + "loss": 0.9089, + "step": 30055 + }, + { + "epoch": 0.43, + "grad_norm": 0.6015625, + "learning_rate": 0.00014029379653622326, + "loss": 0.993, + "step": 30060 + }, + { + "epoch": 0.43, + "grad_norm": 0.63671875, + "learning_rate": 0.0001402708814586069, + "loss": 0.8867, + "step": 30065 + }, + { + "epoch": 0.43, + "grad_norm": 0.515625, + "learning_rate": 0.0001402479638567658, + "loss": 0.9627, + "step": 30070 + }, + { + "epoch": 0.43, + "grad_norm": 0.66796875, + "learning_rate": 0.0001402250437321365, + "loss": 0.9215, + "step": 30075 + }, + { + "epoch": 0.43, + "grad_norm": 0.65234375, + "learning_rate": 0.00014020212108615564, + "loss": 0.9507, + "step": 30080 + }, + { + "epoch": 0.43, + "grad_norm": 0.5703125, + "learning_rate": 0.00014017919592026006, + "loss": 0.9175, + "step": 30085 + }, + { + "epoch": 0.43, + "grad_norm": 0.5546875, + "learning_rate": 0.0001401562682358867, + "loss": 0.9142, + "step": 30090 + }, + { + "epoch": 0.43, + "grad_norm": 0.546875, + "learning_rate": 0.00014013333803447275, + "loss": 1.0266, + "step": 30095 + }, + { + "epoch": 0.43, + "grad_norm": 0.546875, + "learning_rate": 0.00014011040531745542, + "loss": 0.8784, + "step": 30100 + }, + { + "epoch": 0.43, + "grad_norm": 0.51171875, + "learning_rate": 0.0001400874700862722, + "loss": 0.8888, + "step": 30105 + }, + { + "epoch": 0.43, + "grad_norm": 0.55078125, + "learning_rate": 0.00014006453234236075, + "loss": 1.0052, + "step": 30110 + }, + { + "epoch": 0.43, + "grad_norm": 0.48046875, + "learning_rate": 0.00014004159208715874, + "loss": 0.9737, + "step": 30115 + }, + { + "epoch": 0.43, + "grad_norm": 0.859375, + "learning_rate": 0.00014001864932210417, + "loss": 1.165, + "step": 30120 + }, + { + "epoch": 0.43, + "grad_norm": 0.56640625, + "learning_rate": 0.00013999570404863503, + "loss": 1.0133, + "step": 30125 + }, + { + "epoch": 0.43, + "grad_norm": 0.5390625, + "learning_rate": 0.00013997275626818965, + "loss": 0.9089, + "step": 30130 + }, + { + "epoch": 0.43, + "grad_norm": 0.6015625, + "learning_rate": 0.0001399498059822064, + "loss": 1.0132, + "step": 30135 + }, + { + "epoch": 0.43, + "grad_norm": 0.55859375, + "learning_rate": 0.00013992685319212376, + "loss": 0.8784, + "step": 30140 + }, + { + "epoch": 0.43, + "grad_norm": 0.5078125, + "learning_rate": 0.00013990389789938053, + "loss": 0.9172, + "step": 30145 + }, + { + "epoch": 0.43, + "grad_norm": 0.55859375, + "learning_rate": 0.0001398809401054155, + "loss": 1.0431, + "step": 30150 + }, + { + "epoch": 0.43, + "grad_norm": 0.5, + "learning_rate": 0.00013985797981166774, + "loss": 1.0053, + "step": 30155 + }, + { + "epoch": 0.43, + "grad_norm": 0.62890625, + "learning_rate": 0.0001398350170195764, + "loss": 1.0129, + "step": 30160 + }, + { + "epoch": 0.43, + "grad_norm": 0.6328125, + "learning_rate": 0.00013981205173058082, + "loss": 1.0763, + "step": 30165 + }, + { + "epoch": 0.43, + "grad_norm": 0.72265625, + "learning_rate": 0.00013978908394612053, + "loss": 1.1376, + "step": 30170 + }, + { + "epoch": 0.43, + "grad_norm": 0.59375, + "learning_rate": 0.00013976611366763514, + "loss": 0.9536, + "step": 30175 + }, + { + "epoch": 0.43, + "grad_norm": 0.56640625, + "learning_rate": 0.0001397431408965644, + "loss": 1.0176, + "step": 30180 + }, + { + "epoch": 0.43, + "grad_norm": 0.52734375, + "learning_rate": 0.00013972016563434838, + "loss": 1.008, + "step": 30185 + }, + { + "epoch": 0.43, + "grad_norm": 0.58203125, + "learning_rate": 0.00013969718788242713, + "loss": 1.0744, + "step": 30190 + }, + { + "epoch": 0.43, + "grad_norm": 0.6171875, + "learning_rate": 0.00013967420764224092, + "loss": 0.8735, + "step": 30195 + }, + { + "epoch": 0.43, + "grad_norm": 0.61328125, + "learning_rate": 0.0001396512249152302, + "loss": 1.1065, + "step": 30200 + }, + { + "epoch": 0.43, + "grad_norm": 0.54296875, + "learning_rate": 0.00013962823970283553, + "loss": 0.9289, + "step": 30205 + }, + { + "epoch": 0.43, + "grad_norm": 0.5625, + "learning_rate": 0.00013960525200649765, + "loss": 1.0908, + "step": 30210 + }, + { + "epoch": 0.43, + "grad_norm": 0.56640625, + "learning_rate": 0.00013958226182765753, + "loss": 1.0867, + "step": 30215 + }, + { + "epoch": 0.43, + "grad_norm": 0.59375, + "learning_rate": 0.0001395592691677561, + "loss": 0.9663, + "step": 30220 + }, + { + "epoch": 0.43, + "grad_norm": 0.65234375, + "learning_rate": 0.00013953627402823465, + "loss": 0.9168, + "step": 30225 + }, + { + "epoch": 0.43, + "grad_norm": 0.5546875, + "learning_rate": 0.0001395132764105345, + "loss": 0.8632, + "step": 30230 + }, + { + "epoch": 0.43, + "grad_norm": 0.494140625, + "learning_rate": 0.00013949027631609718, + "loss": 0.9526, + "step": 30235 + }, + { + "epoch": 0.43, + "grad_norm": 0.53125, + "learning_rate": 0.0001394672737463644, + "loss": 0.9847, + "step": 30240 + }, + { + "epoch": 0.43, + "grad_norm": 0.58203125, + "learning_rate": 0.00013944426870277793, + "loss": 0.9977, + "step": 30245 + }, + { + "epoch": 0.43, + "grad_norm": 0.5625, + "learning_rate": 0.00013942126118677973, + "loss": 0.8099, + "step": 30250 + }, + { + "epoch": 0.43, + "grad_norm": 0.59765625, + "learning_rate": 0.00013939825119981206, + "loss": 0.9267, + "step": 30255 + }, + { + "epoch": 0.43, + "grad_norm": 0.53125, + "learning_rate": 0.0001393752387433171, + "loss": 0.9748, + "step": 30260 + }, + { + "epoch": 0.43, + "grad_norm": 0.486328125, + "learning_rate": 0.00013935222381873728, + "loss": 1.0083, + "step": 30265 + }, + { + "epoch": 0.43, + "grad_norm": 0.609375, + "learning_rate": 0.00013932920642751535, + "loss": 0.9495, + "step": 30270 + }, + { + "epoch": 0.43, + "grad_norm": 0.71484375, + "learning_rate": 0.0001393061865710939, + "loss": 0.913, + "step": 30275 + }, + { + "epoch": 0.43, + "grad_norm": 0.5703125, + "learning_rate": 0.00013928316425091593, + "loss": 0.9324, + "step": 30280 + }, + { + "epoch": 0.43, + "grad_norm": 0.5078125, + "learning_rate": 0.00013926013946842449, + "loss": 0.8735, + "step": 30285 + }, + { + "epoch": 0.43, + "grad_norm": 0.51953125, + "learning_rate": 0.00013923711222506277, + "loss": 0.903, + "step": 30290 + }, + { + "epoch": 0.43, + "grad_norm": 0.5703125, + "learning_rate": 0.00013921408252227422, + "loss": 0.935, + "step": 30295 + }, + { + "epoch": 0.43, + "grad_norm": 0.78125, + "learning_rate": 0.00013919105036150226, + "loss": 1.0949, + "step": 30300 + }, + { + "epoch": 0.43, + "grad_norm": 0.56640625, + "learning_rate": 0.00013916801574419068, + "loss": 0.8978, + "step": 30305 + }, + { + "epoch": 0.43, + "grad_norm": 0.57421875, + "learning_rate": 0.00013914497867178322, + "loss": 1.004, + "step": 30310 + }, + { + "epoch": 0.43, + "grad_norm": 0.953125, + "learning_rate": 0.00013912193914572391, + "loss": 1.0612, + "step": 30315 + }, + { + "epoch": 0.43, + "grad_norm": 0.5703125, + "learning_rate": 0.00013909889716745693, + "loss": 0.9002, + "step": 30320 + }, + { + "epoch": 0.43, + "grad_norm": 0.59375, + "learning_rate": 0.00013907585273842656, + "loss": 0.8754, + "step": 30325 + }, + { + "epoch": 0.44, + "grad_norm": 0.58984375, + "learning_rate": 0.00013905280586007718, + "loss": 0.9143, + "step": 30330 + }, + { + "epoch": 0.44, + "grad_norm": 0.44921875, + "learning_rate": 0.0001390297565338535, + "loss": 0.6927, + "step": 30335 + }, + { + "epoch": 0.44, + "grad_norm": 0.55078125, + "learning_rate": 0.00013900670476120022, + "loss": 0.9526, + "step": 30340 + }, + { + "epoch": 0.44, + "grad_norm": 0.625, + "learning_rate": 0.00013898365054356226, + "loss": 0.9771, + "step": 30345 + }, + { + "epoch": 0.44, + "grad_norm": 0.6171875, + "learning_rate": 0.00013896059388238466, + "loss": 0.9911, + "step": 30350 + }, + { + "epoch": 0.44, + "grad_norm": 0.52734375, + "learning_rate": 0.00013893753477911268, + "loss": 0.9632, + "step": 30355 + }, + { + "epoch": 0.44, + "grad_norm": 0.57421875, + "learning_rate": 0.0001389144732351917, + "loss": 1.0988, + "step": 30360 + }, + { + "epoch": 0.44, + "grad_norm": 0.60546875, + "learning_rate": 0.0001388914092520672, + "loss": 0.9119, + "step": 30365 + }, + { + "epoch": 0.44, + "grad_norm": 0.5546875, + "learning_rate": 0.00013886834283118483, + "loss": 0.8508, + "step": 30370 + }, + { + "epoch": 0.44, + "grad_norm": 0.5546875, + "learning_rate": 0.0001388452739739905, + "loss": 0.9863, + "step": 30375 + }, + { + "epoch": 0.44, + "grad_norm": 0.60546875, + "learning_rate": 0.0001388222026819302, + "loss": 0.9309, + "step": 30380 + }, + { + "epoch": 0.44, + "grad_norm": 0.6484375, + "learning_rate": 0.00013879912895644995, + "loss": 0.9081, + "step": 30385 + }, + { + "epoch": 0.44, + "grad_norm": 0.578125, + "learning_rate": 0.00013877605279899612, + "loss": 0.9678, + "step": 30390 + }, + { + "epoch": 0.44, + "grad_norm": 0.55859375, + "learning_rate": 0.00013875297421101518, + "loss": 0.8776, + "step": 30395 + }, + { + "epoch": 0.44, + "grad_norm": 0.53125, + "learning_rate": 0.00013872989319395367, + "loss": 0.8552, + "step": 30400 + }, + { + "epoch": 0.44, + "grad_norm": 0.55078125, + "learning_rate": 0.0001387068097492583, + "loss": 0.9758, + "step": 30405 + }, + { + "epoch": 0.44, + "grad_norm": 0.60546875, + "learning_rate": 0.00013868372387837604, + "loss": 0.9454, + "step": 30410 + }, + { + "epoch": 0.44, + "grad_norm": 0.60546875, + "learning_rate": 0.00013866063558275394, + "loss": 1.0248, + "step": 30415 + }, + { + "epoch": 0.44, + "grad_norm": 0.58984375, + "learning_rate": 0.00013863754486383913, + "loss": 1.0927, + "step": 30420 + }, + { + "epoch": 0.44, + "grad_norm": 0.60546875, + "learning_rate": 0.00013861445172307904, + "loss": 0.9265, + "step": 30425 + }, + { + "epoch": 0.44, + "grad_norm": 0.6015625, + "learning_rate": 0.0001385913561619211, + "loss": 1.1097, + "step": 30430 + }, + { + "epoch": 0.44, + "grad_norm": 0.58984375, + "learning_rate": 0.00013856825818181302, + "loss": 1.0002, + "step": 30435 + }, + { + "epoch": 0.44, + "grad_norm": 0.65234375, + "learning_rate": 0.0001385451577842026, + "loss": 1.1563, + "step": 30440 + }, + { + "epoch": 0.44, + "grad_norm": 0.5078125, + "learning_rate": 0.00013852205497053775, + "loss": 0.8349, + "step": 30445 + }, + { + "epoch": 0.44, + "grad_norm": 0.6171875, + "learning_rate": 0.00013849894974226666, + "loss": 0.9589, + "step": 30450 + }, + { + "epoch": 0.44, + "grad_norm": 0.5234375, + "learning_rate": 0.00013847584210083754, + "loss": 1.0755, + "step": 30455 + }, + { + "epoch": 0.44, + "grad_norm": 0.58203125, + "learning_rate": 0.00013845273204769884, + "loss": 0.9441, + "step": 30460 + }, + { + "epoch": 0.44, + "grad_norm": 0.5859375, + "learning_rate": 0.00013842961958429907, + "loss": 1.0065, + "step": 30465 + }, + { + "epoch": 0.44, + "grad_norm": 0.61328125, + "learning_rate": 0.000138406504712087, + "loss": 0.9354, + "step": 30470 + }, + { + "epoch": 0.44, + "grad_norm": 0.62109375, + "learning_rate": 0.00013838338743251148, + "loss": 0.9546, + "step": 30475 + }, + { + "epoch": 0.44, + "grad_norm": 0.578125, + "learning_rate": 0.0001383602677470215, + "loss": 0.9654, + "step": 30480 + }, + { + "epoch": 0.44, + "grad_norm": 0.60546875, + "learning_rate": 0.00013833714565706626, + "loss": 0.9255, + "step": 30485 + }, + { + "epoch": 0.44, + "grad_norm": 0.54296875, + "learning_rate": 0.00013831402116409506, + "loss": 0.9128, + "step": 30490 + }, + { + "epoch": 0.44, + "grad_norm": 0.51171875, + "learning_rate": 0.00013829089426955736, + "loss": 0.8833, + "step": 30495 + }, + { + "epoch": 0.44, + "grad_norm": 0.546875, + "learning_rate": 0.0001382677649749028, + "loss": 0.9549, + "step": 30500 + }, + { + "epoch": 0.44, + "grad_norm": 0.67578125, + "learning_rate": 0.0001382446332815812, + "loss": 0.9081, + "step": 30505 + }, + { + "epoch": 0.44, + "grad_norm": 0.53515625, + "learning_rate": 0.0001382214991910424, + "loss": 0.9331, + "step": 30510 + }, + { + "epoch": 0.44, + "grad_norm": 0.58984375, + "learning_rate": 0.0001381983627047365, + "loss": 0.9937, + "step": 30515 + }, + { + "epoch": 0.44, + "grad_norm": 0.5625, + "learning_rate": 0.0001381752238241137, + "loss": 0.8444, + "step": 30520 + }, + { + "epoch": 0.44, + "grad_norm": 0.58984375, + "learning_rate": 0.0001381520825506244, + "loss": 1.0127, + "step": 30525 + }, + { + "epoch": 0.44, + "grad_norm": 0.5, + "learning_rate": 0.00013812893888571917, + "loss": 0.9867, + "step": 30530 + }, + { + "epoch": 0.44, + "grad_norm": 0.59765625, + "learning_rate": 0.00013810579283084855, + "loss": 0.9169, + "step": 30535 + }, + { + "epoch": 0.44, + "grad_norm": 0.58203125, + "learning_rate": 0.00013808264438746346, + "loss": 0.915, + "step": 30540 + }, + { + "epoch": 0.44, + "grad_norm": 0.59375, + "learning_rate": 0.00013805949355701487, + "loss": 0.9577, + "step": 30545 + }, + { + "epoch": 0.44, + "grad_norm": 0.46484375, + "learning_rate": 0.00013803634034095384, + "loss": 0.9792, + "step": 30550 + }, + { + "epoch": 0.44, + "grad_norm": 0.59375, + "learning_rate": 0.00013801318474073167, + "loss": 0.9618, + "step": 30555 + }, + { + "epoch": 0.44, + "grad_norm": 0.5390625, + "learning_rate": 0.00013799002675779983, + "loss": 0.8944, + "step": 30560 + }, + { + "epoch": 0.44, + "grad_norm": 0.5234375, + "learning_rate": 0.00013796686639360982, + "loss": 0.919, + "step": 30565 + }, + { + "epoch": 0.44, + "grad_norm": 0.53125, + "learning_rate": 0.00013794370364961342, + "loss": 1.0096, + "step": 30570 + }, + { + "epoch": 0.44, + "grad_norm": 0.69921875, + "learning_rate": 0.00013792053852726242, + "loss": 1.1821, + "step": 30575 + }, + { + "epoch": 0.44, + "grad_norm": 0.67578125, + "learning_rate": 0.00013789737102800888, + "loss": 1.1147, + "step": 30580 + }, + { + "epoch": 0.44, + "grad_norm": 0.50390625, + "learning_rate": 0.000137874201153305, + "loss": 1.0756, + "step": 30585 + }, + { + "epoch": 0.44, + "grad_norm": 0.57421875, + "learning_rate": 0.00013785102890460303, + "loss": 0.9199, + "step": 30590 + }, + { + "epoch": 0.44, + "grad_norm": 0.515625, + "learning_rate": 0.00013782785428335546, + "loss": 0.9286, + "step": 30595 + }, + { + "epoch": 0.44, + "grad_norm": 0.62109375, + "learning_rate": 0.0001378046772910149, + "loss": 0.9718, + "step": 30600 + }, + { + "epoch": 0.44, + "grad_norm": 0.83984375, + "learning_rate": 0.0001377814979290341, + "loss": 1.0316, + "step": 30605 + }, + { + "epoch": 0.44, + "grad_norm": 0.51171875, + "learning_rate": 0.00013775831619886603, + "loss": 0.9671, + "step": 30610 + }, + { + "epoch": 0.44, + "grad_norm": 0.6015625, + "learning_rate": 0.0001377351321019636, + "loss": 0.9524, + "step": 30615 + }, + { + "epoch": 0.44, + "grad_norm": 0.5078125, + "learning_rate": 0.00013771194563978024, + "loss": 0.9654, + "step": 30620 + }, + { + "epoch": 0.44, + "grad_norm": 0.5546875, + "learning_rate": 0.00013768875681376908, + "loss": 0.9857, + "step": 30625 + }, + { + "epoch": 0.44, + "grad_norm": 0.58203125, + "learning_rate": 0.00013766556562538375, + "loss": 1.0038, + "step": 30630 + }, + { + "epoch": 0.44, + "grad_norm": 0.57421875, + "learning_rate": 0.0001376423720760779, + "loss": 0.9687, + "step": 30635 + }, + { + "epoch": 0.44, + "grad_norm": 0.5625, + "learning_rate": 0.00013761917616730523, + "loss": 0.9222, + "step": 30640 + }, + { + "epoch": 0.44, + "grad_norm": 0.58984375, + "learning_rate": 0.00013759597790051982, + "loss": 0.9985, + "step": 30645 + }, + { + "epoch": 0.44, + "grad_norm": 0.62109375, + "learning_rate": 0.00013757277727717565, + "loss": 1.0839, + "step": 30650 + }, + { + "epoch": 0.44, + "grad_norm": 0.5625, + "learning_rate": 0.000137549574298727, + "loss": 0.9799, + "step": 30655 + }, + { + "epoch": 0.44, + "grad_norm": 0.52734375, + "learning_rate": 0.0001375263689666283, + "loss": 0.9521, + "step": 30660 + }, + { + "epoch": 0.44, + "grad_norm": 0.494140625, + "learning_rate": 0.00013750316128233406, + "loss": 0.9973, + "step": 30665 + }, + { + "epoch": 0.44, + "grad_norm": 0.67578125, + "learning_rate": 0.00013747995124729892, + "loss": 0.8904, + "step": 30670 + }, + { + "epoch": 0.44, + "grad_norm": 0.482421875, + "learning_rate": 0.00013745673886297782, + "loss": 0.9553, + "step": 30675 + }, + { + "epoch": 0.44, + "grad_norm": 0.5546875, + "learning_rate": 0.0001374335241308256, + "loss": 1.0507, + "step": 30680 + }, + { + "epoch": 0.44, + "grad_norm": 0.5859375, + "learning_rate": 0.00013741030705229746, + "loss": 0.9111, + "step": 30685 + }, + { + "epoch": 0.44, + "grad_norm": 0.5546875, + "learning_rate": 0.00013738708762884872, + "loss": 0.9156, + "step": 30690 + }, + { + "epoch": 0.44, + "grad_norm": 0.67578125, + "learning_rate": 0.00013736386586193472, + "loss": 1.0744, + "step": 30695 + }, + { + "epoch": 0.44, + "grad_norm": 0.54296875, + "learning_rate": 0.000137340641753011, + "loss": 1.0734, + "step": 30700 + }, + { + "epoch": 0.44, + "grad_norm": 0.55078125, + "learning_rate": 0.0001373174153035334, + "loss": 0.8312, + "step": 30705 + }, + { + "epoch": 0.44, + "grad_norm": 0.43359375, + "learning_rate": 0.00013729418651495766, + "loss": 0.829, + "step": 30710 + }, + { + "epoch": 0.44, + "grad_norm": 0.6328125, + "learning_rate": 0.00013727095538873987, + "loss": 0.8859, + "step": 30715 + }, + { + "epoch": 0.44, + "grad_norm": 0.4609375, + "learning_rate": 0.00013724772192633616, + "loss": 0.8469, + "step": 30720 + }, + { + "epoch": 0.44, + "grad_norm": 0.59375, + "learning_rate": 0.00013722448612920278, + "loss": 0.9399, + "step": 30725 + }, + { + "epoch": 0.44, + "grad_norm": 0.474609375, + "learning_rate": 0.0001372012479987962, + "loss": 0.8732, + "step": 30730 + }, + { + "epoch": 0.44, + "grad_norm": 0.59375, + "learning_rate": 0.00013717800753657307, + "loss": 0.8908, + "step": 30735 + }, + { + "epoch": 0.44, + "grad_norm": 0.6640625, + "learning_rate": 0.0001371547647439901, + "loss": 0.9621, + "step": 30740 + }, + { + "epoch": 0.44, + "grad_norm": 0.546875, + "learning_rate": 0.00013713151962250412, + "loss": 1.0011, + "step": 30745 + }, + { + "epoch": 0.44, + "grad_norm": 0.6640625, + "learning_rate": 0.0001371082721735722, + "loss": 1.0021, + "step": 30750 + }, + { + "epoch": 0.44, + "grad_norm": 0.49609375, + "learning_rate": 0.00013708502239865152, + "loss": 0.9714, + "step": 30755 + }, + { + "epoch": 0.44, + "grad_norm": 0.578125, + "learning_rate": 0.00013706177029919943, + "loss": 1.1514, + "step": 30760 + }, + { + "epoch": 0.44, + "grad_norm": 0.50390625, + "learning_rate": 0.0001370385158766733, + "loss": 0.8423, + "step": 30765 + }, + { + "epoch": 0.44, + "grad_norm": 0.5390625, + "learning_rate": 0.00013701525913253088, + "loss": 0.9267, + "step": 30770 + }, + { + "epoch": 0.44, + "grad_norm": 0.6484375, + "learning_rate": 0.00013699200006822985, + "loss": 0.9194, + "step": 30775 + }, + { + "epoch": 0.44, + "grad_norm": 0.515625, + "learning_rate": 0.0001369687386852281, + "loss": 0.9124, + "step": 30780 + }, + { + "epoch": 0.44, + "grad_norm": 0.6015625, + "learning_rate": 0.00013694547498498372, + "loss": 0.9982, + "step": 30785 + }, + { + "epoch": 0.44, + "grad_norm": 0.53125, + "learning_rate": 0.0001369222089689549, + "loss": 0.8867, + "step": 30790 + }, + { + "epoch": 0.44, + "grad_norm": 0.58203125, + "learning_rate": 0.00013689894063859996, + "loss": 0.9391, + "step": 30795 + }, + { + "epoch": 0.44, + "grad_norm": 0.609375, + "learning_rate": 0.0001368756699953774, + "loss": 0.7938, + "step": 30800 + }, + { + "epoch": 0.44, + "grad_norm": 0.62890625, + "learning_rate": 0.00013685239704074582, + "loss": 1.0281, + "step": 30805 + }, + { + "epoch": 0.44, + "grad_norm": 0.59765625, + "learning_rate": 0.00013682912177616404, + "loss": 1.002, + "step": 30810 + }, + { + "epoch": 0.44, + "grad_norm": 0.56640625, + "learning_rate": 0.00013680584420309097, + "loss": 0.8366, + "step": 30815 + }, + { + "epoch": 0.44, + "grad_norm": 0.53515625, + "learning_rate": 0.00013678256432298567, + "loss": 0.981, + "step": 30820 + }, + { + "epoch": 0.44, + "grad_norm": 0.640625, + "learning_rate": 0.00013675928213730736, + "loss": 1.0917, + "step": 30825 + }, + { + "epoch": 0.44, + "grad_norm": 0.51953125, + "learning_rate": 0.00013673599764751535, + "loss": 0.9331, + "step": 30830 + }, + { + "epoch": 0.44, + "grad_norm": 0.6484375, + "learning_rate": 0.00013671271085506916, + "loss": 0.9488, + "step": 30835 + }, + { + "epoch": 0.44, + "grad_norm": 0.625, + "learning_rate": 0.00013668942176142847, + "loss": 0.9893, + "step": 30840 + }, + { + "epoch": 0.44, + "grad_norm": 0.58203125, + "learning_rate": 0.00013666613036805302, + "loss": 0.8505, + "step": 30845 + }, + { + "epoch": 0.44, + "grad_norm": 0.57421875, + "learning_rate": 0.0001366428366764028, + "loss": 1.0239, + "step": 30850 + }, + { + "epoch": 0.44, + "grad_norm": 0.5390625, + "learning_rate": 0.0001366195406879378, + "loss": 0.9631, + "step": 30855 + }, + { + "epoch": 0.44, + "grad_norm": 0.52734375, + "learning_rate": 0.00013659624240411827, + "loss": 0.8366, + "step": 30860 + }, + { + "epoch": 0.44, + "grad_norm": 0.53125, + "learning_rate": 0.00013657294182640463, + "loss": 0.9888, + "step": 30865 + }, + { + "epoch": 0.44, + "grad_norm": 0.51953125, + "learning_rate": 0.0001365496389562573, + "loss": 0.9006, + "step": 30870 + }, + { + "epoch": 0.44, + "grad_norm": 0.6484375, + "learning_rate": 0.00013652633379513703, + "loss": 0.9934, + "step": 30875 + }, + { + "epoch": 0.44, + "grad_norm": 0.48828125, + "learning_rate": 0.00013650302634450454, + "loss": 0.9593, + "step": 30880 + }, + { + "epoch": 0.44, + "grad_norm": 0.5703125, + "learning_rate": 0.00013647971660582075, + "loss": 1.047, + "step": 30885 + }, + { + "epoch": 0.44, + "grad_norm": 0.59765625, + "learning_rate": 0.0001364564045805468, + "loss": 0.9426, + "step": 30890 + }, + { + "epoch": 0.44, + "grad_norm": 0.5625, + "learning_rate": 0.00013643309027014389, + "loss": 1.0609, + "step": 30895 + }, + { + "epoch": 0.44, + "grad_norm": 0.609375, + "learning_rate": 0.0001364097736760734, + "loss": 0.8709, + "step": 30900 + }, + { + "epoch": 0.44, + "grad_norm": 0.61328125, + "learning_rate": 0.0001363864547997968, + "loss": 0.9918, + "step": 30905 + }, + { + "epoch": 0.44, + "grad_norm": 0.5234375, + "learning_rate": 0.0001363631336427758, + "loss": 0.8467, + "step": 30910 + }, + { + "epoch": 0.44, + "grad_norm": 0.54296875, + "learning_rate": 0.00013633981020647214, + "loss": 0.9783, + "step": 30915 + }, + { + "epoch": 0.44, + "grad_norm": 0.451171875, + "learning_rate": 0.0001363164844923478, + "loss": 1.0742, + "step": 30920 + }, + { + "epoch": 0.44, + "grad_norm": 0.58203125, + "learning_rate": 0.00013629315650186488, + "loss": 0.929, + "step": 30925 + }, + { + "epoch": 0.44, + "grad_norm": 0.53125, + "learning_rate": 0.00013626982623648554, + "loss": 0.8576, + "step": 30930 + }, + { + "epoch": 0.44, + "grad_norm": 0.58984375, + "learning_rate": 0.0001362464936976722, + "loss": 0.873, + "step": 30935 + }, + { + "epoch": 0.44, + "grad_norm": 0.5546875, + "learning_rate": 0.00013622315888688736, + "loss": 0.9349, + "step": 30940 + }, + { + "epoch": 0.44, + "grad_norm": 0.57421875, + "learning_rate": 0.00013619982180559364, + "loss": 0.9348, + "step": 30945 + }, + { + "epoch": 0.44, + "grad_norm": 0.43359375, + "learning_rate": 0.00013617648245525392, + "loss": 0.8357, + "step": 30950 + }, + { + "epoch": 0.44, + "grad_norm": 0.66015625, + "learning_rate": 0.00013615314083733102, + "loss": 0.9747, + "step": 30955 + }, + { + "epoch": 0.44, + "grad_norm": 0.5703125, + "learning_rate": 0.00013612979695328808, + "loss": 0.9304, + "step": 30960 + }, + { + "epoch": 0.44, + "grad_norm": 0.640625, + "learning_rate": 0.00013610645080458833, + "loss": 0.9786, + "step": 30965 + }, + { + "epoch": 0.44, + "grad_norm": 0.54296875, + "learning_rate": 0.00013608310239269513, + "loss": 0.8328, + "step": 30970 + }, + { + "epoch": 0.44, + "grad_norm": 0.6328125, + "learning_rate": 0.00013605975171907195, + "loss": 0.8963, + "step": 30975 + }, + { + "epoch": 0.44, + "grad_norm": 0.703125, + "learning_rate": 0.0001360363987851825, + "loss": 1.0268, + "step": 30980 + }, + { + "epoch": 0.44, + "grad_norm": 0.57421875, + "learning_rate": 0.00013601304359249053, + "loss": 0.9399, + "step": 30985 + }, + { + "epoch": 0.44, + "grad_norm": 0.5390625, + "learning_rate": 0.00013598968614245995, + "loss": 0.9845, + "step": 30990 + }, + { + "epoch": 0.44, + "grad_norm": 0.53125, + "learning_rate": 0.00013596632643655485, + "loss": 0.7482, + "step": 30995 + }, + { + "epoch": 0.44, + "grad_norm": 0.57421875, + "learning_rate": 0.00013594296447623947, + "loss": 0.95, + "step": 31000 + }, + { + "epoch": 0.44, + "grad_norm": 0.59765625, + "learning_rate": 0.00013591960026297813, + "loss": 0.8751, + "step": 31005 + }, + { + "epoch": 0.44, + "grad_norm": 0.53125, + "learning_rate": 0.00013589623379823532, + "loss": 0.85, + "step": 31010 + }, + { + "epoch": 0.44, + "grad_norm": 0.56640625, + "learning_rate": 0.00013587286508347574, + "loss": 0.8601, + "step": 31015 + }, + { + "epoch": 0.44, + "grad_norm": 0.58984375, + "learning_rate": 0.0001358494941201641, + "loss": 0.7568, + "step": 31020 + }, + { + "epoch": 0.45, + "grad_norm": 0.9921875, + "learning_rate": 0.0001358261209097653, + "loss": 0.9786, + "step": 31025 + }, + { + "epoch": 0.45, + "grad_norm": 0.5078125, + "learning_rate": 0.0001358027454537445, + "loss": 0.9896, + "step": 31030 + }, + { + "epoch": 0.45, + "grad_norm": 0.5859375, + "learning_rate": 0.00013577936775356676, + "loss": 1.0181, + "step": 31035 + }, + { + "epoch": 0.45, + "grad_norm": 0.49609375, + "learning_rate": 0.0001357559878106976, + "loss": 0.8901, + "step": 31040 + }, + { + "epoch": 0.45, + "grad_norm": 0.55078125, + "learning_rate": 0.00013573260562660232, + "loss": 1.1547, + "step": 31045 + }, + { + "epoch": 0.45, + "grad_norm": 0.5546875, + "learning_rate": 0.00013570922120274666, + "loss": 1.0983, + "step": 31050 + }, + { + "epoch": 0.45, + "grad_norm": 0.52734375, + "learning_rate": 0.00013568583454059632, + "loss": 0.9642, + "step": 31055 + }, + { + "epoch": 0.45, + "grad_norm": 0.61328125, + "learning_rate": 0.00013566244564161727, + "loss": 0.9549, + "step": 31060 + }, + { + "epoch": 0.45, + "grad_norm": 0.57421875, + "learning_rate": 0.00013563905450727546, + "loss": 1.0332, + "step": 31065 + }, + { + "epoch": 0.45, + "grad_norm": 0.5546875, + "learning_rate": 0.00013561566113903716, + "loss": 1.0243, + "step": 31070 + }, + { + "epoch": 0.45, + "grad_norm": 0.5390625, + "learning_rate": 0.00013559226553836866, + "loss": 0.9611, + "step": 31075 + }, + { + "epoch": 0.45, + "grad_norm": 0.53515625, + "learning_rate": 0.00013556886770673638, + "loss": 0.9881, + "step": 31080 + }, + { + "epoch": 0.45, + "grad_norm": 0.609375, + "learning_rate": 0.00013554546764560702, + "loss": 0.8778, + "step": 31085 + }, + { + "epoch": 0.45, + "grad_norm": 0.5390625, + "learning_rate": 0.00013552206535644723, + "loss": 0.9477, + "step": 31090 + }, + { + "epoch": 0.45, + "grad_norm": 0.609375, + "learning_rate": 0.00013549866084072392, + "loss": 0.8397, + "step": 31095 + }, + { + "epoch": 0.45, + "grad_norm": 0.482421875, + "learning_rate": 0.00013547525409990415, + "loss": 0.9632, + "step": 31100 + }, + { + "epoch": 0.45, + "grad_norm": 0.4375, + "learning_rate": 0.000135451845135455, + "loss": 0.7316, + "step": 31105 + }, + { + "epoch": 0.45, + "grad_norm": 0.5703125, + "learning_rate": 0.0001354284339488439, + "loss": 1.0173, + "step": 31110 + }, + { + "epoch": 0.45, + "grad_norm": 0.57421875, + "learning_rate": 0.00013540502054153815, + "loss": 0.8948, + "step": 31115 + }, + { + "epoch": 0.45, + "grad_norm": 0.54296875, + "learning_rate": 0.0001353816049150054, + "loss": 0.8243, + "step": 31120 + }, + { + "epoch": 0.45, + "grad_norm": 0.490234375, + "learning_rate": 0.00013535818707071336, + "loss": 0.8822, + "step": 31125 + }, + { + "epoch": 0.45, + "grad_norm": 0.54296875, + "learning_rate": 0.0001353347670101299, + "loss": 0.958, + "step": 31130 + }, + { + "epoch": 0.45, + "grad_norm": 0.55078125, + "learning_rate": 0.00013531134473472297, + "loss": 0.906, + "step": 31135 + }, + { + "epoch": 0.45, + "grad_norm": 0.62109375, + "learning_rate": 0.00013528792024596075, + "loss": 0.9097, + "step": 31140 + }, + { + "epoch": 0.45, + "grad_norm": 0.51953125, + "learning_rate": 0.0001352644935453115, + "loss": 1.0661, + "step": 31145 + }, + { + "epoch": 0.45, + "grad_norm": 0.6484375, + "learning_rate": 0.00013524106463424365, + "loss": 0.9701, + "step": 31150 + }, + { + "epoch": 0.45, + "grad_norm": 0.55859375, + "learning_rate": 0.00013521763351422573, + "loss": 1.0976, + "step": 31155 + }, + { + "epoch": 0.45, + "grad_norm": 0.55078125, + "learning_rate": 0.0001351942001867264, + "loss": 1.0888, + "step": 31160 + }, + { + "epoch": 0.45, + "grad_norm": 0.65625, + "learning_rate": 0.00013517076465321455, + "loss": 0.9993, + "step": 31165 + }, + { + "epoch": 0.45, + "grad_norm": 0.65234375, + "learning_rate": 0.00013514732691515907, + "loss": 1.0969, + "step": 31170 + }, + { + "epoch": 0.45, + "grad_norm": 0.5390625, + "learning_rate": 0.00013512388697402912, + "loss": 0.9507, + "step": 31175 + }, + { + "epoch": 0.45, + "grad_norm": 0.57421875, + "learning_rate": 0.00013510044483129398, + "loss": 1.1196, + "step": 31180 + }, + { + "epoch": 0.45, + "grad_norm": 0.5390625, + "learning_rate": 0.00013507700048842296, + "loss": 0.8669, + "step": 31185 + }, + { + "epoch": 0.45, + "grad_norm": 0.5859375, + "learning_rate": 0.00013505355394688562, + "loss": 1.0395, + "step": 31190 + }, + { + "epoch": 0.45, + "grad_norm": 0.57421875, + "learning_rate": 0.00013503010520815156, + "loss": 1.0378, + "step": 31195 + }, + { + "epoch": 0.45, + "grad_norm": 0.5625, + "learning_rate": 0.0001350066542736906, + "loss": 1.0775, + "step": 31200 + }, + { + "epoch": 0.45, + "grad_norm": 0.58203125, + "learning_rate": 0.00013498320114497273, + "loss": 0.8533, + "step": 31205 + }, + { + "epoch": 0.45, + "grad_norm": 0.55078125, + "learning_rate": 0.00013495974582346793, + "loss": 0.9821, + "step": 31210 + }, + { + "epoch": 0.45, + "grad_norm": 0.546875, + "learning_rate": 0.00013493628831064646, + "loss": 0.9691, + "step": 31215 + }, + { + "epoch": 0.45, + "grad_norm": 0.53515625, + "learning_rate": 0.00013491282860797864, + "loss": 0.9099, + "step": 31220 + }, + { + "epoch": 0.45, + "grad_norm": 0.53125, + "learning_rate": 0.00013488936671693496, + "loss": 0.9978, + "step": 31225 + }, + { + "epoch": 0.45, + "grad_norm": 0.53125, + "learning_rate": 0.00013486590263898603, + "loss": 0.9646, + "step": 31230 + }, + { + "epoch": 0.45, + "grad_norm": 0.55859375, + "learning_rate": 0.00013484243637560265, + "loss": 0.8978, + "step": 31235 + }, + { + "epoch": 0.45, + "grad_norm": 0.60546875, + "learning_rate": 0.00013481896792825565, + "loss": 0.8367, + "step": 31240 + }, + { + "epoch": 0.45, + "grad_norm": 0.5859375, + "learning_rate": 0.0001347954972984161, + "loss": 0.9076, + "step": 31245 + }, + { + "epoch": 0.45, + "grad_norm": 0.62109375, + "learning_rate": 0.00013477202448755515, + "loss": 1.0487, + "step": 31250 + }, + { + "epoch": 0.45, + "grad_norm": 0.51953125, + "learning_rate": 0.00013474854949714407, + "loss": 0.7774, + "step": 31255 + }, + { + "epoch": 0.45, + "grad_norm": 0.66796875, + "learning_rate": 0.00013472507232865436, + "loss": 1.0968, + "step": 31260 + }, + { + "epoch": 0.45, + "grad_norm": 0.63671875, + "learning_rate": 0.00013470159298355758, + "loss": 1.0582, + "step": 31265 + }, + { + "epoch": 0.45, + "grad_norm": 0.5859375, + "learning_rate": 0.0001346781114633254, + "loss": 1.0763, + "step": 31270 + }, + { + "epoch": 0.45, + "grad_norm": 0.609375, + "learning_rate": 0.00013465462776942973, + "loss": 1.0131, + "step": 31275 + }, + { + "epoch": 0.45, + "grad_norm": 0.53515625, + "learning_rate": 0.0001346311419033425, + "loss": 0.9431, + "step": 31280 + }, + { + "epoch": 0.45, + "grad_norm": 0.55859375, + "learning_rate": 0.00013460765386653583, + "loss": 0.9907, + "step": 31285 + }, + { + "epoch": 0.45, + "grad_norm": 0.53515625, + "learning_rate": 0.00013458416366048204, + "loss": 0.9854, + "step": 31290 + }, + { + "epoch": 0.45, + "grad_norm": 0.65625, + "learning_rate": 0.00013456067128665347, + "loss": 1.0112, + "step": 31295 + }, + { + "epoch": 0.45, + "grad_norm": 0.5625, + "learning_rate": 0.0001345371767465227, + "loss": 1.0002, + "step": 31300 + }, + { + "epoch": 0.45, + "grad_norm": 0.498046875, + "learning_rate": 0.00013451368004156232, + "loss": 0.9572, + "step": 31305 + }, + { + "epoch": 0.45, + "grad_norm": 0.625, + "learning_rate": 0.00013449018117324516, + "loss": 0.9806, + "step": 31310 + }, + { + "epoch": 0.45, + "grad_norm": 0.5703125, + "learning_rate": 0.00013446668014304418, + "loss": 0.9183, + "step": 31315 + }, + { + "epoch": 0.45, + "grad_norm": 0.52734375, + "learning_rate": 0.00013444317695243243, + "loss": 0.8783, + "step": 31320 + }, + { + "epoch": 0.45, + "grad_norm": 0.53515625, + "learning_rate": 0.0001344196716028831, + "loss": 1.051, + "step": 31325 + }, + { + "epoch": 0.45, + "grad_norm": 0.52734375, + "learning_rate": 0.0001343961640958696, + "loss": 0.8848, + "step": 31330 + }, + { + "epoch": 0.45, + "grad_norm": 0.50390625, + "learning_rate": 0.0001343726544328653, + "loss": 0.9742, + "step": 31335 + }, + { + "epoch": 0.45, + "grad_norm": 0.5859375, + "learning_rate": 0.0001343491426153439, + "loss": 1.0643, + "step": 31340 + }, + { + "epoch": 0.45, + "grad_norm": 0.546875, + "learning_rate": 0.00013432562864477916, + "loss": 0.8818, + "step": 31345 + }, + { + "epoch": 0.45, + "grad_norm": 0.640625, + "learning_rate": 0.0001343021125226449, + "loss": 0.9056, + "step": 31350 + }, + { + "epoch": 0.45, + "grad_norm": 0.546875, + "learning_rate": 0.00013427859425041514, + "loss": 0.929, + "step": 31355 + }, + { + "epoch": 0.45, + "grad_norm": 0.55078125, + "learning_rate": 0.00013425507382956405, + "loss": 0.8192, + "step": 31360 + }, + { + "epoch": 0.45, + "grad_norm": 0.55078125, + "learning_rate": 0.00013423155126156592, + "loss": 1.0017, + "step": 31365 + }, + { + "epoch": 0.45, + "grad_norm": 0.65234375, + "learning_rate": 0.0001342080265478952, + "loss": 0.981, + "step": 31370 + }, + { + "epoch": 0.45, + "grad_norm": 0.57421875, + "learning_rate": 0.00013418449969002636, + "loss": 0.9689, + "step": 31375 + }, + { + "epoch": 0.45, + "grad_norm": 0.5546875, + "learning_rate": 0.00013416097068943422, + "loss": 0.8402, + "step": 31380 + }, + { + "epoch": 0.45, + "grad_norm": 0.62890625, + "learning_rate": 0.0001341374395475935, + "loss": 0.9719, + "step": 31385 + }, + { + "epoch": 0.45, + "grad_norm": 0.53125, + "learning_rate": 0.00013411390626597917, + "loss": 1.0305, + "step": 31390 + }, + { + "epoch": 0.45, + "grad_norm": 0.498046875, + "learning_rate": 0.00013409037084606635, + "loss": 0.9859, + "step": 31395 + }, + { + "epoch": 0.45, + "grad_norm": 0.54296875, + "learning_rate": 0.0001340668332893303, + "loss": 1.1799, + "step": 31400 + }, + { + "epoch": 0.45, + "grad_norm": 0.640625, + "learning_rate": 0.0001340432935972463, + "loss": 1.0158, + "step": 31405 + }, + { + "epoch": 0.45, + "grad_norm": 0.48828125, + "learning_rate": 0.0001340197517712899, + "loss": 0.8952, + "step": 31410 + }, + { + "epoch": 0.45, + "grad_norm": 0.5546875, + "learning_rate": 0.0001339962078129367, + "loss": 0.9101, + "step": 31415 + }, + { + "epoch": 0.45, + "grad_norm": 0.6484375, + "learning_rate": 0.00013397266172366246, + "loss": 1.0594, + "step": 31420 + }, + { + "epoch": 0.45, + "grad_norm": 0.50390625, + "learning_rate": 0.00013394911350494314, + "loss": 0.8013, + "step": 31425 + }, + { + "epoch": 0.45, + "grad_norm": 0.5625, + "learning_rate": 0.0001339255631582547, + "loss": 0.9715, + "step": 31430 + }, + { + "epoch": 0.45, + "grad_norm": 0.5078125, + "learning_rate": 0.0001339020106850733, + "loss": 0.9856, + "step": 31435 + }, + { + "epoch": 0.45, + "grad_norm": 0.49609375, + "learning_rate": 0.00013387845608687528, + "loss": 0.9213, + "step": 31440 + }, + { + "epoch": 0.45, + "grad_norm": 0.625, + "learning_rate": 0.00013385489936513703, + "loss": 1.1167, + "step": 31445 + }, + { + "epoch": 0.45, + "grad_norm": 0.515625, + "learning_rate": 0.00013383134052133517, + "loss": 0.9305, + "step": 31450 + }, + { + "epoch": 0.45, + "grad_norm": 0.5546875, + "learning_rate": 0.00013380777955694634, + "loss": 0.9467, + "step": 31455 + }, + { + "epoch": 0.45, + "grad_norm": 0.6015625, + "learning_rate": 0.00013378421647344737, + "loss": 0.8589, + "step": 31460 + }, + { + "epoch": 0.45, + "grad_norm": 0.515625, + "learning_rate": 0.00013376065127231523, + "loss": 0.9975, + "step": 31465 + }, + { + "epoch": 0.45, + "grad_norm": 0.5390625, + "learning_rate": 0.00013373708395502705, + "loss": 0.9272, + "step": 31470 + }, + { + "epoch": 0.45, + "grad_norm": 0.5625, + "learning_rate": 0.00013371351452306, + "loss": 1.0828, + "step": 31475 + }, + { + "epoch": 0.45, + "grad_norm": 0.5234375, + "learning_rate": 0.00013368994297789145, + "loss": 0.9316, + "step": 31480 + }, + { + "epoch": 0.45, + "grad_norm": 0.5546875, + "learning_rate": 0.00013366636932099892, + "loss": 1.1062, + "step": 31485 + }, + { + "epoch": 0.45, + "grad_norm": 0.6015625, + "learning_rate": 0.00013364279355386003, + "loss": 0.975, + "step": 31490 + }, + { + "epoch": 0.45, + "grad_norm": 0.48828125, + "learning_rate": 0.0001336192156779525, + "loss": 0.8687, + "step": 31495 + }, + { + "epoch": 0.45, + "grad_norm": 0.62890625, + "learning_rate": 0.00013359563569475424, + "loss": 1.0445, + "step": 31500 + }, + { + "epoch": 0.45, + "grad_norm": 0.52734375, + "learning_rate": 0.0001335720536057433, + "loss": 0.8226, + "step": 31505 + }, + { + "epoch": 0.45, + "grad_norm": 0.61328125, + "learning_rate": 0.0001335484694123978, + "loss": 0.8799, + "step": 31510 + }, + { + "epoch": 0.45, + "grad_norm": 0.55859375, + "learning_rate": 0.00013352488311619602, + "loss": 0.9934, + "step": 31515 + }, + { + "epoch": 0.45, + "grad_norm": 0.5078125, + "learning_rate": 0.00013350129471861638, + "loss": 0.7989, + "step": 31520 + }, + { + "epoch": 0.45, + "grad_norm": 0.6015625, + "learning_rate": 0.00013347770422113744, + "loss": 1.0236, + "step": 31525 + }, + { + "epoch": 0.45, + "grad_norm": 0.5703125, + "learning_rate": 0.00013345411162523786, + "loss": 0.9755, + "step": 31530 + }, + { + "epoch": 0.45, + "grad_norm": 0.59375, + "learning_rate": 0.00013343051693239647, + "loss": 1.1008, + "step": 31535 + }, + { + "epoch": 0.45, + "grad_norm": 0.59765625, + "learning_rate": 0.00013340692014409222, + "loss": 0.9241, + "step": 31540 + }, + { + "epoch": 0.45, + "grad_norm": 0.58203125, + "learning_rate": 0.00013338332126180415, + "loss": 1.1343, + "step": 31545 + }, + { + "epoch": 0.45, + "grad_norm": 0.578125, + "learning_rate": 0.00013335972028701148, + "loss": 0.9986, + "step": 31550 + }, + { + "epoch": 0.45, + "grad_norm": 0.55859375, + "learning_rate": 0.00013333611722119357, + "loss": 0.9672, + "step": 31555 + }, + { + "epoch": 0.45, + "grad_norm": 0.54296875, + "learning_rate": 0.00013331251206582983, + "loss": 1.0488, + "step": 31560 + }, + { + "epoch": 0.45, + "grad_norm": 0.6875, + "learning_rate": 0.00013328890482239994, + "loss": 1.0026, + "step": 31565 + }, + { + "epoch": 0.45, + "grad_norm": 0.609375, + "learning_rate": 0.00013326529549238352, + "loss": 1.0177, + "step": 31570 + }, + { + "epoch": 0.45, + "grad_norm": 0.56640625, + "learning_rate": 0.00013324168407726056, + "loss": 0.9506, + "step": 31575 + }, + { + "epoch": 0.45, + "grad_norm": 0.6328125, + "learning_rate": 0.00013321807057851094, + "loss": 0.9342, + "step": 31580 + }, + { + "epoch": 0.45, + "grad_norm": 0.56640625, + "learning_rate": 0.00013319445499761486, + "loss": 1.0342, + "step": 31585 + }, + { + "epoch": 0.45, + "grad_norm": 0.52734375, + "learning_rate": 0.00013317083733605252, + "loss": 1.0099, + "step": 31590 + }, + { + "epoch": 0.45, + "grad_norm": 0.5234375, + "learning_rate": 0.0001331472175953043, + "loss": 0.9359, + "step": 31595 + }, + { + "epoch": 0.45, + "grad_norm": 0.5546875, + "learning_rate": 0.00013312359577685072, + "loss": 0.971, + "step": 31600 + }, + { + "epoch": 0.45, + "grad_norm": 0.546875, + "learning_rate": 0.00013309997188217247, + "loss": 0.93, + "step": 31605 + }, + { + "epoch": 0.45, + "grad_norm": 0.51953125, + "learning_rate": 0.00013307634591275028, + "loss": 0.9013, + "step": 31610 + }, + { + "epoch": 0.45, + "grad_norm": 0.54296875, + "learning_rate": 0.00013305271787006503, + "loss": 0.9941, + "step": 31615 + }, + { + "epoch": 0.45, + "grad_norm": 0.6328125, + "learning_rate": 0.0001330290877555978, + "loss": 1.1095, + "step": 31620 + }, + { + "epoch": 0.45, + "grad_norm": 0.50390625, + "learning_rate": 0.0001330054555708297, + "loss": 0.9006, + "step": 31625 + }, + { + "epoch": 0.45, + "grad_norm": 0.515625, + "learning_rate": 0.00013298182131724211, + "loss": 0.9277, + "step": 31630 + }, + { + "epoch": 0.45, + "grad_norm": 0.640625, + "learning_rate": 0.00013295818499631636, + "loss": 1.009, + "step": 31635 + }, + { + "epoch": 0.45, + "grad_norm": 0.5234375, + "learning_rate": 0.00013293454660953403, + "loss": 0.9823, + "step": 31640 + }, + { + "epoch": 0.45, + "grad_norm": 0.55078125, + "learning_rate": 0.00013291090615837685, + "loss": 1.0153, + "step": 31645 + }, + { + "epoch": 0.45, + "grad_norm": 0.50390625, + "learning_rate": 0.00013288726364432652, + "loss": 0.8506, + "step": 31650 + }, + { + "epoch": 0.45, + "grad_norm": 0.73828125, + "learning_rate": 0.00013286361906886512, + "loss": 1.0333, + "step": 31655 + }, + { + "epoch": 0.45, + "grad_norm": 0.57421875, + "learning_rate": 0.00013283997243347464, + "loss": 1.1103, + "step": 31660 + }, + { + "epoch": 0.45, + "grad_norm": 0.578125, + "learning_rate": 0.00013281632373963727, + "loss": 0.9147, + "step": 31665 + }, + { + "epoch": 0.45, + "grad_norm": 0.61328125, + "learning_rate": 0.00013279267298883535, + "loss": 0.9931, + "step": 31670 + }, + { + "epoch": 0.45, + "grad_norm": 0.59375, + "learning_rate": 0.00013276902018255132, + "loss": 1.0037, + "step": 31675 + }, + { + "epoch": 0.45, + "grad_norm": 0.609375, + "learning_rate": 0.0001327453653222678, + "loss": 0.9983, + "step": 31680 + }, + { + "epoch": 0.45, + "grad_norm": 0.61328125, + "learning_rate": 0.00013272170840946754, + "loss": 1.1003, + "step": 31685 + }, + { + "epoch": 0.45, + "grad_norm": 0.5390625, + "learning_rate": 0.00013269804944563327, + "loss": 1.095, + "step": 31690 + }, + { + "epoch": 0.45, + "grad_norm": 0.59375, + "learning_rate": 0.00013267438843224803, + "loss": 1.0487, + "step": 31695 + }, + { + "epoch": 0.45, + "grad_norm": 0.64453125, + "learning_rate": 0.00013265072537079492, + "loss": 1.1067, + "step": 31700 + }, + { + "epoch": 0.45, + "grad_norm": 0.56640625, + "learning_rate": 0.00013262706026275712, + "loss": 0.8894, + "step": 31705 + }, + { + "epoch": 0.45, + "grad_norm": 0.59375, + "learning_rate": 0.00013260339310961806, + "loss": 0.8737, + "step": 31710 + }, + { + "epoch": 0.45, + "grad_norm": 0.462890625, + "learning_rate": 0.0001325797239128612, + "loss": 0.7964, + "step": 31715 + }, + { + "epoch": 0.46, + "grad_norm": 0.59375, + "learning_rate": 0.00013255605267397012, + "loss": 0.8535, + "step": 31720 + }, + { + "epoch": 0.46, + "grad_norm": 0.6484375, + "learning_rate": 0.0001325323793944286, + "loss": 1.0003, + "step": 31725 + }, + { + "epoch": 0.46, + "grad_norm": 0.625, + "learning_rate": 0.00013250870407572045, + "loss": 0.8312, + "step": 31730 + }, + { + "epoch": 0.46, + "grad_norm": 0.59765625, + "learning_rate": 0.00013248502671932971, + "loss": 1.0002, + "step": 31735 + }, + { + "epoch": 0.46, + "grad_norm": 0.55859375, + "learning_rate": 0.00013246134732674056, + "loss": 0.912, + "step": 31740 + }, + { + "epoch": 0.46, + "grad_norm": 0.58984375, + "learning_rate": 0.0001324376658994371, + "loss": 0.928, + "step": 31745 + }, + { + "epoch": 0.46, + "grad_norm": 0.6640625, + "learning_rate": 0.00013241398243890386, + "loss": 1.0814, + "step": 31750 + }, + { + "epoch": 0.46, + "grad_norm": 0.4140625, + "learning_rate": 0.00013239029694662527, + "loss": 0.7331, + "step": 31755 + }, + { + "epoch": 0.46, + "grad_norm": 0.5625, + "learning_rate": 0.00013236660942408596, + "loss": 0.9826, + "step": 31760 + }, + { + "epoch": 0.46, + "grad_norm": 0.56640625, + "learning_rate": 0.00013234291987277076, + "loss": 0.8716, + "step": 31765 + }, + { + "epoch": 0.46, + "grad_norm": 0.5234375, + "learning_rate": 0.00013231922829416443, + "loss": 0.8813, + "step": 31770 + }, + { + "epoch": 0.46, + "grad_norm": 0.546875, + "learning_rate": 0.0001322955346897521, + "loss": 1.0243, + "step": 31775 + }, + { + "epoch": 0.46, + "grad_norm": 0.5703125, + "learning_rate": 0.0001322718390610189, + "loss": 0.9718, + "step": 31780 + }, + { + "epoch": 0.46, + "grad_norm": 0.5703125, + "learning_rate": 0.00013224814140945003, + "loss": 0.838, + "step": 31785 + }, + { + "epoch": 0.46, + "grad_norm": 0.55859375, + "learning_rate": 0.00013222444173653097, + "loss": 0.8757, + "step": 31790 + }, + { + "epoch": 0.46, + "grad_norm": 0.58203125, + "learning_rate": 0.0001322007400437472, + "loss": 0.9052, + "step": 31795 + }, + { + "epoch": 0.46, + "grad_norm": 0.54296875, + "learning_rate": 0.00013217703633258433, + "loss": 0.9327, + "step": 31800 + }, + { + "epoch": 0.46, + "grad_norm": 0.5546875, + "learning_rate": 0.00013215333060452816, + "loss": 0.9738, + "step": 31805 + }, + { + "epoch": 0.46, + "grad_norm": 0.546875, + "learning_rate": 0.00013212962286106468, + "loss": 0.8292, + "step": 31810 + }, + { + "epoch": 0.46, + "grad_norm": 0.59375, + "learning_rate": 0.00013210591310367978, + "loss": 0.9225, + "step": 31815 + }, + { + "epoch": 0.46, + "grad_norm": 0.7109375, + "learning_rate": 0.00013208220133385974, + "loss": 1.0917, + "step": 31820 + }, + { + "epoch": 0.46, + "grad_norm": 0.63671875, + "learning_rate": 0.00013205848755309073, + "loss": 0.9889, + "step": 31825 + }, + { + "epoch": 0.46, + "grad_norm": 0.5234375, + "learning_rate": 0.00013203477176285924, + "loss": 0.9825, + "step": 31830 + }, + { + "epoch": 0.46, + "grad_norm": 0.59765625, + "learning_rate": 0.00013201105396465178, + "loss": 1.0294, + "step": 31835 + }, + { + "epoch": 0.46, + "grad_norm": 0.75, + "learning_rate": 0.00013198733415995494, + "loss": 1.1127, + "step": 31840 + }, + { + "epoch": 0.46, + "grad_norm": 0.58984375, + "learning_rate": 0.00013196361235025562, + "loss": 0.8949, + "step": 31845 + }, + { + "epoch": 0.46, + "grad_norm": 0.5625, + "learning_rate": 0.00013193988853704068, + "loss": 0.845, + "step": 31850 + }, + { + "epoch": 0.46, + "grad_norm": 0.5390625, + "learning_rate": 0.00013191616272179713, + "loss": 0.8679, + "step": 31855 + }, + { + "epoch": 0.46, + "grad_norm": 0.60546875, + "learning_rate": 0.00013189243490601215, + "loss": 0.9756, + "step": 31860 + }, + { + "epoch": 0.46, + "grad_norm": 0.53515625, + "learning_rate": 0.00013186870509117302, + "loss": 1.0327, + "step": 31865 + }, + { + "epoch": 0.46, + "grad_norm": 0.5859375, + "learning_rate": 0.00013184497327876717, + "loss": 0.9826, + "step": 31870 + }, + { + "epoch": 0.46, + "grad_norm": 0.5703125, + "learning_rate": 0.00013182123947028216, + "loss": 0.7791, + "step": 31875 + }, + { + "epoch": 0.46, + "grad_norm": 0.578125, + "learning_rate": 0.00013179750366720556, + "loss": 0.9861, + "step": 31880 + }, + { + "epoch": 0.46, + "grad_norm": 0.6015625, + "learning_rate": 0.00013177376587102522, + "loss": 0.9623, + "step": 31885 + }, + { + "epoch": 0.46, + "grad_norm": 0.5625, + "learning_rate": 0.0001317500260832291, + "loss": 0.9592, + "step": 31890 + }, + { + "epoch": 0.46, + "grad_norm": 0.55859375, + "learning_rate": 0.00013172628430530513, + "loss": 0.9209, + "step": 31895 + }, + { + "epoch": 0.46, + "grad_norm": 0.5390625, + "learning_rate": 0.00013170254053874157, + "loss": 0.7683, + "step": 31900 + }, + { + "epoch": 0.46, + "grad_norm": 0.515625, + "learning_rate": 0.00013167879478502665, + "loss": 0.9936, + "step": 31905 + }, + { + "epoch": 0.46, + "grad_norm": 0.53515625, + "learning_rate": 0.00013165504704564876, + "loss": 1.0267, + "step": 31910 + }, + { + "epoch": 0.46, + "grad_norm": 0.5234375, + "learning_rate": 0.00013163129732209652, + "loss": 0.9512, + "step": 31915 + }, + { + "epoch": 0.46, + "grad_norm": 0.60546875, + "learning_rate": 0.0001316075456158585, + "loss": 0.9878, + "step": 31920 + }, + { + "epoch": 0.46, + "grad_norm": 0.5234375, + "learning_rate": 0.00013158379192842353, + "loss": 1.0096, + "step": 31925 + }, + { + "epoch": 0.46, + "grad_norm": 0.5078125, + "learning_rate": 0.00013156003626128054, + "loss": 0.9902, + "step": 31930 + }, + { + "epoch": 0.46, + "grad_norm": 0.52734375, + "learning_rate": 0.00013153627861591847, + "loss": 0.7962, + "step": 31935 + }, + { + "epoch": 0.46, + "grad_norm": 0.59765625, + "learning_rate": 0.00013151251899382662, + "loss": 0.873, + "step": 31940 + }, + { + "epoch": 0.46, + "grad_norm": 0.62890625, + "learning_rate": 0.00013148875739649413, + "loss": 0.9694, + "step": 31945 + }, + { + "epoch": 0.46, + "grad_norm": 0.6875, + "learning_rate": 0.00013146499382541048, + "loss": 1.0138, + "step": 31950 + }, + { + "epoch": 0.46, + "grad_norm": 0.53515625, + "learning_rate": 0.00013144122828206523, + "loss": 0.9132, + "step": 31955 + }, + { + "epoch": 0.46, + "grad_norm": 0.53515625, + "learning_rate": 0.00013141746076794793, + "loss": 0.9452, + "step": 31960 + }, + { + "epoch": 0.46, + "grad_norm": 0.5859375, + "learning_rate": 0.0001313936912845484, + "loss": 1.0403, + "step": 31965 + }, + { + "epoch": 0.46, + "grad_norm": 0.5625, + "learning_rate": 0.00013136991983335656, + "loss": 0.9956, + "step": 31970 + }, + { + "epoch": 0.46, + "grad_norm": 0.6484375, + "learning_rate": 0.00013134614641586244, + "loss": 0.8793, + "step": 31975 + }, + { + "epoch": 0.46, + "grad_norm": 0.57421875, + "learning_rate": 0.00013132237103355613, + "loss": 0.9511, + "step": 31980 + }, + { + "epoch": 0.46, + "grad_norm": 0.57421875, + "learning_rate": 0.00013129859368792794, + "loss": 0.9065, + "step": 31985 + }, + { + "epoch": 0.46, + "grad_norm": 0.54296875, + "learning_rate": 0.00013127481438046824, + "loss": 0.9021, + "step": 31990 + }, + { + "epoch": 0.46, + "grad_norm": 0.5625, + "learning_rate": 0.00013125103311266756, + "loss": 0.982, + "step": 31995 + }, + { + "epoch": 0.46, + "grad_norm": 0.45703125, + "learning_rate": 0.00013122724988601656, + "loss": 0.8726, + "step": 32000 + }, + { + "epoch": 0.46, + "grad_norm": 0.609375, + "learning_rate": 0.00013120346470200594, + "loss": 0.845, + "step": 32005 + }, + { + "epoch": 0.46, + "grad_norm": 0.53515625, + "learning_rate": 0.00013117967756212667, + "loss": 1.1373, + "step": 32010 + }, + { + "epoch": 0.46, + "grad_norm": 0.55859375, + "learning_rate": 0.00013115588846786963, + "loss": 0.9724, + "step": 32015 + }, + { + "epoch": 0.46, + "grad_norm": 0.56640625, + "learning_rate": 0.00013113209742072606, + "loss": 1.0094, + "step": 32020 + }, + { + "epoch": 0.46, + "grad_norm": 0.609375, + "learning_rate": 0.00013110830442218714, + "loss": 0.9913, + "step": 32025 + }, + { + "epoch": 0.46, + "grad_norm": 0.5234375, + "learning_rate": 0.0001310845094737443, + "loss": 0.919, + "step": 32030 + }, + { + "epoch": 0.46, + "grad_norm": 0.67578125, + "learning_rate": 0.00013106071257688897, + "loss": 1.0586, + "step": 32035 + }, + { + "epoch": 0.46, + "grad_norm": 0.546875, + "learning_rate": 0.0001310369137331128, + "loss": 1.0326, + "step": 32040 + }, + { + "epoch": 0.46, + "grad_norm": 0.54296875, + "learning_rate": 0.00013101311294390756, + "loss": 0.996, + "step": 32045 + }, + { + "epoch": 0.46, + "grad_norm": 0.58984375, + "learning_rate": 0.00013098931021076506, + "loss": 1.1005, + "step": 32050 + }, + { + "epoch": 0.46, + "grad_norm": 0.640625, + "learning_rate": 0.00013096550553517734, + "loss": 0.9351, + "step": 32055 + }, + { + "epoch": 0.46, + "grad_norm": 0.56640625, + "learning_rate": 0.0001309416989186364, + "loss": 0.8718, + "step": 32060 + }, + { + "epoch": 0.46, + "grad_norm": 0.56640625, + "learning_rate": 0.0001309178903626346, + "loss": 0.9508, + "step": 32065 + }, + { + "epoch": 0.46, + "grad_norm": 0.56640625, + "learning_rate": 0.00013089407986866414, + "loss": 0.9767, + "step": 32070 + }, + { + "epoch": 0.46, + "grad_norm": 0.484375, + "learning_rate": 0.00013087026743821765, + "loss": 0.9856, + "step": 32075 + }, + { + "epoch": 0.46, + "grad_norm": 0.56640625, + "learning_rate": 0.00013084645307278762, + "loss": 0.9379, + "step": 32080 + }, + { + "epoch": 0.46, + "grad_norm": 0.609375, + "learning_rate": 0.00013082263677386674, + "loss": 0.9073, + "step": 32085 + }, + { + "epoch": 0.46, + "grad_norm": 0.515625, + "learning_rate": 0.00013079881854294792, + "loss": 0.7946, + "step": 32090 + }, + { + "epoch": 0.46, + "grad_norm": 0.5546875, + "learning_rate": 0.00013077499838152405, + "loss": 0.9327, + "step": 32095 + }, + { + "epoch": 0.46, + "grad_norm": 0.66796875, + "learning_rate": 0.00013075117629108825, + "loss": 0.963, + "step": 32100 + }, + { + "epoch": 0.46, + "grad_norm": 0.5859375, + "learning_rate": 0.0001307273522731337, + "loss": 1.0912, + "step": 32105 + }, + { + "epoch": 0.46, + "grad_norm": 0.75, + "learning_rate": 0.0001307035263291537, + "loss": 1.0582, + "step": 32110 + }, + { + "epoch": 0.46, + "grad_norm": 0.52734375, + "learning_rate": 0.0001306796984606417, + "loss": 1.0168, + "step": 32115 + }, + { + "epoch": 0.46, + "grad_norm": 0.56640625, + "learning_rate": 0.00013065586866909128, + "loss": 0.9312, + "step": 32120 + }, + { + "epoch": 0.46, + "grad_norm": 0.625, + "learning_rate": 0.00013063203695599606, + "loss": 1.0982, + "step": 32125 + }, + { + "epoch": 0.46, + "grad_norm": 1.09375, + "learning_rate": 0.0001306082033228499, + "loss": 1.0406, + "step": 32130 + }, + { + "epoch": 0.46, + "grad_norm": 0.609375, + "learning_rate": 0.00013058436777114673, + "loss": 1.037, + "step": 32135 + }, + { + "epoch": 0.46, + "grad_norm": 0.58203125, + "learning_rate": 0.0001305605303023805, + "loss": 0.8963, + "step": 32140 + }, + { + "epoch": 0.46, + "grad_norm": 0.453125, + "learning_rate": 0.00013053669091804546, + "loss": 0.9866, + "step": 32145 + }, + { + "epoch": 0.46, + "grad_norm": 0.61328125, + "learning_rate": 0.00013051284961963585, + "loss": 1.0073, + "step": 32150 + }, + { + "epoch": 0.46, + "grad_norm": 0.6640625, + "learning_rate": 0.00013048900640864606, + "loss": 1.0038, + "step": 32155 + }, + { + "epoch": 0.46, + "grad_norm": 0.5390625, + "learning_rate": 0.00013046516128657065, + "loss": 1.0067, + "step": 32160 + }, + { + "epoch": 0.46, + "grad_norm": 0.52734375, + "learning_rate": 0.00013044131425490418, + "loss": 0.9225, + "step": 32165 + }, + { + "epoch": 0.46, + "grad_norm": 0.54296875, + "learning_rate": 0.0001304174653151415, + "loss": 0.8713, + "step": 32170 + }, + { + "epoch": 0.46, + "grad_norm": 0.50390625, + "learning_rate": 0.00013039361446877745, + "loss": 0.8281, + "step": 32175 + }, + { + "epoch": 0.46, + "grad_norm": 0.54296875, + "learning_rate": 0.000130369761717307, + "loss": 0.9131, + "step": 32180 + }, + { + "epoch": 0.46, + "grad_norm": 0.58984375, + "learning_rate": 0.00013034590706222538, + "loss": 0.891, + "step": 32185 + }, + { + "epoch": 0.46, + "grad_norm": 0.5703125, + "learning_rate": 0.0001303220505050277, + "loss": 1.1046, + "step": 32190 + }, + { + "epoch": 0.46, + "grad_norm": 0.5703125, + "learning_rate": 0.00013029819204720932, + "loss": 1.0849, + "step": 32195 + }, + { + "epoch": 0.46, + "grad_norm": 0.5703125, + "learning_rate": 0.0001302743316902658, + "loss": 0.9852, + "step": 32200 + }, + { + "epoch": 0.46, + "grad_norm": 0.58203125, + "learning_rate": 0.00013025046943569268, + "loss": 0.9682, + "step": 32205 + }, + { + "epoch": 0.46, + "grad_norm": 0.6328125, + "learning_rate": 0.00013022660528498568, + "loss": 0.937, + "step": 32210 + }, + { + "epoch": 0.46, + "grad_norm": 0.51171875, + "learning_rate": 0.00013020273923964064, + "loss": 0.8642, + "step": 32215 + }, + { + "epoch": 0.46, + "grad_norm": 0.609375, + "learning_rate": 0.00013017887130115349, + "loss": 0.8892, + "step": 32220 + }, + { + "epoch": 0.46, + "grad_norm": 0.48046875, + "learning_rate": 0.00013015500147102032, + "loss": 0.9228, + "step": 32225 + }, + { + "epoch": 0.46, + "grad_norm": 0.5703125, + "learning_rate": 0.00013013112975073733, + "loss": 1.0077, + "step": 32230 + }, + { + "epoch": 0.46, + "grad_norm": 0.5234375, + "learning_rate": 0.0001301072561418008, + "loss": 1.0031, + "step": 32235 + }, + { + "epoch": 0.46, + "grad_norm": 0.5390625, + "learning_rate": 0.00013008338064570717, + "loss": 0.9054, + "step": 32240 + }, + { + "epoch": 0.46, + "grad_norm": 0.55859375, + "learning_rate": 0.000130059503263953, + "loss": 1.0037, + "step": 32245 + }, + { + "epoch": 0.46, + "grad_norm": 0.5703125, + "learning_rate": 0.00013003562399803488, + "loss": 0.8962, + "step": 32250 + }, + { + "epoch": 0.46, + "grad_norm": 0.5546875, + "learning_rate": 0.00013001174284944968, + "loss": 0.8093, + "step": 32255 + }, + { + "epoch": 0.46, + "grad_norm": 0.56640625, + "learning_rate": 0.00012998785981969423, + "loss": 0.9555, + "step": 32260 + }, + { + "epoch": 0.46, + "grad_norm": 0.55859375, + "learning_rate": 0.00012996397491026558, + "loss": 0.9951, + "step": 32265 + }, + { + "epoch": 0.46, + "grad_norm": 0.5546875, + "learning_rate": 0.0001299400881226609, + "loss": 0.764, + "step": 32270 + }, + { + "epoch": 0.46, + "grad_norm": 0.4765625, + "learning_rate": 0.00012991619945837735, + "loss": 0.9576, + "step": 32275 + }, + { + "epoch": 0.46, + "grad_norm": 0.62109375, + "learning_rate": 0.00012989230891891236, + "loss": 0.8473, + "step": 32280 + }, + { + "epoch": 0.46, + "grad_norm": 0.5078125, + "learning_rate": 0.0001298684165057634, + "loss": 0.803, + "step": 32285 + }, + { + "epoch": 0.46, + "grad_norm": 0.55859375, + "learning_rate": 0.0001298445222204281, + "loss": 0.8959, + "step": 32290 + }, + { + "epoch": 0.46, + "grad_norm": 0.6484375, + "learning_rate": 0.00012982062606440412, + "loss": 1.0361, + "step": 32295 + }, + { + "epoch": 0.46, + "grad_norm": 0.546875, + "learning_rate": 0.00012979672803918938, + "loss": 0.9159, + "step": 32300 + }, + { + "epoch": 0.46, + "grad_norm": 0.56640625, + "learning_rate": 0.00012977282814628172, + "loss": 1.0206, + "step": 32305 + }, + { + "epoch": 0.46, + "grad_norm": 0.5703125, + "learning_rate": 0.00012974892638717932, + "loss": 0.9815, + "step": 32310 + }, + { + "epoch": 0.46, + "grad_norm": 0.5859375, + "learning_rate": 0.00012972502276338034, + "loss": 1.0164, + "step": 32315 + }, + { + "epoch": 0.46, + "grad_norm": 0.53125, + "learning_rate": 0.0001297011172763831, + "loss": 1.0121, + "step": 32320 + }, + { + "epoch": 0.46, + "grad_norm": 0.69140625, + "learning_rate": 0.00012967720992768596, + "loss": 1.0233, + "step": 32325 + }, + { + "epoch": 0.46, + "grad_norm": 0.59765625, + "learning_rate": 0.00012965330071878752, + "loss": 1.0093, + "step": 32330 + }, + { + "epoch": 0.46, + "grad_norm": 0.5390625, + "learning_rate": 0.00012962938965118643, + "loss": 0.935, + "step": 32335 + }, + { + "epoch": 0.46, + "grad_norm": 0.5078125, + "learning_rate": 0.00012960547672638144, + "loss": 0.9044, + "step": 32340 + }, + { + "epoch": 0.46, + "grad_norm": 0.640625, + "learning_rate": 0.00012958156194587146, + "loss": 1.0922, + "step": 32345 + }, + { + "epoch": 0.46, + "grad_norm": 0.52734375, + "learning_rate": 0.00012955764531115548, + "loss": 0.9507, + "step": 32350 + }, + { + "epoch": 0.46, + "grad_norm": 0.52734375, + "learning_rate": 0.00012953372682373264, + "loss": 0.9014, + "step": 32355 + }, + { + "epoch": 0.46, + "grad_norm": 0.52734375, + "learning_rate": 0.00012950980648510213, + "loss": 1.078, + "step": 32360 + }, + { + "epoch": 0.46, + "grad_norm": 0.578125, + "learning_rate": 0.00012948588429676335, + "loss": 1.0288, + "step": 32365 + }, + { + "epoch": 0.46, + "grad_norm": 0.6015625, + "learning_rate": 0.00012946196026021578, + "loss": 1.0478, + "step": 32370 + }, + { + "epoch": 0.46, + "grad_norm": 0.58203125, + "learning_rate": 0.000129438034376959, + "loss": 0.9876, + "step": 32375 + }, + { + "epoch": 0.46, + "grad_norm": 0.515625, + "learning_rate": 0.0001294141066484927, + "loss": 0.7554, + "step": 32380 + }, + { + "epoch": 0.46, + "grad_norm": 0.53515625, + "learning_rate": 0.00012939017707631664, + "loss": 0.9796, + "step": 32385 + }, + { + "epoch": 0.46, + "grad_norm": 0.57421875, + "learning_rate": 0.00012936624566193086, + "loss": 1.0194, + "step": 32390 + }, + { + "epoch": 0.46, + "grad_norm": 0.5, + "learning_rate": 0.00012934231240683536, + "loss": 0.8431, + "step": 32395 + }, + { + "epoch": 0.46, + "grad_norm": 0.5390625, + "learning_rate": 0.00012931837731253027, + "loss": 0.9893, + "step": 32400 + }, + { + "epoch": 0.46, + "grad_norm": 0.6640625, + "learning_rate": 0.0001292944403805159, + "loss": 1.1741, + "step": 32405 + }, + { + "epoch": 0.46, + "grad_norm": 0.5390625, + "learning_rate": 0.00012927050161229265, + "loss": 0.6753, + "step": 32410 + }, + { + "epoch": 0.46, + "grad_norm": 0.578125, + "learning_rate": 0.00012924656100936103, + "loss": 0.9545, + "step": 32415 + }, + { + "epoch": 0.47, + "grad_norm": 0.51171875, + "learning_rate": 0.0001292226185732217, + "loss": 1.087, + "step": 32420 + }, + { + "epoch": 0.47, + "grad_norm": 0.578125, + "learning_rate": 0.00012919867430537525, + "loss": 1.0627, + "step": 32425 + }, + { + "epoch": 0.47, + "grad_norm": 0.515625, + "learning_rate": 0.00012917472820732272, + "loss": 1.1009, + "step": 32430 + }, + { + "epoch": 0.47, + "grad_norm": 0.5390625, + "learning_rate": 0.00012915078028056498, + "loss": 0.9163, + "step": 32435 + }, + { + "epoch": 0.47, + "grad_norm": 0.51953125, + "learning_rate": 0.00012912683052660313, + "loss": 0.922, + "step": 32440 + }, + { + "epoch": 0.47, + "grad_norm": 0.6953125, + "learning_rate": 0.00012910287894693834, + "loss": 0.8632, + "step": 32445 + }, + { + "epoch": 0.47, + "grad_norm": 0.62890625, + "learning_rate": 0.000129078925543072, + "loss": 1.0744, + "step": 32450 + }, + { + "epoch": 0.47, + "grad_norm": 0.546875, + "learning_rate": 0.00012905497031650548, + "loss": 0.9194, + "step": 32455 + }, + { + "epoch": 0.47, + "grad_norm": 0.494140625, + "learning_rate": 0.00012903101326874032, + "loss": 1.0821, + "step": 32460 + }, + { + "epoch": 0.47, + "grad_norm": 0.5703125, + "learning_rate": 0.00012900705440127818, + "loss": 0.9133, + "step": 32465 + }, + { + "epoch": 0.47, + "grad_norm": 0.5078125, + "learning_rate": 0.00012898309371562084, + "loss": 0.9024, + "step": 32470 + }, + { + "epoch": 0.47, + "grad_norm": 0.578125, + "learning_rate": 0.0001289591312132702, + "loss": 1.0314, + "step": 32475 + }, + { + "epoch": 0.47, + "grad_norm": 0.546875, + "learning_rate": 0.0001289351668957282, + "loss": 1.0295, + "step": 32480 + }, + { + "epoch": 0.47, + "grad_norm": 0.52734375, + "learning_rate": 0.00012891120076449699, + "loss": 0.9198, + "step": 32485 + }, + { + "epoch": 0.47, + "grad_norm": 0.56640625, + "learning_rate": 0.0001288872328210788, + "loss": 1.0714, + "step": 32490 + }, + { + "epoch": 0.47, + "grad_norm": 0.54296875, + "learning_rate": 0.00012886326306697595, + "loss": 0.9769, + "step": 32495 + }, + { + "epoch": 0.47, + "grad_norm": 0.50390625, + "learning_rate": 0.00012883929150369093, + "loss": 1.016, + "step": 32500 + }, + { + "epoch": 0.47, + "grad_norm": 0.6015625, + "learning_rate": 0.0001288153181327262, + "loss": 1.0573, + "step": 32505 + }, + { + "epoch": 0.47, + "grad_norm": 0.5546875, + "learning_rate": 0.00012879134295558457, + "loss": 1.1267, + "step": 32510 + }, + { + "epoch": 0.47, + "grad_norm": 0.56640625, + "learning_rate": 0.00012876736597376874, + "loss": 1.0102, + "step": 32515 + }, + { + "epoch": 0.47, + "grad_norm": 0.5625, + "learning_rate": 0.00012874338718878167, + "loss": 0.9099, + "step": 32520 + }, + { + "epoch": 0.47, + "grad_norm": 0.52734375, + "learning_rate": 0.00012871940660212636, + "loss": 1.0459, + "step": 32525 + }, + { + "epoch": 0.47, + "grad_norm": 0.6015625, + "learning_rate": 0.00012869542421530594, + "loss": 1.0324, + "step": 32530 + }, + { + "epoch": 0.47, + "grad_norm": 0.5703125, + "learning_rate": 0.0001286714400298236, + "loss": 0.8854, + "step": 32535 + }, + { + "epoch": 0.47, + "grad_norm": 0.5234375, + "learning_rate": 0.0001286474540471828, + "loss": 1.0234, + "step": 32540 + }, + { + "epoch": 0.47, + "grad_norm": 0.578125, + "learning_rate": 0.00012862346626888694, + "loss": 1.0255, + "step": 32545 + }, + { + "epoch": 0.47, + "grad_norm": 0.546875, + "learning_rate": 0.00012859947669643958, + "loss": 0.9126, + "step": 32550 + }, + { + "epoch": 0.47, + "grad_norm": 0.46484375, + "learning_rate": 0.00012857548533134452, + "loss": 1.0146, + "step": 32555 + }, + { + "epoch": 0.47, + "grad_norm": 0.578125, + "learning_rate": 0.00012855149217510544, + "loss": 0.852, + "step": 32560 + }, + { + "epoch": 0.47, + "grad_norm": 0.5390625, + "learning_rate": 0.0001285274972292263, + "loss": 0.8511, + "step": 32565 + }, + { + "epoch": 0.47, + "grad_norm": 0.55078125, + "learning_rate": 0.0001285035004952112, + "loss": 1.1151, + "step": 32570 + }, + { + "epoch": 0.47, + "grad_norm": 0.546875, + "learning_rate": 0.00012847950197456416, + "loss": 1.0862, + "step": 32575 + }, + { + "epoch": 0.47, + "grad_norm": 0.58984375, + "learning_rate": 0.00012845550166878957, + "loss": 0.9294, + "step": 32580 + }, + { + "epoch": 0.47, + "grad_norm": 0.546875, + "learning_rate": 0.0001284314995793917, + "loss": 1.1179, + "step": 32585 + }, + { + "epoch": 0.47, + "grad_norm": 0.55078125, + "learning_rate": 0.000128407495707875, + "loss": 0.9428, + "step": 32590 + }, + { + "epoch": 0.47, + "grad_norm": 0.56640625, + "learning_rate": 0.00012838349005574417, + "loss": 1.0087, + "step": 32595 + }, + { + "epoch": 0.47, + "grad_norm": 0.52734375, + "learning_rate": 0.00012835948262450385, + "loss": 0.8835, + "step": 32600 + }, + { + "epoch": 0.47, + "grad_norm": 0.53515625, + "learning_rate": 0.00012833547341565887, + "loss": 1.0477, + "step": 32605 + }, + { + "epoch": 0.47, + "grad_norm": 0.58203125, + "learning_rate": 0.00012831146243071415, + "loss": 1.0284, + "step": 32610 + }, + { + "epoch": 0.47, + "grad_norm": 0.54296875, + "learning_rate": 0.00012828744967117469, + "loss": 1.0356, + "step": 32615 + }, + { + "epoch": 0.47, + "grad_norm": 0.5703125, + "learning_rate": 0.00012826343513854568, + "loss": 1.0029, + "step": 32620 + }, + { + "epoch": 0.47, + "grad_norm": 0.5546875, + "learning_rate": 0.00012823941883433236, + "loss": 0.9734, + "step": 32625 + }, + { + "epoch": 0.47, + "grad_norm": 0.62890625, + "learning_rate": 0.00012821540076004016, + "loss": 1.0066, + "step": 32630 + }, + { + "epoch": 0.47, + "grad_norm": 0.578125, + "learning_rate": 0.00012819138091717445, + "loss": 0.9152, + "step": 32635 + }, + { + "epoch": 0.47, + "grad_norm": 0.53125, + "learning_rate": 0.00012816735930724088, + "loss": 0.8258, + "step": 32640 + }, + { + "epoch": 0.47, + "grad_norm": 0.4921875, + "learning_rate": 0.00012814333593174515, + "loss": 1.0175, + "step": 32645 + }, + { + "epoch": 0.47, + "grad_norm": 0.57421875, + "learning_rate": 0.00012811931079219309, + "loss": 1.1105, + "step": 32650 + }, + { + "epoch": 0.47, + "grad_norm": 0.58984375, + "learning_rate": 0.00012809528389009058, + "loss": 0.878, + "step": 32655 + }, + { + "epoch": 0.47, + "grad_norm": 0.58203125, + "learning_rate": 0.0001280712552269437, + "loss": 0.9104, + "step": 32660 + }, + { + "epoch": 0.47, + "grad_norm": 0.546875, + "learning_rate": 0.00012804722480425857, + "loss": 1.0456, + "step": 32665 + }, + { + "epoch": 0.47, + "grad_norm": 0.53515625, + "learning_rate": 0.00012802319262354142, + "loss": 0.8883, + "step": 32670 + }, + { + "epoch": 0.47, + "grad_norm": 0.609375, + "learning_rate": 0.00012799915868629867, + "loss": 1.0246, + "step": 32675 + }, + { + "epoch": 0.47, + "grad_norm": 0.6171875, + "learning_rate": 0.00012797512299403673, + "loss": 0.9482, + "step": 32680 + }, + { + "epoch": 0.47, + "grad_norm": 0.5625, + "learning_rate": 0.00012795108554826228, + "loss": 0.9019, + "step": 32685 + }, + { + "epoch": 0.47, + "grad_norm": 0.5, + "learning_rate": 0.0001279270463504819, + "loss": 0.963, + "step": 32690 + }, + { + "epoch": 0.47, + "grad_norm": 0.65234375, + "learning_rate": 0.0001279030054022025, + "loss": 0.8933, + "step": 32695 + }, + { + "epoch": 0.47, + "grad_norm": 0.64453125, + "learning_rate": 0.00012787896270493088, + "loss": 1.0861, + "step": 32700 + }, + { + "epoch": 0.47, + "grad_norm": 0.53515625, + "learning_rate": 0.00012785491826017414, + "loss": 0.9753, + "step": 32705 + }, + { + "epoch": 0.47, + "grad_norm": 0.5234375, + "learning_rate": 0.00012783087206943942, + "loss": 0.9748, + "step": 32710 + }, + { + "epoch": 0.47, + "grad_norm": 0.57421875, + "learning_rate": 0.00012780682413423395, + "loss": 0.9589, + "step": 32715 + }, + { + "epoch": 0.47, + "grad_norm": 0.5, + "learning_rate": 0.00012778277445606506, + "loss": 0.805, + "step": 32720 + }, + { + "epoch": 0.47, + "grad_norm": 0.6171875, + "learning_rate": 0.00012775872303644021, + "loss": 1.01, + "step": 32725 + }, + { + "epoch": 0.47, + "grad_norm": 0.7109375, + "learning_rate": 0.000127734669876867, + "loss": 1.0963, + "step": 32730 + }, + { + "epoch": 0.47, + "grad_norm": 0.59375, + "learning_rate": 0.00012771061497885312, + "loss": 0.9843, + "step": 32735 + }, + { + "epoch": 0.47, + "grad_norm": 0.55859375, + "learning_rate": 0.0001276865583439063, + "loss": 0.9517, + "step": 32740 + }, + { + "epoch": 0.47, + "grad_norm": 0.478515625, + "learning_rate": 0.00012766249997353448, + "loss": 0.8473, + "step": 32745 + }, + { + "epoch": 0.47, + "grad_norm": 0.6640625, + "learning_rate": 0.00012763843986924564, + "loss": 0.919, + "step": 32750 + }, + { + "epoch": 0.47, + "grad_norm": 0.62109375, + "learning_rate": 0.00012761437803254793, + "loss": 0.949, + "step": 32755 + }, + { + "epoch": 0.47, + "grad_norm": 0.5625, + "learning_rate": 0.00012759031446494957, + "loss": 0.9639, + "step": 32760 + }, + { + "epoch": 0.47, + "grad_norm": 0.5390625, + "learning_rate": 0.00012756624916795885, + "loss": 0.9237, + "step": 32765 + }, + { + "epoch": 0.47, + "grad_norm": 0.55078125, + "learning_rate": 0.00012754218214308427, + "loss": 0.947, + "step": 32770 + }, + { + "epoch": 0.47, + "grad_norm": 0.48828125, + "learning_rate": 0.0001275181133918343, + "loss": 0.9556, + "step": 32775 + }, + { + "epoch": 0.47, + "grad_norm": 0.51953125, + "learning_rate": 0.00012749404291571766, + "loss": 1.0168, + "step": 32780 + }, + { + "epoch": 0.47, + "grad_norm": 0.5546875, + "learning_rate": 0.0001274699707162431, + "loss": 0.9174, + "step": 32785 + }, + { + "epoch": 0.47, + "grad_norm": 0.56640625, + "learning_rate": 0.0001274458967949195, + "loss": 0.8608, + "step": 32790 + }, + { + "epoch": 0.47, + "grad_norm": 0.6015625, + "learning_rate": 0.00012742182115325584, + "loss": 1.1732, + "step": 32795 + }, + { + "epoch": 0.47, + "grad_norm": 0.59765625, + "learning_rate": 0.00012739774379276117, + "loss": 1.1252, + "step": 32800 + }, + { + "epoch": 0.47, + "grad_norm": 0.67578125, + "learning_rate": 0.00012737366471494472, + "loss": 1.0629, + "step": 32805 + }, + { + "epoch": 0.47, + "grad_norm": 0.6796875, + "learning_rate": 0.0001273495839213158, + "loss": 1.0418, + "step": 32810 + }, + { + "epoch": 0.47, + "grad_norm": 0.60546875, + "learning_rate": 0.0001273255014133838, + "loss": 0.868, + "step": 32815 + }, + { + "epoch": 0.47, + "grad_norm": 0.546875, + "learning_rate": 0.00012730141719265826, + "loss": 0.9079, + "step": 32820 + }, + { + "epoch": 0.47, + "grad_norm": 0.60546875, + "learning_rate": 0.0001272773312606488, + "loss": 0.9518, + "step": 32825 + }, + { + "epoch": 0.47, + "grad_norm": 0.65625, + "learning_rate": 0.00012725324361886515, + "loss": 0.9046, + "step": 32830 + }, + { + "epoch": 0.47, + "grad_norm": 0.54296875, + "learning_rate": 0.00012722915426881715, + "loss": 0.9271, + "step": 32835 + }, + { + "epoch": 0.47, + "grad_norm": 0.6484375, + "learning_rate": 0.00012720506321201472, + "loss": 0.9904, + "step": 32840 + }, + { + "epoch": 0.47, + "grad_norm": 0.59375, + "learning_rate": 0.00012718097044996798, + "loss": 0.9101, + "step": 32845 + }, + { + "epoch": 0.47, + "grad_norm": 0.53515625, + "learning_rate": 0.00012715687598418706, + "loss": 0.937, + "step": 32850 + }, + { + "epoch": 0.47, + "grad_norm": 0.5390625, + "learning_rate": 0.00012713277981618218, + "loss": 1.0701, + "step": 32855 + }, + { + "epoch": 0.47, + "grad_norm": 0.5859375, + "learning_rate": 0.0001271086819474638, + "loss": 0.8446, + "step": 32860 + }, + { + "epoch": 0.47, + "grad_norm": 0.609375, + "learning_rate": 0.00012708458237954234, + "loss": 0.926, + "step": 32865 + }, + { + "epoch": 0.47, + "grad_norm": 0.64453125, + "learning_rate": 0.00012706048111392845, + "loss": 1.0323, + "step": 32870 + }, + { + "epoch": 0.47, + "grad_norm": 0.578125, + "learning_rate": 0.00012703637815213273, + "loss": 0.9371, + "step": 32875 + }, + { + "epoch": 0.47, + "grad_norm": 0.6015625, + "learning_rate": 0.00012701227349566608, + "loss": 0.8327, + "step": 32880 + }, + { + "epoch": 0.47, + "grad_norm": 0.58984375, + "learning_rate": 0.00012698816714603933, + "loss": 1.0061, + "step": 32885 + }, + { + "epoch": 0.47, + "grad_norm": 0.60546875, + "learning_rate": 0.0001269640591047635, + "loss": 0.91, + "step": 32890 + }, + { + "epoch": 0.47, + "grad_norm": 0.49609375, + "learning_rate": 0.00012693994937334983, + "loss": 0.823, + "step": 32895 + }, + { + "epoch": 0.47, + "grad_norm": 0.5703125, + "learning_rate": 0.0001269158379533094, + "loss": 0.9868, + "step": 32900 + }, + { + "epoch": 0.47, + "grad_norm": 0.59765625, + "learning_rate": 0.00012689172484615357, + "loss": 0.9551, + "step": 32905 + }, + { + "epoch": 0.47, + "grad_norm": 0.64453125, + "learning_rate": 0.00012686761005339383, + "loss": 0.8869, + "step": 32910 + }, + { + "epoch": 0.47, + "grad_norm": 0.5625, + "learning_rate": 0.00012684349357654166, + "loss": 0.941, + "step": 32915 + }, + { + "epoch": 0.47, + "grad_norm": 0.6171875, + "learning_rate": 0.00012681937541710877, + "loss": 0.9691, + "step": 32920 + }, + { + "epoch": 0.47, + "grad_norm": 0.486328125, + "learning_rate": 0.0001267952555766069, + "loss": 0.8838, + "step": 32925 + }, + { + "epoch": 0.47, + "grad_norm": 0.60546875, + "learning_rate": 0.00012677113405654784, + "loss": 0.8744, + "step": 32930 + }, + { + "epoch": 0.47, + "grad_norm": 0.6171875, + "learning_rate": 0.00012674701085844365, + "loss": 1.0615, + "step": 32935 + }, + { + "epoch": 0.47, + "grad_norm": 0.6171875, + "learning_rate": 0.00012672288598380632, + "loss": 0.9642, + "step": 32940 + }, + { + "epoch": 0.47, + "grad_norm": 0.486328125, + "learning_rate": 0.0001266987594341481, + "loss": 0.9284, + "step": 32945 + }, + { + "epoch": 0.47, + "grad_norm": 0.625, + "learning_rate": 0.00012667463121098123, + "loss": 1.0717, + "step": 32950 + }, + { + "epoch": 0.47, + "grad_norm": 0.62109375, + "learning_rate": 0.00012665050131581806, + "loss": 0.9979, + "step": 32955 + }, + { + "epoch": 0.47, + "grad_norm": 0.65625, + "learning_rate": 0.00012662636975017114, + "loss": 1.0399, + "step": 32960 + }, + { + "epoch": 0.47, + "grad_norm": 0.59375, + "learning_rate": 0.00012660223651555304, + "loss": 1.1574, + "step": 32965 + }, + { + "epoch": 0.47, + "grad_norm": 0.6640625, + "learning_rate": 0.00012657810161347644, + "loss": 0.8294, + "step": 32970 + }, + { + "epoch": 0.47, + "grad_norm": 0.5390625, + "learning_rate": 0.00012655396504545417, + "loss": 1.0261, + "step": 32975 + }, + { + "epoch": 0.47, + "grad_norm": 0.5546875, + "learning_rate": 0.00012652982681299915, + "loss": 0.9312, + "step": 32980 + }, + { + "epoch": 0.47, + "grad_norm": 0.59765625, + "learning_rate": 0.00012650568691762435, + "loss": 0.8175, + "step": 32985 + }, + { + "epoch": 0.47, + "grad_norm": 0.5703125, + "learning_rate": 0.00012648154536084292, + "loss": 1.0048, + "step": 32990 + }, + { + "epoch": 0.47, + "grad_norm": 0.5546875, + "learning_rate": 0.00012645740214416805, + "loss": 0.8705, + "step": 32995 + }, + { + "epoch": 0.47, + "grad_norm": 0.51171875, + "learning_rate": 0.0001264332572691131, + "loss": 0.8976, + "step": 33000 + }, + { + "epoch": 0.47, + "grad_norm": 0.70703125, + "learning_rate": 0.00012640911073719146, + "loss": 1.0398, + "step": 33005 + }, + { + "epoch": 0.47, + "grad_norm": 0.55078125, + "learning_rate": 0.00012638496254991667, + "loss": 1.1284, + "step": 33010 + }, + { + "epoch": 0.47, + "grad_norm": 0.5234375, + "learning_rate": 0.0001263608127088024, + "loss": 0.9641, + "step": 33015 + }, + { + "epoch": 0.47, + "grad_norm": 0.65234375, + "learning_rate": 0.00012633666121536236, + "loss": 0.8835, + "step": 33020 + }, + { + "epoch": 0.47, + "grad_norm": 0.5546875, + "learning_rate": 0.00012631250807111042, + "loss": 0.8836, + "step": 33025 + }, + { + "epoch": 0.47, + "grad_norm": 0.5703125, + "learning_rate": 0.0001262883532775605, + "loss": 0.9024, + "step": 33030 + }, + { + "epoch": 0.47, + "grad_norm": 0.55078125, + "learning_rate": 0.00012626419683622664, + "loss": 0.8939, + "step": 33035 + }, + { + "epoch": 0.47, + "grad_norm": 0.56640625, + "learning_rate": 0.000126240038748623, + "loss": 0.8697, + "step": 33040 + }, + { + "epoch": 0.47, + "grad_norm": 0.59375, + "learning_rate": 0.00012621587901626385, + "loss": 0.9675, + "step": 33045 + }, + { + "epoch": 0.47, + "grad_norm": 0.66015625, + "learning_rate": 0.00012619171764066358, + "loss": 1.0066, + "step": 33050 + }, + { + "epoch": 0.47, + "grad_norm": 0.484375, + "learning_rate": 0.0001261675546233366, + "loss": 0.9397, + "step": 33055 + }, + { + "epoch": 0.47, + "grad_norm": 0.51171875, + "learning_rate": 0.00012614338996579748, + "loss": 0.9274, + "step": 33060 + }, + { + "epoch": 0.47, + "grad_norm": 0.478515625, + "learning_rate": 0.0001261192236695609, + "loss": 0.8634, + "step": 33065 + }, + { + "epoch": 0.47, + "grad_norm": 0.609375, + "learning_rate": 0.00012609505573614167, + "loss": 0.9893, + "step": 33070 + }, + { + "epoch": 0.47, + "grad_norm": 0.55859375, + "learning_rate": 0.0001260708861670546, + "loss": 0.927, + "step": 33075 + }, + { + "epoch": 0.47, + "grad_norm": 0.61328125, + "learning_rate": 0.0001260467149638147, + "loss": 0.8995, + "step": 33080 + }, + { + "epoch": 0.47, + "grad_norm": 0.5546875, + "learning_rate": 0.00012602254212793702, + "loss": 0.9559, + "step": 33085 + }, + { + "epoch": 0.47, + "grad_norm": 0.578125, + "learning_rate": 0.00012599836766093677, + "loss": 0.8693, + "step": 33090 + }, + { + "epoch": 0.47, + "grad_norm": 0.5390625, + "learning_rate": 0.00012597419156432923, + "loss": 0.8482, + "step": 33095 + }, + { + "epoch": 0.47, + "grad_norm": 0.5703125, + "learning_rate": 0.0001259500138396298, + "loss": 1.0206, + "step": 33100 + }, + { + "epoch": 0.47, + "grad_norm": 0.546875, + "learning_rate": 0.00012592583448835394, + "loss": 1.0884, + "step": 33105 + }, + { + "epoch": 0.47, + "grad_norm": 0.703125, + "learning_rate": 0.00012590165351201725, + "loss": 1.1486, + "step": 33110 + }, + { + "epoch": 0.48, + "grad_norm": 0.65234375, + "learning_rate": 0.0001258774709121354, + "loss": 0.8503, + "step": 33115 + }, + { + "epoch": 0.48, + "grad_norm": 0.62109375, + "learning_rate": 0.0001258532866902242, + "loss": 0.9517, + "step": 33120 + }, + { + "epoch": 0.48, + "grad_norm": 0.58203125, + "learning_rate": 0.00012582910084779956, + "loss": 1.1299, + "step": 33125 + }, + { + "epoch": 0.48, + "grad_norm": 0.55078125, + "learning_rate": 0.00012580491338637744, + "loss": 0.9218, + "step": 33130 + }, + { + "epoch": 0.48, + "grad_norm": 0.5390625, + "learning_rate": 0.000125780724307474, + "loss": 0.9923, + "step": 33135 + }, + { + "epoch": 0.48, + "grad_norm": 0.490234375, + "learning_rate": 0.0001257565336126054, + "loss": 0.9179, + "step": 33140 + }, + { + "epoch": 0.48, + "grad_norm": 0.5859375, + "learning_rate": 0.00012573234130328789, + "loss": 0.8966, + "step": 33145 + }, + { + "epoch": 0.48, + "grad_norm": 0.5703125, + "learning_rate": 0.00012570814738103794, + "loss": 0.7933, + "step": 33150 + }, + { + "epoch": 0.48, + "grad_norm": 0.55078125, + "learning_rate": 0.00012568395184737205, + "loss": 0.9099, + "step": 33155 + }, + { + "epoch": 0.48, + "grad_norm": 0.53125, + "learning_rate": 0.0001256597547038068, + "loss": 1.0154, + "step": 33160 + }, + { + "epoch": 0.48, + "grad_norm": 0.59765625, + "learning_rate": 0.0001256355559518589, + "loss": 0.9243, + "step": 33165 + }, + { + "epoch": 0.48, + "grad_norm": 0.59765625, + "learning_rate": 0.00012561135559304516, + "loss": 1.0208, + "step": 33170 + }, + { + "epoch": 0.48, + "grad_norm": 0.53515625, + "learning_rate": 0.00012558715362888246, + "loss": 1.0687, + "step": 33175 + }, + { + "epoch": 0.48, + "grad_norm": 0.51953125, + "learning_rate": 0.00012556295006088783, + "loss": 0.9965, + "step": 33180 + }, + { + "epoch": 0.48, + "grad_norm": 0.55859375, + "learning_rate": 0.0001255387448905784, + "loss": 1.0748, + "step": 33185 + }, + { + "epoch": 0.48, + "grad_norm": 0.53515625, + "learning_rate": 0.00012551453811947136, + "loss": 0.8903, + "step": 33190 + }, + { + "epoch": 0.48, + "grad_norm": 0.55859375, + "learning_rate": 0.000125490329749084, + "loss": 1.049, + "step": 33195 + }, + { + "epoch": 0.48, + "grad_norm": 0.5078125, + "learning_rate": 0.0001254661197809337, + "loss": 0.7615, + "step": 33200 + }, + { + "epoch": 0.48, + "grad_norm": 0.640625, + "learning_rate": 0.00012544190821653806, + "loss": 0.8462, + "step": 33205 + }, + { + "epoch": 0.48, + "grad_norm": 0.58203125, + "learning_rate": 0.00012541769505741465, + "loss": 0.8998, + "step": 33210 + }, + { + "epoch": 0.48, + "grad_norm": 0.65234375, + "learning_rate": 0.00012539348030508115, + "loss": 0.8996, + "step": 33215 + }, + { + "epoch": 0.48, + "grad_norm": 0.59375, + "learning_rate": 0.00012536926396105534, + "loss": 0.9038, + "step": 33220 + }, + { + "epoch": 0.48, + "grad_norm": 0.546875, + "learning_rate": 0.00012534504602685522, + "loss": 0.8514, + "step": 33225 + }, + { + "epoch": 0.48, + "grad_norm": 0.53515625, + "learning_rate": 0.00012532082650399873, + "loss": 0.8233, + "step": 33230 + }, + { + "epoch": 0.48, + "grad_norm": 0.515625, + "learning_rate": 0.000125296605394004, + "loss": 0.9716, + "step": 33235 + }, + { + "epoch": 0.48, + "grad_norm": 0.5546875, + "learning_rate": 0.0001252723826983892, + "loss": 1.0804, + "step": 33240 + }, + { + "epoch": 0.48, + "grad_norm": 0.75390625, + "learning_rate": 0.00012524815841867272, + "loss": 1.1893, + "step": 33245 + }, + { + "epoch": 0.48, + "grad_norm": 0.54296875, + "learning_rate": 0.00012522393255637293, + "loss": 0.9279, + "step": 33250 + }, + { + "epoch": 0.48, + "grad_norm": 0.5546875, + "learning_rate": 0.00012519970511300826, + "loss": 0.8892, + "step": 33255 + }, + { + "epoch": 0.48, + "grad_norm": 0.71875, + "learning_rate": 0.00012517547609009738, + "loss": 0.9727, + "step": 33260 + }, + { + "epoch": 0.48, + "grad_norm": 0.7421875, + "learning_rate": 0.00012515124548915905, + "loss": 0.9415, + "step": 33265 + }, + { + "epoch": 0.48, + "grad_norm": 0.6015625, + "learning_rate": 0.00012512701331171195, + "loss": 0.9745, + "step": 33270 + }, + { + "epoch": 0.48, + "grad_norm": 0.5078125, + "learning_rate": 0.00012510277955927505, + "loss": 0.8611, + "step": 33275 + }, + { + "epoch": 0.48, + "grad_norm": 0.54296875, + "learning_rate": 0.00012507854423336737, + "loss": 1.0678, + "step": 33280 + }, + { + "epoch": 0.48, + "grad_norm": 0.59375, + "learning_rate": 0.0001250543073355079, + "loss": 0.8843, + "step": 33285 + }, + { + "epoch": 0.48, + "grad_norm": 0.625, + "learning_rate": 0.000125030068867216, + "loss": 0.8916, + "step": 33290 + }, + { + "epoch": 0.48, + "grad_norm": 0.5703125, + "learning_rate": 0.0001250058288300108, + "loss": 0.9249, + "step": 33295 + }, + { + "epoch": 0.48, + "grad_norm": 0.625, + "learning_rate": 0.00012498158722541183, + "loss": 0.9577, + "step": 33300 + }, + { + "epoch": 0.48, + "grad_norm": 0.52734375, + "learning_rate": 0.0001249573440549385, + "loss": 1.1123, + "step": 33305 + }, + { + "epoch": 0.48, + "grad_norm": 0.5390625, + "learning_rate": 0.00012493309932011038, + "loss": 0.8586, + "step": 33310 + }, + { + "epoch": 0.48, + "grad_norm": 0.609375, + "learning_rate": 0.0001249088530224473, + "loss": 1.082, + "step": 33315 + }, + { + "epoch": 0.48, + "grad_norm": 0.53515625, + "learning_rate": 0.00012488460516346886, + "loss": 0.9669, + "step": 33320 + }, + { + "epoch": 0.48, + "grad_norm": 0.5703125, + "learning_rate": 0.000124860355744695, + "loss": 0.9882, + "step": 33325 + }, + { + "epoch": 0.48, + "grad_norm": 0.54296875, + "learning_rate": 0.0001248361047676458, + "loss": 0.9785, + "step": 33330 + }, + { + "epoch": 0.48, + "grad_norm": 0.50390625, + "learning_rate": 0.00012481185223384123, + "loss": 0.8829, + "step": 33335 + }, + { + "epoch": 0.48, + "grad_norm": 0.59375, + "learning_rate": 0.00012478759814480155, + "loss": 1.1137, + "step": 33340 + }, + { + "epoch": 0.48, + "grad_norm": 0.6484375, + "learning_rate": 0.00012476334250204694, + "loss": 1.1887, + "step": 33345 + }, + { + "epoch": 0.48, + "grad_norm": 0.58203125, + "learning_rate": 0.00012473908530709782, + "loss": 0.847, + "step": 33350 + }, + { + "epoch": 0.48, + "grad_norm": 0.53125, + "learning_rate": 0.00012471482656147467, + "loss": 0.9713, + "step": 33355 + }, + { + "epoch": 0.48, + "grad_norm": 0.5625, + "learning_rate": 0.00012469056626669803, + "loss": 0.8042, + "step": 33360 + }, + { + "epoch": 0.48, + "grad_norm": 0.6171875, + "learning_rate": 0.0001246663044242886, + "loss": 0.937, + "step": 33365 + }, + { + "epoch": 0.48, + "grad_norm": 0.625, + "learning_rate": 0.0001246420410357671, + "loss": 0.9339, + "step": 33370 + }, + { + "epoch": 0.48, + "grad_norm": 0.59375, + "learning_rate": 0.00012461777610265444, + "loss": 0.8133, + "step": 33375 + }, + { + "epoch": 0.48, + "grad_norm": 0.55859375, + "learning_rate": 0.00012459350962647147, + "loss": 0.9549, + "step": 33380 + }, + { + "epoch": 0.48, + "grad_norm": 0.57421875, + "learning_rate": 0.00012456924160873936, + "loss": 0.9742, + "step": 33385 + }, + { + "epoch": 0.48, + "grad_norm": 0.55859375, + "learning_rate": 0.00012454497205097916, + "loss": 1.0086, + "step": 33390 + }, + { + "epoch": 0.48, + "grad_norm": 0.55078125, + "learning_rate": 0.0001245207009547122, + "loss": 0.9169, + "step": 33395 + }, + { + "epoch": 0.48, + "grad_norm": 0.62890625, + "learning_rate": 0.00012449642832145977, + "loss": 1.0439, + "step": 33400 + }, + { + "epoch": 0.48, + "grad_norm": 0.5546875, + "learning_rate": 0.0001244721541527433, + "loss": 0.9336, + "step": 33405 + }, + { + "epoch": 0.48, + "grad_norm": 0.609375, + "learning_rate": 0.00012444787845008432, + "loss": 0.972, + "step": 33410 + }, + { + "epoch": 0.48, + "grad_norm": 0.6953125, + "learning_rate": 0.00012442360121500448, + "loss": 1.0084, + "step": 33415 + }, + { + "epoch": 0.48, + "grad_norm": 0.5546875, + "learning_rate": 0.00012439932244902554, + "loss": 0.9818, + "step": 33420 + }, + { + "epoch": 0.48, + "grad_norm": 0.625, + "learning_rate": 0.00012437504215366926, + "loss": 0.9483, + "step": 33425 + }, + { + "epoch": 0.48, + "grad_norm": 0.71875, + "learning_rate": 0.00012435076033045757, + "loss": 0.9617, + "step": 33430 + }, + { + "epoch": 0.48, + "grad_norm": 0.56640625, + "learning_rate": 0.0001243264769809125, + "loss": 0.9819, + "step": 33435 + }, + { + "epoch": 0.48, + "grad_norm": 0.5390625, + "learning_rate": 0.0001243021921065561, + "loss": 1.0103, + "step": 33440 + }, + { + "epoch": 0.48, + "grad_norm": 0.59765625, + "learning_rate": 0.00012427790570891068, + "loss": 0.8925, + "step": 33445 + }, + { + "epoch": 0.48, + "grad_norm": 0.52734375, + "learning_rate": 0.00012425361778949846, + "loss": 0.8525, + "step": 33450 + }, + { + "epoch": 0.48, + "grad_norm": 0.60546875, + "learning_rate": 0.00012422932834984187, + "loss": 1.038, + "step": 33455 + }, + { + "epoch": 0.48, + "grad_norm": 0.53125, + "learning_rate": 0.00012420503739146333, + "loss": 0.9946, + "step": 33460 + }, + { + "epoch": 0.48, + "grad_norm": 0.58203125, + "learning_rate": 0.00012418074491588553, + "loss": 1.1663, + "step": 33465 + }, + { + "epoch": 0.48, + "grad_norm": 0.55859375, + "learning_rate": 0.0001241564509246311, + "loss": 0.9745, + "step": 33470 + }, + { + "epoch": 0.48, + "grad_norm": 0.423828125, + "learning_rate": 0.00012413215541922282, + "loss": 0.8693, + "step": 33475 + }, + { + "epoch": 0.48, + "grad_norm": 0.55078125, + "learning_rate": 0.00012410785840118353, + "loss": 1.0118, + "step": 33480 + }, + { + "epoch": 0.48, + "grad_norm": 0.66015625, + "learning_rate": 0.0001240835598720362, + "loss": 0.9253, + "step": 33485 + }, + { + "epoch": 0.48, + "grad_norm": 0.515625, + "learning_rate": 0.00012405925983330392, + "loss": 0.829, + "step": 33490 + }, + { + "epoch": 0.48, + "grad_norm": 0.5625, + "learning_rate": 0.00012403495828650985, + "loss": 0.8592, + "step": 33495 + }, + { + "epoch": 0.48, + "grad_norm": 0.578125, + "learning_rate": 0.00012401065523317723, + "loss": 0.9773, + "step": 33500 + }, + { + "epoch": 0.48, + "grad_norm": 0.50390625, + "learning_rate": 0.00012398635067482937, + "loss": 1.0826, + "step": 33505 + }, + { + "epoch": 0.48, + "grad_norm": 0.53125, + "learning_rate": 0.00012396204461298974, + "loss": 0.9736, + "step": 33510 + }, + { + "epoch": 0.48, + "grad_norm": 0.640625, + "learning_rate": 0.00012393773704918185, + "loss": 0.9207, + "step": 33515 + }, + { + "epoch": 0.48, + "grad_norm": 0.55078125, + "learning_rate": 0.0001239134279849294, + "loss": 1.0038, + "step": 33520 + }, + { + "epoch": 0.48, + "grad_norm": 0.5859375, + "learning_rate": 0.00012388911742175599, + "loss": 1.0029, + "step": 33525 + }, + { + "epoch": 0.48, + "grad_norm": 0.6015625, + "learning_rate": 0.0001238648053611855, + "loss": 1.0463, + "step": 33530 + }, + { + "epoch": 0.48, + "grad_norm": 0.6015625, + "learning_rate": 0.00012384049180474182, + "loss": 1.1262, + "step": 33535 + }, + { + "epoch": 0.48, + "grad_norm": 0.55859375, + "learning_rate": 0.00012381617675394897, + "loss": 0.8751, + "step": 33540 + }, + { + "epoch": 0.48, + "grad_norm": 0.6484375, + "learning_rate": 0.00012379186021033105, + "loss": 1.022, + "step": 33545 + }, + { + "epoch": 0.48, + "grad_norm": 0.58203125, + "learning_rate": 0.00012376754217541225, + "loss": 1.0283, + "step": 33550 + }, + { + "epoch": 0.48, + "grad_norm": 0.55078125, + "learning_rate": 0.00012374322265071682, + "loss": 1.0613, + "step": 33555 + }, + { + "epoch": 0.48, + "grad_norm": 0.578125, + "learning_rate": 0.00012371890163776912, + "loss": 0.9887, + "step": 33560 + }, + { + "epoch": 0.48, + "grad_norm": 0.6171875, + "learning_rate": 0.0001236945791380937, + "loss": 1.0686, + "step": 33565 + }, + { + "epoch": 0.48, + "grad_norm": 0.578125, + "learning_rate": 0.00012367025515321503, + "loss": 1.0544, + "step": 33570 + }, + { + "epoch": 0.48, + "grad_norm": 0.73046875, + "learning_rate": 0.00012364592968465784, + "loss": 1.1148, + "step": 33575 + }, + { + "epoch": 0.48, + "grad_norm": 0.5625, + "learning_rate": 0.00012362160273394685, + "loss": 1.0274, + "step": 33580 + }, + { + "epoch": 0.48, + "grad_norm": 0.6875, + "learning_rate": 0.0001235972743026069, + "loss": 1.0751, + "step": 33585 + }, + { + "epoch": 0.48, + "grad_norm": 0.53125, + "learning_rate": 0.0001235729443921629, + "loss": 1.0264, + "step": 33590 + }, + { + "epoch": 0.48, + "grad_norm": 0.55859375, + "learning_rate": 0.0001235486130041399, + "loss": 0.924, + "step": 33595 + }, + { + "epoch": 0.48, + "grad_norm": 0.5859375, + "learning_rate": 0.00012352428014006302, + "loss": 1.1058, + "step": 33600 + }, + { + "epoch": 0.48, + "grad_norm": 0.5625, + "learning_rate": 0.0001234999458014575, + "loss": 0.944, + "step": 33605 + }, + { + "epoch": 0.48, + "grad_norm": 0.54296875, + "learning_rate": 0.00012347560998984857, + "loss": 1.0496, + "step": 33610 + }, + { + "epoch": 0.48, + "grad_norm": 0.5703125, + "learning_rate": 0.0001234512727067617, + "loss": 1.0869, + "step": 33615 + }, + { + "epoch": 0.48, + "grad_norm": 0.53125, + "learning_rate": 0.00012342693395372232, + "loss": 0.9073, + "step": 33620 + }, + { + "epoch": 0.48, + "grad_norm": 0.61328125, + "learning_rate": 0.00012340259373225604, + "loss": 0.9367, + "step": 33625 + }, + { + "epoch": 0.48, + "grad_norm": 0.6328125, + "learning_rate": 0.00012337825204388858, + "loss": 0.9548, + "step": 33630 + }, + { + "epoch": 0.48, + "grad_norm": 0.5703125, + "learning_rate": 0.0001233539088901456, + "loss": 0.902, + "step": 33635 + }, + { + "epoch": 0.48, + "grad_norm": 0.609375, + "learning_rate": 0.000123329564272553, + "loss": 1.0901, + "step": 33640 + }, + { + "epoch": 0.48, + "grad_norm": 0.58984375, + "learning_rate": 0.0001233052181926368, + "loss": 0.9495, + "step": 33645 + }, + { + "epoch": 0.48, + "grad_norm": 0.515625, + "learning_rate": 0.00012328087065192295, + "loss": 1.1589, + "step": 33650 + }, + { + "epoch": 0.48, + "grad_norm": 0.58203125, + "learning_rate": 0.00012325652165193763, + "loss": 0.8439, + "step": 33655 + }, + { + "epoch": 0.48, + "grad_norm": 0.64453125, + "learning_rate": 0.00012323217119420706, + "loss": 0.9891, + "step": 33660 + }, + { + "epoch": 0.48, + "grad_norm": 0.5546875, + "learning_rate": 0.00012320781928025747, + "loss": 0.8962, + "step": 33665 + }, + { + "epoch": 0.48, + "grad_norm": 0.66015625, + "learning_rate": 0.0001231834659116154, + "loss": 0.9551, + "step": 33670 + }, + { + "epoch": 0.48, + "grad_norm": 0.56640625, + "learning_rate": 0.00012315911108980727, + "loss": 0.9165, + "step": 33675 + }, + { + "epoch": 0.48, + "grad_norm": 0.59765625, + "learning_rate": 0.00012313475481635965, + "loss": 0.8704, + "step": 33680 + }, + { + "epoch": 0.48, + "grad_norm": 0.5546875, + "learning_rate": 0.0001231103970927993, + "loss": 1.014, + "step": 33685 + }, + { + "epoch": 0.48, + "grad_norm": 0.515625, + "learning_rate": 0.0001230860379206529, + "loss": 0.9021, + "step": 33690 + }, + { + "epoch": 0.48, + "grad_norm": 0.51953125, + "learning_rate": 0.00012306167730144737, + "loss": 0.9223, + "step": 33695 + }, + { + "epoch": 0.48, + "grad_norm": 0.57421875, + "learning_rate": 0.00012303731523670964, + "loss": 0.8964, + "step": 33700 + }, + { + "epoch": 0.48, + "grad_norm": 0.5546875, + "learning_rate": 0.00012301295172796673, + "loss": 1.0116, + "step": 33705 + }, + { + "epoch": 0.48, + "grad_norm": 0.5390625, + "learning_rate": 0.00012298858677674585, + "loss": 0.9617, + "step": 33710 + }, + { + "epoch": 0.48, + "grad_norm": 0.84765625, + "learning_rate": 0.00012296422038457413, + "loss": 0.9698, + "step": 33715 + }, + { + "epoch": 0.48, + "grad_norm": 0.6171875, + "learning_rate": 0.0001229398525529789, + "loss": 0.9268, + "step": 33720 + }, + { + "epoch": 0.48, + "grad_norm": 0.515625, + "learning_rate": 0.00012291548328348764, + "loss": 1.0052, + "step": 33725 + }, + { + "epoch": 0.48, + "grad_norm": 0.546875, + "learning_rate": 0.00012289111257762775, + "loss": 1.0781, + "step": 33730 + }, + { + "epoch": 0.48, + "grad_norm": 0.62109375, + "learning_rate": 0.00012286674043692687, + "loss": 1.0105, + "step": 33735 + }, + { + "epoch": 0.48, + "grad_norm": 0.515625, + "learning_rate": 0.00012284236686291265, + "loss": 0.9892, + "step": 33740 + }, + { + "epoch": 0.48, + "grad_norm": 0.546875, + "learning_rate": 0.00012281799185711285, + "loss": 0.9825, + "step": 33745 + }, + { + "epoch": 0.48, + "grad_norm": 0.51171875, + "learning_rate": 0.00012279361542105537, + "loss": 0.9173, + "step": 33750 + }, + { + "epoch": 0.48, + "grad_norm": 0.58203125, + "learning_rate": 0.0001227692375562681, + "loss": 0.956, + "step": 33755 + }, + { + "epoch": 0.48, + "grad_norm": 0.609375, + "learning_rate": 0.00012274485826427905, + "loss": 1.0413, + "step": 33760 + }, + { + "epoch": 0.48, + "grad_norm": 0.59375, + "learning_rate": 0.00012272047754661642, + "loss": 0.9672, + "step": 33765 + }, + { + "epoch": 0.48, + "grad_norm": 0.5234375, + "learning_rate": 0.00012269609540480834, + "loss": 0.9924, + "step": 33770 + }, + { + "epoch": 0.48, + "grad_norm": 0.62109375, + "learning_rate": 0.0001226717118403832, + "loss": 0.915, + "step": 33775 + }, + { + "epoch": 0.48, + "grad_norm": 0.74609375, + "learning_rate": 0.00012264732685486932, + "loss": 0.9189, + "step": 33780 + }, + { + "epoch": 0.48, + "grad_norm": 0.5, + "learning_rate": 0.0001226229404497952, + "loss": 0.9149, + "step": 33785 + }, + { + "epoch": 0.48, + "grad_norm": 0.54296875, + "learning_rate": 0.0001225985526266894, + "loss": 0.9591, + "step": 33790 + }, + { + "epoch": 0.48, + "grad_norm": 0.58984375, + "learning_rate": 0.0001225741633870806, + "loss": 0.9421, + "step": 33795 + }, + { + "epoch": 0.48, + "grad_norm": 0.546875, + "learning_rate": 0.00012254977273249752, + "loss": 0.8335, + "step": 33800 + }, + { + "epoch": 0.48, + "grad_norm": 0.490234375, + "learning_rate": 0.000122525380664469, + "loss": 0.8758, + "step": 33805 + }, + { + "epoch": 0.48, + "grad_norm": 0.6328125, + "learning_rate": 0.00012250098718452398, + "loss": 0.9467, + "step": 33810 + }, + { + "epoch": 0.49, + "grad_norm": 0.57421875, + "learning_rate": 0.00012247659229419147, + "loss": 0.9301, + "step": 33815 + }, + { + "epoch": 0.49, + "grad_norm": 0.5390625, + "learning_rate": 0.0001224521959950005, + "loss": 0.9686, + "step": 33820 + }, + { + "epoch": 0.49, + "grad_norm": 0.5390625, + "learning_rate": 0.00012242779828848033, + "loss": 0.8002, + "step": 33825 + }, + { + "epoch": 0.49, + "grad_norm": 0.58984375, + "learning_rate": 0.00012240339917616027, + "loss": 0.9443, + "step": 33830 + }, + { + "epoch": 0.49, + "grad_norm": 0.5546875, + "learning_rate": 0.0001223789986595696, + "loss": 1.0118, + "step": 33835 + }, + { + "epoch": 0.49, + "grad_norm": 0.55078125, + "learning_rate": 0.0001223545967402378, + "loss": 0.9694, + "step": 33840 + }, + { + "epoch": 0.49, + "grad_norm": 0.5390625, + "learning_rate": 0.00012233019341969443, + "loss": 0.9285, + "step": 33845 + }, + { + "epoch": 0.49, + "grad_norm": 0.53125, + "learning_rate": 0.00012230578869946909, + "loss": 1.1095, + "step": 33850 + }, + { + "epoch": 0.49, + "grad_norm": 0.73046875, + "learning_rate": 0.00012228138258109153, + "loss": 1.1028, + "step": 33855 + }, + { + "epoch": 0.49, + "grad_norm": 0.56640625, + "learning_rate": 0.0001222569750660915, + "loss": 0.9773, + "step": 33860 + }, + { + "epoch": 0.49, + "grad_norm": 0.5703125, + "learning_rate": 0.00012223256615599896, + "loss": 0.9878, + "step": 33865 + }, + { + "epoch": 0.49, + "grad_norm": 0.59375, + "learning_rate": 0.00012220815585234384, + "loss": 1.0119, + "step": 33870 + }, + { + "epoch": 0.49, + "grad_norm": 0.59375, + "learning_rate": 0.00012218374415665624, + "loss": 1.0379, + "step": 33875 + }, + { + "epoch": 0.49, + "grad_norm": 0.51171875, + "learning_rate": 0.00012215933107046626, + "loss": 1.0315, + "step": 33880 + }, + { + "epoch": 0.49, + "grad_norm": 0.578125, + "learning_rate": 0.00012213491659530417, + "loss": 0.8895, + "step": 33885 + }, + { + "epoch": 0.49, + "grad_norm": 0.5390625, + "learning_rate": 0.0001221105007327003, + "loss": 0.8463, + "step": 33890 + }, + { + "epoch": 0.49, + "grad_norm": 0.50390625, + "learning_rate": 0.0001220860834841851, + "loss": 0.8936, + "step": 33895 + }, + { + "epoch": 0.49, + "grad_norm": 0.5546875, + "learning_rate": 0.00012206166485128898, + "loss": 0.9165, + "step": 33900 + }, + { + "epoch": 0.49, + "grad_norm": 0.68359375, + "learning_rate": 0.00012203724483554262, + "loss": 1.2033, + "step": 33905 + }, + { + "epoch": 0.49, + "grad_norm": 0.51171875, + "learning_rate": 0.00012201282343847662, + "loss": 1.0308, + "step": 33910 + }, + { + "epoch": 0.49, + "grad_norm": 0.55078125, + "learning_rate": 0.00012198840066162178, + "loss": 0.9276, + "step": 33915 + }, + { + "epoch": 0.49, + "grad_norm": 0.546875, + "learning_rate": 0.00012196397650650897, + "loss": 0.8148, + "step": 33920 + }, + { + "epoch": 0.49, + "grad_norm": 0.625, + "learning_rate": 0.00012193955097466909, + "loss": 1.0166, + "step": 33925 + }, + { + "epoch": 0.49, + "grad_norm": 0.54296875, + "learning_rate": 0.00012191512406763319, + "loss": 0.8964, + "step": 33930 + }, + { + "epoch": 0.49, + "grad_norm": 0.5234375, + "learning_rate": 0.0001218906957869323, + "loss": 0.7973, + "step": 33935 + }, + { + "epoch": 0.49, + "grad_norm": 0.6171875, + "learning_rate": 0.00012186626613409771, + "loss": 0.8976, + "step": 33940 + }, + { + "epoch": 0.49, + "grad_norm": 0.5390625, + "learning_rate": 0.00012184183511066065, + "loss": 0.8737, + "step": 33945 + }, + { + "epoch": 0.49, + "grad_norm": 0.53125, + "learning_rate": 0.00012181740271815248, + "loss": 0.8809, + "step": 33950 + }, + { + "epoch": 0.49, + "grad_norm": 0.57421875, + "learning_rate": 0.00012179296895810466, + "loss": 0.875, + "step": 33955 + }, + { + "epoch": 0.49, + "grad_norm": 0.53125, + "learning_rate": 0.00012176853383204873, + "loss": 0.8637, + "step": 33960 + }, + { + "epoch": 0.49, + "grad_norm": 0.60546875, + "learning_rate": 0.00012174409734151628, + "loss": 0.9441, + "step": 33965 + }, + { + "epoch": 0.49, + "grad_norm": 0.53125, + "learning_rate": 0.0001217196594880391, + "loss": 0.9632, + "step": 33970 + }, + { + "epoch": 0.49, + "grad_norm": 0.55859375, + "learning_rate": 0.00012169522027314888, + "loss": 1.0033, + "step": 33975 + }, + { + "epoch": 0.49, + "grad_norm": 0.57421875, + "learning_rate": 0.00012167077969837755, + "loss": 0.9105, + "step": 33980 + }, + { + "epoch": 0.49, + "grad_norm": 0.62109375, + "learning_rate": 0.0001216463377652571, + "loss": 1.0741, + "step": 33985 + }, + { + "epoch": 0.49, + "grad_norm": 0.58203125, + "learning_rate": 0.00012162189447531949, + "loss": 1.0443, + "step": 33990 + }, + { + "epoch": 0.49, + "grad_norm": 0.5546875, + "learning_rate": 0.00012159744983009695, + "loss": 1.0444, + "step": 33995 + }, + { + "epoch": 0.49, + "grad_norm": 0.54296875, + "learning_rate": 0.00012157300383112167, + "loss": 1.0718, + "step": 34000 + }, + { + "epoch": 0.49, + "grad_norm": 0.6171875, + "learning_rate": 0.00012154855647992591, + "loss": 0.9506, + "step": 34005 + }, + { + "epoch": 0.49, + "grad_norm": 0.51953125, + "learning_rate": 0.00012152410777804209, + "loss": 0.9411, + "step": 34010 + }, + { + "epoch": 0.49, + "grad_norm": 0.5234375, + "learning_rate": 0.00012149965772700269, + "loss": 0.9822, + "step": 34015 + }, + { + "epoch": 0.49, + "grad_norm": 0.56640625, + "learning_rate": 0.00012147520632834023, + "loss": 0.9988, + "step": 34020 + }, + { + "epoch": 0.49, + "grad_norm": 0.50390625, + "learning_rate": 0.00012145075358358744, + "loss": 0.9612, + "step": 34025 + }, + { + "epoch": 0.49, + "grad_norm": 0.6171875, + "learning_rate": 0.00012142629949427693, + "loss": 0.9243, + "step": 34030 + }, + { + "epoch": 0.49, + "grad_norm": 0.58203125, + "learning_rate": 0.0001214018440619416, + "loss": 1.0716, + "step": 34035 + }, + { + "epoch": 0.49, + "grad_norm": 0.55078125, + "learning_rate": 0.0001213773872881143, + "loss": 0.996, + "step": 34040 + }, + { + "epoch": 0.49, + "grad_norm": 0.765625, + "learning_rate": 0.00012135292917432799, + "loss": 0.842, + "step": 34045 + }, + { + "epoch": 0.49, + "grad_norm": 0.55078125, + "learning_rate": 0.0001213284697221158, + "loss": 0.915, + "step": 34050 + }, + { + "epoch": 0.49, + "grad_norm": 0.609375, + "learning_rate": 0.00012130400893301081, + "loss": 0.9155, + "step": 34055 + }, + { + "epoch": 0.49, + "grad_norm": 0.5625, + "learning_rate": 0.00012127954680854628, + "loss": 0.8287, + "step": 34060 + }, + { + "epoch": 0.49, + "grad_norm": 0.55859375, + "learning_rate": 0.00012125508335025552, + "loss": 0.829, + "step": 34065 + }, + { + "epoch": 0.49, + "grad_norm": 0.546875, + "learning_rate": 0.00012123061855967195, + "loss": 0.9948, + "step": 34070 + }, + { + "epoch": 0.49, + "grad_norm": 0.72265625, + "learning_rate": 0.00012120615243832903, + "loss": 0.9894, + "step": 34075 + }, + { + "epoch": 0.49, + "grad_norm": 0.5859375, + "learning_rate": 0.0001211816849877603, + "loss": 1.0433, + "step": 34080 + }, + { + "epoch": 0.49, + "grad_norm": 0.55078125, + "learning_rate": 0.00012115721620949942, + "loss": 0.9365, + "step": 34085 + }, + { + "epoch": 0.49, + "grad_norm": 0.458984375, + "learning_rate": 0.00012113274610508013, + "loss": 1.0574, + "step": 34090 + }, + { + "epoch": 0.49, + "grad_norm": 0.5, + "learning_rate": 0.00012110827467603629, + "loss": 0.8886, + "step": 34095 + }, + { + "epoch": 0.49, + "grad_norm": 0.4921875, + "learning_rate": 0.00012108380192390172, + "loss": 0.8493, + "step": 34100 + }, + { + "epoch": 0.49, + "grad_norm": 0.6484375, + "learning_rate": 0.00012105932785021046, + "loss": 0.9578, + "step": 34105 + }, + { + "epoch": 0.49, + "grad_norm": 0.65234375, + "learning_rate": 0.00012103485245649651, + "loss": 1.0486, + "step": 34110 + }, + { + "epoch": 0.49, + "grad_norm": 0.59765625, + "learning_rate": 0.00012101037574429409, + "loss": 0.9805, + "step": 34115 + }, + { + "epoch": 0.49, + "grad_norm": 0.546875, + "learning_rate": 0.00012098589771513736, + "loss": 0.9243, + "step": 34120 + }, + { + "epoch": 0.49, + "grad_norm": 0.53515625, + "learning_rate": 0.00012096141837056067, + "loss": 0.8967, + "step": 34125 + }, + { + "epoch": 0.49, + "grad_norm": 0.56640625, + "learning_rate": 0.0001209369377120984, + "loss": 0.902, + "step": 34130 + }, + { + "epoch": 0.49, + "grad_norm": 0.60546875, + "learning_rate": 0.00012091245574128505, + "loss": 1.0998, + "step": 34135 + }, + { + "epoch": 0.49, + "grad_norm": 0.6640625, + "learning_rate": 0.0001208879724596551, + "loss": 0.9651, + "step": 34140 + }, + { + "epoch": 0.49, + "grad_norm": 0.65625, + "learning_rate": 0.00012086348786874331, + "loss": 0.9981, + "step": 34145 + }, + { + "epoch": 0.49, + "grad_norm": 0.6328125, + "learning_rate": 0.0001208390019700843, + "loss": 1.0658, + "step": 34150 + }, + { + "epoch": 0.49, + "grad_norm": 0.51953125, + "learning_rate": 0.00012081451476521293, + "loss": 0.9321, + "step": 34155 + }, + { + "epoch": 0.49, + "grad_norm": 0.53125, + "learning_rate": 0.00012079002625566409, + "loss": 0.9644, + "step": 34160 + }, + { + "epoch": 0.49, + "grad_norm": 0.62890625, + "learning_rate": 0.00012076553644297268, + "loss": 1.0264, + "step": 34165 + }, + { + "epoch": 0.49, + "grad_norm": 0.5703125, + "learning_rate": 0.00012074104532867381, + "loss": 0.9996, + "step": 34170 + }, + { + "epoch": 0.49, + "grad_norm": 0.6484375, + "learning_rate": 0.00012071655291430261, + "loss": 0.9504, + "step": 34175 + }, + { + "epoch": 0.49, + "grad_norm": 0.5625, + "learning_rate": 0.00012069205920139428, + "loss": 0.9919, + "step": 34180 + }, + { + "epoch": 0.49, + "grad_norm": 0.53515625, + "learning_rate": 0.0001206675641914841, + "loss": 1.0253, + "step": 34185 + }, + { + "epoch": 0.49, + "grad_norm": 0.5859375, + "learning_rate": 0.00012064306788610749, + "loss": 1.0116, + "step": 34190 + }, + { + "epoch": 0.49, + "grad_norm": 0.546875, + "learning_rate": 0.00012061857028679982, + "loss": 0.9081, + "step": 34195 + }, + { + "epoch": 0.49, + "grad_norm": 0.61328125, + "learning_rate": 0.00012059407139509671, + "loss": 0.9639, + "step": 34200 + }, + { + "epoch": 0.49, + "grad_norm": 0.494140625, + "learning_rate": 0.00012056957121253377, + "loss": 0.9585, + "step": 34205 + }, + { + "epoch": 0.49, + "grad_norm": 0.62109375, + "learning_rate": 0.00012054506974064665, + "loss": 0.8973, + "step": 34210 + }, + { + "epoch": 0.49, + "grad_norm": 0.5625, + "learning_rate": 0.00012052056698097118, + "loss": 0.9475, + "step": 34215 + }, + { + "epoch": 0.49, + "grad_norm": 0.53125, + "learning_rate": 0.00012049606293504317, + "loss": 0.9987, + "step": 34220 + }, + { + "epoch": 0.49, + "grad_norm": 0.8671875, + "learning_rate": 0.00012047155760439861, + "loss": 0.9729, + "step": 34225 + }, + { + "epoch": 0.49, + "grad_norm": 0.5859375, + "learning_rate": 0.00012044705099057352, + "loss": 0.8508, + "step": 34230 + }, + { + "epoch": 0.49, + "grad_norm": 0.53515625, + "learning_rate": 0.00012042254309510398, + "loss": 1.0183, + "step": 34235 + }, + { + "epoch": 0.49, + "grad_norm": 0.52734375, + "learning_rate": 0.00012039803391952617, + "loss": 0.856, + "step": 34240 + }, + { + "epoch": 0.49, + "grad_norm": 0.91796875, + "learning_rate": 0.00012037352346537639, + "loss": 0.7685, + "step": 34245 + }, + { + "epoch": 0.49, + "grad_norm": 0.640625, + "learning_rate": 0.00012034901173419091, + "loss": 0.9855, + "step": 34250 + }, + { + "epoch": 0.49, + "grad_norm": 0.5625, + "learning_rate": 0.00012032449872750621, + "loss": 0.9725, + "step": 34255 + }, + { + "epoch": 0.49, + "grad_norm": 0.52734375, + "learning_rate": 0.00012029998444685881, + "loss": 0.9326, + "step": 34260 + }, + { + "epoch": 0.49, + "grad_norm": 0.68359375, + "learning_rate": 0.00012027546889378525, + "loss": 0.9455, + "step": 34265 + }, + { + "epoch": 0.49, + "grad_norm": 0.50390625, + "learning_rate": 0.0001202509520698222, + "loss": 0.9497, + "step": 34270 + }, + { + "epoch": 0.49, + "grad_norm": 0.73828125, + "learning_rate": 0.00012022643397650642, + "loss": 1.266, + "step": 34275 + }, + { + "epoch": 0.49, + "grad_norm": 0.49609375, + "learning_rate": 0.00012020191461537471, + "loss": 0.9617, + "step": 34280 + }, + { + "epoch": 0.49, + "grad_norm": 0.76171875, + "learning_rate": 0.00012017739398796401, + "loss": 0.9301, + "step": 34285 + }, + { + "epoch": 0.49, + "grad_norm": 0.58203125, + "learning_rate": 0.00012015287209581125, + "loss": 0.9156, + "step": 34290 + }, + { + "epoch": 0.49, + "grad_norm": 0.5625, + "learning_rate": 0.00012012834894045353, + "loss": 0.9515, + "step": 34295 + }, + { + "epoch": 0.49, + "grad_norm": 0.5390625, + "learning_rate": 0.00012010382452342797, + "loss": 0.8186, + "step": 34300 + }, + { + "epoch": 0.49, + "grad_norm": 0.64453125, + "learning_rate": 0.00012007929884627176, + "loss": 0.9397, + "step": 34305 + }, + { + "epoch": 0.49, + "grad_norm": 0.6015625, + "learning_rate": 0.00012005477191052228, + "loss": 0.9877, + "step": 34310 + }, + { + "epoch": 0.49, + "grad_norm": 0.5859375, + "learning_rate": 0.00012003024371771683, + "loss": 1.0068, + "step": 34315 + }, + { + "epoch": 0.49, + "grad_norm": 0.5703125, + "learning_rate": 0.00012000571426939289, + "loss": 0.913, + "step": 34320 + }, + { + "epoch": 0.49, + "grad_norm": 0.56640625, + "learning_rate": 0.000119981183567088, + "loss": 0.9533, + "step": 34325 + }, + { + "epoch": 0.49, + "grad_norm": 0.53515625, + "learning_rate": 0.00011995665161233977, + "loss": 0.9048, + "step": 34330 + }, + { + "epoch": 0.49, + "grad_norm": 0.53125, + "learning_rate": 0.00011993211840668588, + "loss": 0.9978, + "step": 34335 + }, + { + "epoch": 0.49, + "grad_norm": 0.5625, + "learning_rate": 0.00011990758395166415, + "loss": 0.9513, + "step": 34340 + }, + { + "epoch": 0.49, + "grad_norm": 0.71875, + "learning_rate": 0.00011988304824881234, + "loss": 0.9389, + "step": 34345 + }, + { + "epoch": 0.49, + "grad_norm": 0.54296875, + "learning_rate": 0.00011985851129966843, + "loss": 0.9301, + "step": 34350 + }, + { + "epoch": 0.49, + "grad_norm": 0.5703125, + "learning_rate": 0.0001198339731057704, + "loss": 1.1964, + "step": 34355 + }, + { + "epoch": 0.49, + "grad_norm": 0.5703125, + "learning_rate": 0.00011980943366865636, + "loss": 1.0127, + "step": 34360 + }, + { + "epoch": 0.49, + "grad_norm": 0.625, + "learning_rate": 0.00011978489298986448, + "loss": 0.8845, + "step": 34365 + }, + { + "epoch": 0.49, + "grad_norm": 0.55078125, + "learning_rate": 0.00011976035107093294, + "loss": 0.9555, + "step": 34370 + }, + { + "epoch": 0.49, + "grad_norm": 0.46875, + "learning_rate": 0.00011973580791340011, + "loss": 0.903, + "step": 34375 + }, + { + "epoch": 0.49, + "grad_norm": 0.5, + "learning_rate": 0.00011971126351880435, + "loss": 0.9846, + "step": 34380 + }, + { + "epoch": 0.49, + "grad_norm": 0.57421875, + "learning_rate": 0.00011968671788868413, + "loss": 0.9165, + "step": 34385 + }, + { + "epoch": 0.49, + "grad_norm": 0.5390625, + "learning_rate": 0.00011966217102457807, + "loss": 1.0022, + "step": 34390 + }, + { + "epoch": 0.49, + "grad_norm": 0.625, + "learning_rate": 0.0001196376229280247, + "loss": 1.0029, + "step": 34395 + }, + { + "epoch": 0.49, + "grad_norm": 0.5703125, + "learning_rate": 0.00011961307360056273, + "loss": 0.9662, + "step": 34400 + }, + { + "epoch": 0.49, + "grad_norm": 0.5625, + "learning_rate": 0.00011958852304373099, + "loss": 1.0455, + "step": 34405 + }, + { + "epoch": 0.49, + "grad_norm": 0.625, + "learning_rate": 0.00011956397125906834, + "loss": 0.8823, + "step": 34410 + }, + { + "epoch": 0.49, + "grad_norm": 0.5234375, + "learning_rate": 0.00011953941824811363, + "loss": 0.8869, + "step": 34415 + }, + { + "epoch": 0.49, + "grad_norm": 0.5390625, + "learning_rate": 0.00011951486401240601, + "loss": 0.8601, + "step": 34420 + }, + { + "epoch": 0.49, + "grad_norm": 0.609375, + "learning_rate": 0.00011949030855348445, + "loss": 0.9119, + "step": 34425 + }, + { + "epoch": 0.49, + "grad_norm": 0.53515625, + "learning_rate": 0.00011946575187288815, + "loss": 0.937, + "step": 34430 + }, + { + "epoch": 0.49, + "grad_norm": 0.51953125, + "learning_rate": 0.00011944119397215634, + "loss": 0.9595, + "step": 34435 + }, + { + "epoch": 0.49, + "grad_norm": 0.6328125, + "learning_rate": 0.00011941663485282837, + "loss": 0.9202, + "step": 34440 + }, + { + "epoch": 0.49, + "grad_norm": 0.5546875, + "learning_rate": 0.00011939207451644363, + "loss": 0.9054, + "step": 34445 + }, + { + "epoch": 0.49, + "grad_norm": 0.55859375, + "learning_rate": 0.00011936751296454155, + "loss": 1.0402, + "step": 34450 + }, + { + "epoch": 0.49, + "grad_norm": 0.6171875, + "learning_rate": 0.00011934295019866168, + "loss": 1.0998, + "step": 34455 + }, + { + "epoch": 0.49, + "grad_norm": 0.484375, + "learning_rate": 0.00011931838622034371, + "loss": 0.9521, + "step": 34460 + }, + { + "epoch": 0.49, + "grad_norm": 0.60546875, + "learning_rate": 0.00011929382103112725, + "loss": 0.9183, + "step": 34465 + }, + { + "epoch": 0.49, + "grad_norm": 0.57421875, + "learning_rate": 0.00011926925463255214, + "loss": 1.0728, + "step": 34470 + }, + { + "epoch": 0.49, + "grad_norm": 0.490234375, + "learning_rate": 0.00011924468702615818, + "loss": 0.8879, + "step": 34475 + }, + { + "epoch": 0.49, + "grad_norm": 0.74609375, + "learning_rate": 0.00011922011821348533, + "loss": 0.9262, + "step": 34480 + }, + { + "epoch": 0.49, + "grad_norm": 0.55078125, + "learning_rate": 0.00011919554819607359, + "loss": 0.9469, + "step": 34485 + }, + { + "epoch": 0.49, + "grad_norm": 0.609375, + "learning_rate": 0.00011917097697546303, + "loss": 1.0009, + "step": 34490 + }, + { + "epoch": 0.49, + "grad_norm": 0.65625, + "learning_rate": 0.00011914640455319377, + "loss": 0.8685, + "step": 34495 + }, + { + "epoch": 0.49, + "grad_norm": 0.71875, + "learning_rate": 0.00011912183093080611, + "loss": 0.942, + "step": 34500 + }, + { + "epoch": 0.49, + "grad_norm": 0.578125, + "learning_rate": 0.00011909725610984026, + "loss": 0.9429, + "step": 34505 + }, + { + "epoch": 0.5, + "grad_norm": 0.546875, + "learning_rate": 0.00011907268009183668, + "loss": 0.9046, + "step": 34510 + }, + { + "epoch": 0.5, + "grad_norm": 0.486328125, + "learning_rate": 0.00011904810287833579, + "loss": 0.9142, + "step": 34515 + }, + { + "epoch": 0.5, + "grad_norm": 0.58984375, + "learning_rate": 0.0001190235244708781, + "loss": 1.062, + "step": 34520 + }, + { + "epoch": 0.5, + "grad_norm": 0.5390625, + "learning_rate": 0.00011899894487100425, + "loss": 0.8536, + "step": 34525 + }, + { + "epoch": 0.5, + "grad_norm": 0.57421875, + "learning_rate": 0.00011897436408025488, + "loss": 1.0806, + "step": 34530 + }, + { + "epoch": 0.5, + "grad_norm": 0.546875, + "learning_rate": 0.00011894978210017076, + "loss": 1.1766, + "step": 34535 + }, + { + "epoch": 0.5, + "grad_norm": 0.52734375, + "learning_rate": 0.00011892519893229272, + "loss": 0.9887, + "step": 34540 + }, + { + "epoch": 0.5, + "grad_norm": 0.5, + "learning_rate": 0.00011890061457816167, + "loss": 0.9299, + "step": 34545 + }, + { + "epoch": 0.5, + "grad_norm": 0.5546875, + "learning_rate": 0.00011887602903931856, + "loss": 0.9877, + "step": 34550 + }, + { + "epoch": 0.5, + "grad_norm": 0.61328125, + "learning_rate": 0.00011885144231730445, + "loss": 0.8862, + "step": 34555 + }, + { + "epoch": 0.5, + "grad_norm": 0.53125, + "learning_rate": 0.00011882685441366046, + "loss": 0.9471, + "step": 34560 + }, + { + "epoch": 0.5, + "grad_norm": 0.55078125, + "learning_rate": 0.0001188022653299278, + "loss": 0.9956, + "step": 34565 + }, + { + "epoch": 0.5, + "grad_norm": 0.55859375, + "learning_rate": 0.00011877767506764774, + "loss": 1.0488, + "step": 34570 + }, + { + "epoch": 0.5, + "grad_norm": 0.66796875, + "learning_rate": 0.00011875308362836163, + "loss": 0.964, + "step": 34575 + }, + { + "epoch": 0.5, + "grad_norm": 0.482421875, + "learning_rate": 0.00011872849101361088, + "loss": 0.8103, + "step": 34580 + }, + { + "epoch": 0.5, + "grad_norm": 0.5078125, + "learning_rate": 0.00011870389722493698, + "loss": 0.9446, + "step": 34585 + }, + { + "epoch": 0.5, + "grad_norm": 0.439453125, + "learning_rate": 0.00011867930226388147, + "loss": 0.8547, + "step": 34590 + }, + { + "epoch": 0.5, + "grad_norm": 0.53125, + "learning_rate": 0.00011865470613198603, + "loss": 0.8854, + "step": 34595 + }, + { + "epoch": 0.5, + "grad_norm": 0.5234375, + "learning_rate": 0.0001186301088307924, + "loss": 1.0066, + "step": 34600 + }, + { + "epoch": 0.5, + "grad_norm": 0.58984375, + "learning_rate": 0.00011860551036184229, + "loss": 0.8916, + "step": 34605 + }, + { + "epoch": 0.5, + "grad_norm": 0.546875, + "learning_rate": 0.00011858091072667763, + "loss": 1.0072, + "step": 34610 + }, + { + "epoch": 0.5, + "grad_norm": 0.56640625, + "learning_rate": 0.00011855630992684028, + "loss": 0.8621, + "step": 34615 + }, + { + "epoch": 0.5, + "grad_norm": 1.8828125, + "learning_rate": 0.00011853170796387233, + "loss": 0.9904, + "step": 34620 + }, + { + "epoch": 0.5, + "grad_norm": 0.51953125, + "learning_rate": 0.00011850710483931581, + "loss": 1.0208, + "step": 34625 + }, + { + "epoch": 0.5, + "grad_norm": 0.66796875, + "learning_rate": 0.00011848250055471288, + "loss": 0.8806, + "step": 34630 + }, + { + "epoch": 0.5, + "grad_norm": 0.52734375, + "learning_rate": 0.00011845789511160579, + "loss": 0.9947, + "step": 34635 + }, + { + "epoch": 0.5, + "grad_norm": 0.58984375, + "learning_rate": 0.00011843328851153679, + "loss": 0.974, + "step": 34640 + }, + { + "epoch": 0.5, + "grad_norm": 0.6015625, + "learning_rate": 0.00011840868075604825, + "loss": 0.9877, + "step": 34645 + }, + { + "epoch": 0.5, + "grad_norm": 0.55859375, + "learning_rate": 0.00011838407184668265, + "loss": 0.9903, + "step": 34650 + }, + { + "epoch": 0.5, + "grad_norm": 0.640625, + "learning_rate": 0.0001183594617849825, + "loss": 0.9617, + "step": 34655 + }, + { + "epoch": 0.5, + "grad_norm": 0.5546875, + "learning_rate": 0.00011833485057249038, + "loss": 0.9091, + "step": 34660 + }, + { + "epoch": 0.5, + "grad_norm": 0.58203125, + "learning_rate": 0.00011831023821074893, + "loss": 0.9464, + "step": 34665 + }, + { + "epoch": 0.5, + "grad_norm": 0.52734375, + "learning_rate": 0.00011828562470130088, + "loss": 0.8344, + "step": 34670 + }, + { + "epoch": 0.5, + "grad_norm": 0.6640625, + "learning_rate": 0.00011826101004568908, + "loss": 0.9523, + "step": 34675 + }, + { + "epoch": 0.5, + "grad_norm": 0.55859375, + "learning_rate": 0.00011823639424545639, + "loss": 0.9397, + "step": 34680 + }, + { + "epoch": 0.5, + "grad_norm": 0.53125, + "learning_rate": 0.0001182117773021457, + "loss": 0.9879, + "step": 34685 + }, + { + "epoch": 0.5, + "grad_norm": 0.5078125, + "learning_rate": 0.00011818715921730006, + "loss": 0.9295, + "step": 34690 + }, + { + "epoch": 0.5, + "grad_norm": 0.625, + "learning_rate": 0.00011816253999246258, + "loss": 1.0233, + "step": 34695 + }, + { + "epoch": 0.5, + "grad_norm": 0.58984375, + "learning_rate": 0.00011813791962917642, + "loss": 1.0116, + "step": 34700 + }, + { + "epoch": 0.5, + "grad_norm": 0.62890625, + "learning_rate": 0.00011811329812898482, + "loss": 0.9946, + "step": 34705 + }, + { + "epoch": 0.5, + "grad_norm": 0.609375, + "learning_rate": 0.000118088675493431, + "loss": 1.0791, + "step": 34710 + }, + { + "epoch": 0.5, + "grad_norm": 0.5859375, + "learning_rate": 0.00011806405172405845, + "loss": 1.0352, + "step": 34715 + }, + { + "epoch": 0.5, + "grad_norm": 0.55859375, + "learning_rate": 0.00011803942682241053, + "loss": 0.8494, + "step": 34720 + }, + { + "epoch": 0.5, + "grad_norm": 0.515625, + "learning_rate": 0.0001180148007900308, + "loss": 0.955, + "step": 34725 + }, + { + "epoch": 0.5, + "grad_norm": 0.55078125, + "learning_rate": 0.00011799017362846287, + "loss": 1.0439, + "step": 34730 + }, + { + "epoch": 0.5, + "grad_norm": 0.58203125, + "learning_rate": 0.00011796554533925037, + "loss": 0.924, + "step": 34735 + }, + { + "epoch": 0.5, + "grad_norm": 0.5546875, + "learning_rate": 0.000117940915923937, + "loss": 0.8782, + "step": 34740 + }, + { + "epoch": 0.5, + "grad_norm": 0.5390625, + "learning_rate": 0.00011791628538406659, + "loss": 0.923, + "step": 34745 + }, + { + "epoch": 0.5, + "grad_norm": 0.482421875, + "learning_rate": 0.00011789165372118301, + "loss": 1.0874, + "step": 34750 + }, + { + "epoch": 0.5, + "grad_norm": 0.478515625, + "learning_rate": 0.00011786702093683018, + "loss": 0.7821, + "step": 34755 + }, + { + "epoch": 0.5, + "grad_norm": 0.53515625, + "learning_rate": 0.00011784238703255217, + "loss": 0.8877, + "step": 34760 + }, + { + "epoch": 0.5, + "grad_norm": 0.53515625, + "learning_rate": 0.00011781775200989298, + "loss": 0.8183, + "step": 34765 + }, + { + "epoch": 0.5, + "grad_norm": 0.53125, + "learning_rate": 0.0001177931158703968, + "loss": 0.933, + "step": 34770 + }, + { + "epoch": 0.5, + "grad_norm": 0.59765625, + "learning_rate": 0.0001177684786156079, + "loss": 1.0486, + "step": 34775 + }, + { + "epoch": 0.5, + "grad_norm": 0.53515625, + "learning_rate": 0.00011774384024707046, + "loss": 1.1156, + "step": 34780 + }, + { + "epoch": 0.5, + "grad_norm": 0.5546875, + "learning_rate": 0.00011771920076632897, + "loss": 1.1137, + "step": 34785 + }, + { + "epoch": 0.5, + "grad_norm": 0.5859375, + "learning_rate": 0.00011769456017492777, + "loss": 0.8434, + "step": 34790 + }, + { + "epoch": 0.5, + "grad_norm": 0.5859375, + "learning_rate": 0.00011766991847441136, + "loss": 1.0149, + "step": 34795 + }, + { + "epoch": 0.5, + "grad_norm": 0.62109375, + "learning_rate": 0.00011764527566632435, + "loss": 0.9178, + "step": 34800 + }, + { + "epoch": 0.5, + "grad_norm": 0.60546875, + "learning_rate": 0.00011762063175221139, + "loss": 0.9654, + "step": 34805 + }, + { + "epoch": 0.5, + "grad_norm": 0.458984375, + "learning_rate": 0.00011759598673361714, + "loss": 0.9076, + "step": 34810 + }, + { + "epoch": 0.5, + "grad_norm": 0.515625, + "learning_rate": 0.00011757134061208642, + "loss": 1.0185, + "step": 34815 + }, + { + "epoch": 0.5, + "grad_norm": 0.5390625, + "learning_rate": 0.00011754669338916401, + "loss": 0.9442, + "step": 34820 + }, + { + "epoch": 0.5, + "grad_norm": 0.578125, + "learning_rate": 0.00011752204506639493, + "loss": 0.9538, + "step": 34825 + }, + { + "epoch": 0.5, + "grad_norm": 0.546875, + "learning_rate": 0.00011749739564532407, + "loss": 1.0216, + "step": 34830 + }, + { + "epoch": 0.5, + "grad_norm": 0.5859375, + "learning_rate": 0.00011747274512749653, + "loss": 0.9645, + "step": 34835 + }, + { + "epoch": 0.5, + "grad_norm": 0.443359375, + "learning_rate": 0.00011744809351445747, + "loss": 0.9073, + "step": 34840 + }, + { + "epoch": 0.5, + "grad_norm": 0.54296875, + "learning_rate": 0.00011742344080775198, + "loss": 0.9201, + "step": 34845 + }, + { + "epoch": 0.5, + "grad_norm": 0.55859375, + "learning_rate": 0.00011739878700892541, + "loss": 1.1989, + "step": 34850 + }, + { + "epoch": 0.5, + "grad_norm": 0.52734375, + "learning_rate": 0.00011737413211952304, + "loss": 0.861, + "step": 34855 + }, + { + "epoch": 0.5, + "grad_norm": 0.59375, + "learning_rate": 0.00011734947614109029, + "loss": 1.0942, + "step": 34860 + }, + { + "epoch": 0.5, + "grad_norm": 0.5625, + "learning_rate": 0.00011732481907517261, + "loss": 1.0245, + "step": 34865 + }, + { + "epoch": 0.5, + "grad_norm": 0.83203125, + "learning_rate": 0.00011730016092331554, + "loss": 0.9959, + "step": 34870 + }, + { + "epoch": 0.5, + "grad_norm": 0.5546875, + "learning_rate": 0.00011727550168706467, + "loss": 0.9585, + "step": 34875 + }, + { + "epoch": 0.5, + "grad_norm": 0.62109375, + "learning_rate": 0.00011725084136796569, + "loss": 1.0257, + "step": 34880 + }, + { + "epoch": 0.5, + "grad_norm": 0.63671875, + "learning_rate": 0.00011722617996756433, + "loss": 0.9892, + "step": 34885 + }, + { + "epoch": 0.5, + "grad_norm": 0.51953125, + "learning_rate": 0.00011720151748740639, + "loss": 0.8525, + "step": 34890 + }, + { + "epoch": 0.5, + "grad_norm": 0.65234375, + "learning_rate": 0.00011717685392903774, + "loss": 1.065, + "step": 34895 + }, + { + "epoch": 0.5, + "grad_norm": 0.5546875, + "learning_rate": 0.0001171521892940043, + "loss": 0.9966, + "step": 34900 + }, + { + "epoch": 0.5, + "grad_norm": 0.65625, + "learning_rate": 0.00011712752358385216, + "loss": 0.9208, + "step": 34905 + }, + { + "epoch": 0.5, + "grad_norm": 0.55078125, + "learning_rate": 0.0001171028568001273, + "loss": 0.8902, + "step": 34910 + }, + { + "epoch": 0.5, + "grad_norm": 0.578125, + "learning_rate": 0.00011707818894437587, + "loss": 0.8528, + "step": 34915 + }, + { + "epoch": 0.5, + "grad_norm": 0.57421875, + "learning_rate": 0.00011705352001814415, + "loss": 1.0698, + "step": 34920 + }, + { + "epoch": 0.5, + "grad_norm": 0.5234375, + "learning_rate": 0.00011702885002297836, + "loss": 0.8962, + "step": 34925 + }, + { + "epoch": 0.5, + "grad_norm": 0.5078125, + "learning_rate": 0.00011700417896042484, + "loss": 1.0051, + "step": 34930 + }, + { + "epoch": 0.5, + "grad_norm": 0.55859375, + "learning_rate": 0.00011697950683203003, + "loss": 0.8504, + "step": 34935 + }, + { + "epoch": 0.5, + "grad_norm": 0.57421875, + "learning_rate": 0.00011695483363934038, + "loss": 0.8656, + "step": 34940 + }, + { + "epoch": 0.5, + "grad_norm": 0.5234375, + "learning_rate": 0.00011693015938390249, + "loss": 0.8445, + "step": 34945 + }, + { + "epoch": 0.5, + "grad_norm": 0.6015625, + "learning_rate": 0.0001169054840672629, + "loss": 1.068, + "step": 34950 + }, + { + "epoch": 0.5, + "grad_norm": 0.6328125, + "learning_rate": 0.00011688080769096827, + "loss": 1.0567, + "step": 34955 + }, + { + "epoch": 0.5, + "grad_norm": 0.609375, + "learning_rate": 0.00011685613025656543, + "loss": 1.0828, + "step": 34960 + }, + { + "epoch": 0.5, + "grad_norm": 0.5390625, + "learning_rate": 0.00011683145176560117, + "loss": 0.9981, + "step": 34965 + }, + { + "epoch": 0.5, + "grad_norm": 0.6796875, + "learning_rate": 0.00011680677221962233, + "loss": 0.963, + "step": 34970 + }, + { + "epoch": 0.5, + "grad_norm": 0.5703125, + "learning_rate": 0.00011678209162017586, + "loss": 0.9921, + "step": 34975 + }, + { + "epoch": 0.5, + "grad_norm": 0.578125, + "learning_rate": 0.00011675740996880877, + "loss": 0.9473, + "step": 34980 + }, + { + "epoch": 0.5, + "grad_norm": 0.61328125, + "learning_rate": 0.00011673272726706812, + "loss": 0.9033, + "step": 34985 + }, + { + "epoch": 0.5, + "grad_norm": 0.494140625, + "learning_rate": 0.00011670804351650109, + "loss": 0.9988, + "step": 34990 + }, + { + "epoch": 0.5, + "grad_norm": 0.5234375, + "learning_rate": 0.00011668335871865487, + "loss": 0.7865, + "step": 34995 + }, + { + "epoch": 0.5, + "grad_norm": 0.6171875, + "learning_rate": 0.00011665867287507672, + "loss": 0.9627, + "step": 35000 + }, + { + "epoch": 0.5, + "grad_norm": 0.59765625, + "learning_rate": 0.00011663398598731399, + "loss": 0.9706, + "step": 35005 + }, + { + "epoch": 0.5, + "grad_norm": 0.53515625, + "learning_rate": 0.00011660929805691402, + "loss": 0.8479, + "step": 35010 + }, + { + "epoch": 0.5, + "grad_norm": 0.52734375, + "learning_rate": 0.00011658460908542438, + "loss": 0.9575, + "step": 35015 + }, + { + "epoch": 0.5, + "grad_norm": 0.58984375, + "learning_rate": 0.00011655991907439257, + "loss": 1.0464, + "step": 35020 + }, + { + "epoch": 0.5, + "grad_norm": 0.6015625, + "learning_rate": 0.00011653522802536613, + "loss": 0.9209, + "step": 35025 + }, + { + "epoch": 0.5, + "grad_norm": 0.498046875, + "learning_rate": 0.0001165105359398928, + "loss": 0.855, + "step": 35030 + }, + { + "epoch": 0.5, + "grad_norm": 0.5625, + "learning_rate": 0.00011648584281952026, + "loss": 0.9487, + "step": 35035 + }, + { + "epoch": 0.5, + "grad_norm": 0.57421875, + "learning_rate": 0.00011646114866579632, + "loss": 0.9703, + "step": 35040 + }, + { + "epoch": 0.5, + "grad_norm": 0.51171875, + "learning_rate": 0.00011643645348026882, + "loss": 0.929, + "step": 35045 + }, + { + "epoch": 0.5, + "grad_norm": 0.609375, + "learning_rate": 0.0001164117572644857, + "loss": 0.9992, + "step": 35050 + }, + { + "epoch": 0.5, + "grad_norm": 0.53125, + "learning_rate": 0.00011638706001999495, + "loss": 0.8706, + "step": 35055 + }, + { + "epoch": 0.5, + "grad_norm": 0.55078125, + "learning_rate": 0.00011636236174834463, + "loss": 0.8983, + "step": 35060 + }, + { + "epoch": 0.5, + "grad_norm": 0.58203125, + "learning_rate": 0.0001163376624510828, + "loss": 1.0189, + "step": 35065 + }, + { + "epoch": 0.5, + "grad_norm": 0.609375, + "learning_rate": 0.00011631296212975771, + "loss": 0.9888, + "step": 35070 + }, + { + "epoch": 0.5, + "grad_norm": 0.62109375, + "learning_rate": 0.0001162882607859176, + "loss": 0.9474, + "step": 35075 + }, + { + "epoch": 0.5, + "grad_norm": 0.54296875, + "learning_rate": 0.0001162635584211107, + "loss": 0.9624, + "step": 35080 + }, + { + "epoch": 0.5, + "grad_norm": 0.5, + "learning_rate": 0.00011623885503688546, + "loss": 0.8918, + "step": 35085 + }, + { + "epoch": 0.5, + "grad_norm": 0.50390625, + "learning_rate": 0.00011621415063479028, + "loss": 0.8406, + "step": 35090 + }, + { + "epoch": 0.5, + "grad_norm": 0.58203125, + "learning_rate": 0.00011618944521637368, + "loss": 0.9026, + "step": 35095 + }, + { + "epoch": 0.5, + "grad_norm": 0.64453125, + "learning_rate": 0.00011616473878318424, + "loss": 1.0991, + "step": 35100 + }, + { + "epoch": 0.5, + "grad_norm": 0.53515625, + "learning_rate": 0.00011614003133677052, + "loss": 0.9176, + "step": 35105 + }, + { + "epoch": 0.5, + "grad_norm": 0.53515625, + "learning_rate": 0.00011611532287868128, + "loss": 0.9866, + "step": 35110 + }, + { + "epoch": 0.5, + "grad_norm": 0.640625, + "learning_rate": 0.00011609061341046523, + "loss": 0.9598, + "step": 35115 + }, + { + "epoch": 0.5, + "grad_norm": 0.5390625, + "learning_rate": 0.00011606590293367121, + "loss": 0.94, + "step": 35120 + }, + { + "epoch": 0.5, + "grad_norm": 0.546875, + "learning_rate": 0.0001160411914498481, + "loss": 0.9376, + "step": 35125 + }, + { + "epoch": 0.5, + "grad_norm": 0.64453125, + "learning_rate": 0.00011601647896054486, + "loss": 1.0, + "step": 35130 + }, + { + "epoch": 0.5, + "grad_norm": 0.5546875, + "learning_rate": 0.00011599176546731045, + "loss": 0.9325, + "step": 35135 + }, + { + "epoch": 0.5, + "grad_norm": 0.6015625, + "learning_rate": 0.000115967050971694, + "loss": 1.0462, + "step": 35140 + }, + { + "epoch": 0.5, + "grad_norm": 0.5234375, + "learning_rate": 0.00011594233547524458, + "loss": 1.0102, + "step": 35145 + }, + { + "epoch": 0.5, + "grad_norm": 0.60546875, + "learning_rate": 0.00011591761897951141, + "loss": 0.9795, + "step": 35150 + }, + { + "epoch": 0.5, + "grad_norm": 0.67578125, + "learning_rate": 0.0001158929014860438, + "loss": 0.9715, + "step": 35155 + }, + { + "epoch": 0.5, + "grad_norm": 0.609375, + "learning_rate": 0.00011586818299639097, + "loss": 0.9373, + "step": 35160 + }, + { + "epoch": 0.5, + "grad_norm": 0.55859375, + "learning_rate": 0.00011584346351210238, + "loss": 0.8551, + "step": 35165 + }, + { + "epoch": 0.5, + "grad_norm": 0.5703125, + "learning_rate": 0.00011581874303472746, + "loss": 1.0983, + "step": 35170 + }, + { + "epoch": 0.5, + "grad_norm": 0.57421875, + "learning_rate": 0.00011579402156581568, + "loss": 0.9248, + "step": 35175 + }, + { + "epoch": 0.5, + "grad_norm": 0.51171875, + "learning_rate": 0.0001157692991069167, + "loss": 0.7816, + "step": 35180 + }, + { + "epoch": 0.5, + "grad_norm": 0.625, + "learning_rate": 0.00011574457565958003, + "loss": 0.9678, + "step": 35185 + }, + { + "epoch": 0.5, + "grad_norm": 0.55859375, + "learning_rate": 0.00011571985122535547, + "loss": 1.0563, + "step": 35190 + }, + { + "epoch": 0.5, + "grad_norm": 0.58984375, + "learning_rate": 0.00011569512580579271, + "loss": 0.9766, + "step": 35195 + }, + { + "epoch": 0.5, + "grad_norm": 0.54296875, + "learning_rate": 0.0001156703994024416, + "loss": 1.0294, + "step": 35200 + }, + { + "epoch": 0.5, + "grad_norm": 0.47265625, + "learning_rate": 0.00011564567201685202, + "loss": 0.908, + "step": 35205 + }, + { + "epoch": 0.51, + "grad_norm": 0.6171875, + "learning_rate": 0.00011562094365057388, + "loss": 0.9647, + "step": 35210 + }, + { + "epoch": 0.51, + "grad_norm": 0.54296875, + "learning_rate": 0.00011559621430515717, + "loss": 0.7352, + "step": 35215 + }, + { + "epoch": 0.51, + "grad_norm": 0.59375, + "learning_rate": 0.00011557148398215203, + "loss": 0.8482, + "step": 35220 + }, + { + "epoch": 0.51, + "grad_norm": 0.5234375, + "learning_rate": 0.00011554675268310853, + "loss": 0.8756, + "step": 35225 + }, + { + "epoch": 0.51, + "grad_norm": 0.59375, + "learning_rate": 0.00011552202040957684, + "loss": 0.926, + "step": 35230 + }, + { + "epoch": 0.51, + "grad_norm": 0.5625, + "learning_rate": 0.00011549728716310728, + "loss": 0.7878, + "step": 35235 + }, + { + "epoch": 0.51, + "grad_norm": 0.57421875, + "learning_rate": 0.00011547255294525006, + "loss": 1.1045, + "step": 35240 + }, + { + "epoch": 0.51, + "grad_norm": 0.5390625, + "learning_rate": 0.00011544781775755565, + "loss": 0.9171, + "step": 35245 + }, + { + "epoch": 0.51, + "grad_norm": 0.58984375, + "learning_rate": 0.0001154230816015744, + "loss": 0.8814, + "step": 35250 + }, + { + "epoch": 0.51, + "grad_norm": 0.5625, + "learning_rate": 0.0001153983444788568, + "loss": 0.7096, + "step": 35255 + }, + { + "epoch": 0.51, + "grad_norm": 0.9296875, + "learning_rate": 0.00011537360639095349, + "loss": 1.052, + "step": 35260 + }, + { + "epoch": 0.51, + "grad_norm": 0.5859375, + "learning_rate": 0.00011534886733941502, + "loss": 0.8847, + "step": 35265 + }, + { + "epoch": 0.51, + "grad_norm": 0.5859375, + "learning_rate": 0.00011532412732579201, + "loss": 0.9098, + "step": 35270 + }, + { + "epoch": 0.51, + "grad_norm": 0.53515625, + "learning_rate": 0.00011529938635163529, + "loss": 0.9207, + "step": 35275 + }, + { + "epoch": 0.51, + "grad_norm": 0.56640625, + "learning_rate": 0.0001152746444184956, + "loss": 0.8311, + "step": 35280 + }, + { + "epoch": 0.51, + "grad_norm": 0.56640625, + "learning_rate": 0.00011524990152792381, + "loss": 0.9728, + "step": 35285 + }, + { + "epoch": 0.51, + "grad_norm": 0.5390625, + "learning_rate": 0.00011522515768147082, + "loss": 0.846, + "step": 35290 + }, + { + "epoch": 0.51, + "grad_norm": 0.640625, + "learning_rate": 0.00011520041288068757, + "loss": 0.919, + "step": 35295 + }, + { + "epoch": 0.51, + "grad_norm": 0.55078125, + "learning_rate": 0.00011517566712712516, + "loss": 1.0759, + "step": 35300 + }, + { + "epoch": 0.51, + "grad_norm": 0.60546875, + "learning_rate": 0.00011515092042233466, + "loss": 0.9652, + "step": 35305 + }, + { + "epoch": 0.51, + "grad_norm": 0.625, + "learning_rate": 0.00011512617276786719, + "loss": 1.0368, + "step": 35310 + }, + { + "epoch": 0.51, + "grad_norm": 0.59765625, + "learning_rate": 0.00011510142416527401, + "loss": 1.1348, + "step": 35315 + }, + { + "epoch": 0.51, + "grad_norm": 0.64453125, + "learning_rate": 0.00011507667461610637, + "loss": 0.8869, + "step": 35320 + }, + { + "epoch": 0.51, + "grad_norm": 0.5703125, + "learning_rate": 0.00011505192412191554, + "loss": 0.9564, + "step": 35325 + }, + { + "epoch": 0.51, + "grad_norm": 0.62109375, + "learning_rate": 0.000115027172684253, + "loss": 1.0288, + "step": 35330 + }, + { + "epoch": 0.51, + "grad_norm": 0.56640625, + "learning_rate": 0.00011500242030467017, + "loss": 0.8114, + "step": 35335 + }, + { + "epoch": 0.51, + "grad_norm": 0.578125, + "learning_rate": 0.00011497766698471852, + "loss": 1.0091, + "step": 35340 + }, + { + "epoch": 0.51, + "grad_norm": 0.5390625, + "learning_rate": 0.00011495291272594968, + "loss": 0.9346, + "step": 35345 + }, + { + "epoch": 0.51, + "grad_norm": 0.578125, + "learning_rate": 0.00011492815752991521, + "loss": 1.0135, + "step": 35350 + }, + { + "epoch": 0.51, + "grad_norm": 0.5703125, + "learning_rate": 0.00011490340139816684, + "loss": 0.8858, + "step": 35355 + }, + { + "epoch": 0.51, + "grad_norm": 0.6875, + "learning_rate": 0.0001148786443322563, + "loss": 0.9391, + "step": 35360 + }, + { + "epoch": 0.51, + "grad_norm": 0.6015625, + "learning_rate": 0.00011485388633373538, + "loss": 0.9083, + "step": 35365 + }, + { + "epoch": 0.51, + "grad_norm": 0.5, + "learning_rate": 0.00011482912740415595, + "loss": 0.86, + "step": 35370 + }, + { + "epoch": 0.51, + "grad_norm": 0.58984375, + "learning_rate": 0.00011480436754506993, + "loss": 0.9986, + "step": 35375 + }, + { + "epoch": 0.51, + "grad_norm": 0.53125, + "learning_rate": 0.00011477960675802926, + "loss": 0.8895, + "step": 35380 + }, + { + "epoch": 0.51, + "grad_norm": 0.5703125, + "learning_rate": 0.00011475484504458604, + "loss": 1.0366, + "step": 35385 + }, + { + "epoch": 0.51, + "grad_norm": 0.498046875, + "learning_rate": 0.00011473008240629233, + "loss": 0.8077, + "step": 35390 + }, + { + "epoch": 0.51, + "grad_norm": 0.51171875, + "learning_rate": 0.00011470531884470026, + "loss": 0.9371, + "step": 35395 + }, + { + "epoch": 0.51, + "grad_norm": 0.474609375, + "learning_rate": 0.00011468055436136209, + "loss": 0.892, + "step": 35400 + }, + { + "epoch": 0.51, + "grad_norm": 0.71484375, + "learning_rate": 0.00011465578895783001, + "loss": 0.9811, + "step": 35405 + }, + { + "epoch": 0.51, + "grad_norm": 0.57421875, + "learning_rate": 0.00011463102263565639, + "loss": 0.9964, + "step": 35410 + }, + { + "epoch": 0.51, + "grad_norm": 0.60546875, + "learning_rate": 0.00011460625539639367, + "loss": 0.9848, + "step": 35415 + }, + { + "epoch": 0.51, + "grad_norm": 0.5703125, + "learning_rate": 0.00011458148724159414, + "loss": 0.9357, + "step": 35420 + }, + { + "epoch": 0.51, + "grad_norm": 0.50390625, + "learning_rate": 0.00011455671817281044, + "loss": 1.0116, + "step": 35425 + }, + { + "epoch": 0.51, + "grad_norm": 0.640625, + "learning_rate": 0.00011453194819159506, + "loss": 0.9715, + "step": 35430 + }, + { + "epoch": 0.51, + "grad_norm": 0.59375, + "learning_rate": 0.00011450717729950058, + "loss": 0.901, + "step": 35435 + }, + { + "epoch": 0.51, + "grad_norm": 0.5703125, + "learning_rate": 0.00011448240549807974, + "loss": 0.9623, + "step": 35440 + }, + { + "epoch": 0.51, + "grad_norm": 0.5078125, + "learning_rate": 0.00011445763278888522, + "loss": 0.9551, + "step": 35445 + }, + { + "epoch": 0.51, + "grad_norm": 0.6328125, + "learning_rate": 0.00011443285917346981, + "loss": 1.0564, + "step": 35450 + }, + { + "epoch": 0.51, + "grad_norm": 0.490234375, + "learning_rate": 0.00011440808465338634, + "loss": 0.9243, + "step": 35455 + }, + { + "epoch": 0.51, + "grad_norm": 0.5234375, + "learning_rate": 0.00011438330923018771, + "loss": 0.8432, + "step": 35460 + }, + { + "epoch": 0.51, + "grad_norm": 0.53515625, + "learning_rate": 0.0001143585329054269, + "loss": 0.925, + "step": 35465 + }, + { + "epoch": 0.51, + "grad_norm": 0.5625, + "learning_rate": 0.00011433375568065692, + "loss": 0.9936, + "step": 35470 + }, + { + "epoch": 0.51, + "grad_norm": 0.5390625, + "learning_rate": 0.00011430897755743075, + "loss": 0.8261, + "step": 35475 + }, + { + "epoch": 0.51, + "grad_norm": 0.6640625, + "learning_rate": 0.0001142841985373016, + "loss": 1.0874, + "step": 35480 + }, + { + "epoch": 0.51, + "grad_norm": 0.53125, + "learning_rate": 0.00011425941862182261, + "loss": 0.8031, + "step": 35485 + }, + { + "epoch": 0.51, + "grad_norm": 0.53125, + "learning_rate": 0.00011423463781254702, + "loss": 0.9591, + "step": 35490 + }, + { + "epoch": 0.51, + "grad_norm": 0.61328125, + "learning_rate": 0.00011420985611102814, + "loss": 0.8532, + "step": 35495 + }, + { + "epoch": 0.51, + "grad_norm": 0.5546875, + "learning_rate": 0.00011418507351881927, + "loss": 0.8923, + "step": 35500 + }, + { + "epoch": 0.51, + "grad_norm": 0.7109375, + "learning_rate": 0.00011416029003747383, + "loss": 1.0846, + "step": 35505 + }, + { + "epoch": 0.51, + "grad_norm": 0.625, + "learning_rate": 0.00011413550566854532, + "loss": 0.9509, + "step": 35510 + }, + { + "epoch": 0.51, + "grad_norm": 0.66015625, + "learning_rate": 0.00011411072041358717, + "loss": 0.8973, + "step": 35515 + }, + { + "epoch": 0.51, + "grad_norm": 0.5078125, + "learning_rate": 0.00011408593427415304, + "loss": 1.0902, + "step": 35520 + }, + { + "epoch": 0.51, + "grad_norm": 0.53515625, + "learning_rate": 0.00011406114725179647, + "loss": 0.8178, + "step": 35525 + }, + { + "epoch": 0.51, + "grad_norm": 0.52734375, + "learning_rate": 0.0001140363593480712, + "loss": 1.0575, + "step": 35530 + }, + { + "epoch": 0.51, + "grad_norm": 0.53515625, + "learning_rate": 0.00011401157056453091, + "loss": 0.9071, + "step": 35535 + }, + { + "epoch": 0.51, + "grad_norm": 0.66015625, + "learning_rate": 0.00011398678090272945, + "loss": 1.016, + "step": 35540 + }, + { + "epoch": 0.51, + "grad_norm": 0.5546875, + "learning_rate": 0.00011396199036422062, + "loss": 0.9777, + "step": 35545 + }, + { + "epoch": 0.51, + "grad_norm": 0.609375, + "learning_rate": 0.00011393719895055834, + "loss": 0.8702, + "step": 35550 + }, + { + "epoch": 0.51, + "grad_norm": 0.60546875, + "learning_rate": 0.00011391240666329655, + "loss": 0.83, + "step": 35555 + }, + { + "epoch": 0.51, + "grad_norm": 0.51953125, + "learning_rate": 0.00011388761350398927, + "loss": 1.0824, + "step": 35560 + }, + { + "epoch": 0.51, + "grad_norm": 0.5703125, + "learning_rate": 0.00011386281947419055, + "loss": 0.9833, + "step": 35565 + }, + { + "epoch": 0.51, + "grad_norm": 0.53125, + "learning_rate": 0.00011383802457545452, + "loss": 1.0281, + "step": 35570 + }, + { + "epoch": 0.51, + "grad_norm": 0.828125, + "learning_rate": 0.00011381322880933536, + "loss": 0.8782, + "step": 35575 + }, + { + "epoch": 0.51, + "grad_norm": 0.466796875, + "learning_rate": 0.00011378843217738726, + "loss": 1.0211, + "step": 35580 + }, + { + "epoch": 0.51, + "grad_norm": 0.53515625, + "learning_rate": 0.00011376363468116456, + "loss": 0.956, + "step": 35585 + }, + { + "epoch": 0.51, + "grad_norm": 0.546875, + "learning_rate": 0.00011373883632222156, + "loss": 0.8512, + "step": 35590 + }, + { + "epoch": 0.51, + "grad_norm": 0.54296875, + "learning_rate": 0.00011371403710211262, + "loss": 0.9951, + "step": 35595 + }, + { + "epoch": 0.51, + "grad_norm": 0.50390625, + "learning_rate": 0.00011368923702239225, + "loss": 0.9283, + "step": 35600 + }, + { + "epoch": 0.51, + "grad_norm": 0.5078125, + "learning_rate": 0.0001136644360846149, + "loss": 0.8853, + "step": 35605 + }, + { + "epoch": 0.51, + "grad_norm": 0.5703125, + "learning_rate": 0.00011363963429033513, + "loss": 0.865, + "step": 35610 + }, + { + "epoch": 0.51, + "grad_norm": 0.5546875, + "learning_rate": 0.00011361483164110756, + "loss": 0.962, + "step": 35615 + }, + { + "epoch": 0.51, + "grad_norm": 0.515625, + "learning_rate": 0.00011359002813848682, + "loss": 0.9415, + "step": 35620 + }, + { + "epoch": 0.51, + "grad_norm": 0.6171875, + "learning_rate": 0.00011356522378402765, + "loss": 1.0479, + "step": 35625 + }, + { + "epoch": 0.51, + "grad_norm": 0.51953125, + "learning_rate": 0.00011354041857928481, + "loss": 0.9286, + "step": 35630 + }, + { + "epoch": 0.51, + "grad_norm": 0.5546875, + "learning_rate": 0.00011351561252581308, + "loss": 0.978, + "step": 35635 + }, + { + "epoch": 0.51, + "grad_norm": 0.50390625, + "learning_rate": 0.00011349080562516738, + "loss": 0.9409, + "step": 35640 + }, + { + "epoch": 0.51, + "grad_norm": 0.5625, + "learning_rate": 0.00011346599787890264, + "loss": 0.8311, + "step": 35645 + }, + { + "epoch": 0.51, + "grad_norm": 0.58203125, + "learning_rate": 0.00011344118928857379, + "loss": 0.9399, + "step": 35650 + }, + { + "epoch": 0.51, + "grad_norm": 0.5859375, + "learning_rate": 0.00011341637985573592, + "loss": 0.987, + "step": 35655 + }, + { + "epoch": 0.51, + "grad_norm": 0.484375, + "learning_rate": 0.00011339156958194405, + "loss": 1.0669, + "step": 35660 + }, + { + "epoch": 0.51, + "grad_norm": 0.65234375, + "learning_rate": 0.00011336675846875335, + "loss": 1.0025, + "step": 35665 + }, + { + "epoch": 0.51, + "grad_norm": 0.57421875, + "learning_rate": 0.000113341946517719, + "loss": 0.9605, + "step": 35670 + }, + { + "epoch": 0.51, + "grad_norm": 0.494140625, + "learning_rate": 0.00011331713373039628, + "loss": 0.9151, + "step": 35675 + }, + { + "epoch": 0.51, + "grad_norm": 0.5859375, + "learning_rate": 0.00011329232010834043, + "loss": 0.8997, + "step": 35680 + }, + { + "epoch": 0.51, + "grad_norm": 0.5703125, + "learning_rate": 0.00011326750565310681, + "loss": 0.9927, + "step": 35685 + }, + { + "epoch": 0.51, + "grad_norm": 0.56640625, + "learning_rate": 0.00011324269036625082, + "loss": 1.0114, + "step": 35690 + }, + { + "epoch": 0.51, + "grad_norm": 0.52734375, + "learning_rate": 0.00011321787424932793, + "loss": 0.9672, + "step": 35695 + }, + { + "epoch": 0.51, + "grad_norm": 0.55078125, + "learning_rate": 0.00011319305730389363, + "loss": 0.9736, + "step": 35700 + }, + { + "epoch": 0.51, + "grad_norm": 0.5546875, + "learning_rate": 0.00011316823953150349, + "loss": 0.8902, + "step": 35705 + }, + { + "epoch": 0.51, + "grad_norm": 0.546875, + "learning_rate": 0.00011314342093371307, + "loss": 0.8646, + "step": 35710 + }, + { + "epoch": 0.51, + "grad_norm": 0.51171875, + "learning_rate": 0.00011311860151207807, + "loss": 0.9136, + "step": 35715 + }, + { + "epoch": 0.51, + "grad_norm": 0.54296875, + "learning_rate": 0.00011309378126815416, + "loss": 1.0051, + "step": 35720 + }, + { + "epoch": 0.51, + "grad_norm": 0.53515625, + "learning_rate": 0.00011306896020349715, + "loss": 0.8496, + "step": 35725 + }, + { + "epoch": 0.51, + "grad_norm": 0.64453125, + "learning_rate": 0.00011304413831966282, + "loss": 0.9563, + "step": 35730 + }, + { + "epoch": 0.51, + "grad_norm": 0.5546875, + "learning_rate": 0.00011301931561820706, + "loss": 1.019, + "step": 35735 + }, + { + "epoch": 0.51, + "grad_norm": 0.52734375, + "learning_rate": 0.00011299449210068577, + "loss": 1.0119, + "step": 35740 + }, + { + "epoch": 0.51, + "grad_norm": 0.5234375, + "learning_rate": 0.0001129696677686549, + "loss": 0.8935, + "step": 35745 + }, + { + "epoch": 0.51, + "grad_norm": 0.49609375, + "learning_rate": 0.00011294484262367049, + "loss": 1.0451, + "step": 35750 + }, + { + "epoch": 0.51, + "grad_norm": 0.55078125, + "learning_rate": 0.0001129200166672886, + "loss": 0.9686, + "step": 35755 + }, + { + "epoch": 0.51, + "grad_norm": 0.515625, + "learning_rate": 0.00011289518990106535, + "loss": 0.7722, + "step": 35760 + }, + { + "epoch": 0.51, + "grad_norm": 0.5546875, + "learning_rate": 0.00011287036232655692, + "loss": 0.9714, + "step": 35765 + }, + { + "epoch": 0.51, + "grad_norm": 0.546875, + "learning_rate": 0.00011284553394531951, + "loss": 1.0167, + "step": 35770 + }, + { + "epoch": 0.51, + "grad_norm": 0.53515625, + "learning_rate": 0.0001128207047589094, + "loss": 0.8774, + "step": 35775 + }, + { + "epoch": 0.51, + "grad_norm": 0.63671875, + "learning_rate": 0.00011279587476888292, + "loss": 1.0096, + "step": 35780 + }, + { + "epoch": 0.51, + "grad_norm": 0.609375, + "learning_rate": 0.00011277104397679646, + "loss": 1.0358, + "step": 35785 + }, + { + "epoch": 0.51, + "grad_norm": 0.49609375, + "learning_rate": 0.00011274621238420637, + "loss": 0.9213, + "step": 35790 + }, + { + "epoch": 0.51, + "grad_norm": 0.5703125, + "learning_rate": 0.0001127213799926692, + "loss": 0.9913, + "step": 35795 + }, + { + "epoch": 0.51, + "grad_norm": 0.625, + "learning_rate": 0.00011269654680374144, + "loss": 0.8914, + "step": 35800 + }, + { + "epoch": 0.51, + "grad_norm": 0.5390625, + "learning_rate": 0.00011267171281897966, + "loss": 0.8998, + "step": 35805 + }, + { + "epoch": 0.51, + "grad_norm": 0.5625, + "learning_rate": 0.00011264687803994052, + "loss": 0.8822, + "step": 35810 + }, + { + "epoch": 0.51, + "grad_norm": 0.5546875, + "learning_rate": 0.00011262204246818062, + "loss": 0.8956, + "step": 35815 + }, + { + "epoch": 0.51, + "grad_norm": 0.5234375, + "learning_rate": 0.00011259720610525674, + "loss": 0.895, + "step": 35820 + }, + { + "epoch": 0.51, + "grad_norm": 0.5234375, + "learning_rate": 0.00011257236895272565, + "loss": 0.9545, + "step": 35825 + }, + { + "epoch": 0.51, + "grad_norm": 0.58203125, + "learning_rate": 0.00011254753101214412, + "loss": 1.0074, + "step": 35830 + }, + { + "epoch": 0.51, + "grad_norm": 0.53515625, + "learning_rate": 0.00011252269228506909, + "loss": 0.8765, + "step": 35835 + }, + { + "epoch": 0.51, + "grad_norm": 0.7890625, + "learning_rate": 0.00011249785277305743, + "loss": 0.9155, + "step": 35840 + }, + { + "epoch": 0.51, + "grad_norm": 0.5546875, + "learning_rate": 0.00011247301247766611, + "loss": 0.9397, + "step": 35845 + }, + { + "epoch": 0.51, + "grad_norm": 0.55859375, + "learning_rate": 0.00011244817140045219, + "loss": 1.038, + "step": 35850 + }, + { + "epoch": 0.51, + "grad_norm": 0.55859375, + "learning_rate": 0.00011242332954297268, + "loss": 0.9239, + "step": 35855 + }, + { + "epoch": 0.51, + "grad_norm": 0.546875, + "learning_rate": 0.00011239848690678474, + "loss": 0.9346, + "step": 35860 + }, + { + "epoch": 0.51, + "grad_norm": 0.51171875, + "learning_rate": 0.00011237364349344553, + "loss": 1.0483, + "step": 35865 + }, + { + "epoch": 0.51, + "grad_norm": 0.625, + "learning_rate": 0.00011234879930451223, + "loss": 0.8625, + "step": 35870 + }, + { + "epoch": 0.51, + "grad_norm": 0.484375, + "learning_rate": 0.00011232395434154215, + "loss": 0.8454, + "step": 35875 + }, + { + "epoch": 0.51, + "grad_norm": 0.5546875, + "learning_rate": 0.00011229910860609256, + "loss": 0.85, + "step": 35880 + }, + { + "epoch": 0.51, + "grad_norm": 0.515625, + "learning_rate": 0.00011227426209972082, + "loss": 0.9926, + "step": 35885 + }, + { + "epoch": 0.51, + "grad_norm": 0.671875, + "learning_rate": 0.00011224941482398441, + "loss": 1.1048, + "step": 35890 + }, + { + "epoch": 0.51, + "grad_norm": 0.55078125, + "learning_rate": 0.00011222456678044067, + "loss": 0.8748, + "step": 35895 + }, + { + "epoch": 0.51, + "grad_norm": 0.57421875, + "learning_rate": 0.00011219971797064718, + "loss": 1.0172, + "step": 35900 + }, + { + "epoch": 0.52, + "grad_norm": 0.578125, + "learning_rate": 0.0001121748683961615, + "loss": 1.0032, + "step": 35905 + }, + { + "epoch": 0.52, + "grad_norm": 0.478515625, + "learning_rate": 0.00011215001805854116, + "loss": 0.9065, + "step": 35910 + }, + { + "epoch": 0.52, + "grad_norm": 0.55078125, + "learning_rate": 0.00011212516695934388, + "loss": 0.9394, + "step": 35915 + }, + { + "epoch": 0.52, + "grad_norm": 0.515625, + "learning_rate": 0.0001121003151001273, + "loss": 0.9367, + "step": 35920 + }, + { + "epoch": 0.52, + "grad_norm": 0.59765625, + "learning_rate": 0.0001120754624824492, + "loss": 0.9802, + "step": 35925 + }, + { + "epoch": 0.52, + "grad_norm": 0.6640625, + "learning_rate": 0.00011205060910786737, + "loss": 0.8837, + "step": 35930 + }, + { + "epoch": 0.52, + "grad_norm": 0.5625, + "learning_rate": 0.00011202575497793961, + "loss": 1.0204, + "step": 35935 + }, + { + "epoch": 0.52, + "grad_norm": 0.546875, + "learning_rate": 0.00011200090009422388, + "loss": 0.9781, + "step": 35940 + }, + { + "epoch": 0.52, + "grad_norm": 0.55859375, + "learning_rate": 0.00011197604445827803, + "loss": 0.9268, + "step": 35945 + }, + { + "epoch": 0.52, + "grad_norm": 0.6171875, + "learning_rate": 0.00011195118807166008, + "loss": 0.7944, + "step": 35950 + }, + { + "epoch": 0.52, + "grad_norm": 0.58984375, + "learning_rate": 0.00011192633093592803, + "loss": 0.8921, + "step": 35955 + }, + { + "epoch": 0.52, + "grad_norm": 0.50390625, + "learning_rate": 0.00011190147305264, + "loss": 1.0664, + "step": 35960 + }, + { + "epoch": 0.52, + "grad_norm": 0.59765625, + "learning_rate": 0.00011187661442335407, + "loss": 0.9301, + "step": 35965 + }, + { + "epoch": 0.52, + "grad_norm": 0.53125, + "learning_rate": 0.00011185175504962846, + "loss": 0.833, + "step": 35970 + }, + { + "epoch": 0.52, + "grad_norm": 0.53125, + "learning_rate": 0.00011182689493302128, + "loss": 0.8505, + "step": 35975 + }, + { + "epoch": 0.52, + "grad_norm": 0.55859375, + "learning_rate": 0.0001118020340750909, + "loss": 0.9573, + "step": 35980 + }, + { + "epoch": 0.52, + "grad_norm": 0.60546875, + "learning_rate": 0.00011177717247739559, + "loss": 0.9987, + "step": 35985 + }, + { + "epoch": 0.52, + "grad_norm": 0.5078125, + "learning_rate": 0.00011175231014149366, + "loss": 0.8967, + "step": 35990 + }, + { + "epoch": 0.52, + "grad_norm": 0.609375, + "learning_rate": 0.00011172744706894357, + "loss": 0.9154, + "step": 35995 + }, + { + "epoch": 0.52, + "grad_norm": 0.515625, + "learning_rate": 0.00011170258326130374, + "loss": 0.8346, + "step": 36000 + }, + { + "epoch": 0.52, + "grad_norm": 0.6328125, + "learning_rate": 0.00011167771872013264, + "loss": 1.0448, + "step": 36005 + }, + { + "epoch": 0.52, + "grad_norm": 0.62109375, + "learning_rate": 0.00011165285344698886, + "loss": 0.912, + "step": 36010 + }, + { + "epoch": 0.52, + "grad_norm": 0.56640625, + "learning_rate": 0.00011162798744343094, + "loss": 0.8778, + "step": 36015 + }, + { + "epoch": 0.52, + "grad_norm": 0.54296875, + "learning_rate": 0.00011160312071101755, + "loss": 0.9186, + "step": 36020 + }, + { + "epoch": 0.52, + "grad_norm": 0.5234375, + "learning_rate": 0.0001115782532513073, + "loss": 0.8453, + "step": 36025 + }, + { + "epoch": 0.52, + "grad_norm": 0.58203125, + "learning_rate": 0.00011155338506585895, + "loss": 1.0438, + "step": 36030 + }, + { + "epoch": 0.52, + "grad_norm": 0.578125, + "learning_rate": 0.0001115285161562313, + "loss": 1.0465, + "step": 36035 + }, + { + "epoch": 0.52, + "grad_norm": 0.60546875, + "learning_rate": 0.0001115036465239831, + "loss": 0.6984, + "step": 36040 + }, + { + "epoch": 0.52, + "grad_norm": 0.486328125, + "learning_rate": 0.00011147877617067326, + "loss": 0.9824, + "step": 36045 + }, + { + "epoch": 0.52, + "grad_norm": 0.55859375, + "learning_rate": 0.00011145390509786065, + "loss": 1.0159, + "step": 36050 + }, + { + "epoch": 0.52, + "grad_norm": 0.546875, + "learning_rate": 0.00011142903330710422, + "loss": 0.8533, + "step": 36055 + }, + { + "epoch": 0.52, + "grad_norm": 0.470703125, + "learning_rate": 0.00011140416079996294, + "loss": 0.8495, + "step": 36060 + }, + { + "epoch": 0.52, + "grad_norm": 0.6171875, + "learning_rate": 0.0001113792875779959, + "loss": 0.9059, + "step": 36065 + }, + { + "epoch": 0.52, + "grad_norm": 0.95703125, + "learning_rate": 0.00011135441364276214, + "loss": 0.9416, + "step": 36070 + }, + { + "epoch": 0.52, + "grad_norm": 0.5390625, + "learning_rate": 0.00011132953899582081, + "loss": 1.1856, + "step": 36075 + }, + { + "epoch": 0.52, + "grad_norm": 0.5703125, + "learning_rate": 0.00011130466363873109, + "loss": 0.9599, + "step": 36080 + }, + { + "epoch": 0.52, + "grad_norm": 0.58203125, + "learning_rate": 0.00011127978757305213, + "loss": 0.8942, + "step": 36085 + }, + { + "epoch": 0.52, + "grad_norm": 0.5078125, + "learning_rate": 0.00011125491080034327, + "loss": 0.9854, + "step": 36090 + }, + { + "epoch": 0.52, + "grad_norm": 0.6328125, + "learning_rate": 0.0001112300333221638, + "loss": 0.9776, + "step": 36095 + }, + { + "epoch": 0.52, + "grad_norm": 0.5390625, + "learning_rate": 0.00011120515514007302, + "loss": 0.9301, + "step": 36100 + }, + { + "epoch": 0.52, + "grad_norm": 0.53515625, + "learning_rate": 0.00011118027625563037, + "loss": 1.0208, + "step": 36105 + }, + { + "epoch": 0.52, + "grad_norm": 0.58203125, + "learning_rate": 0.00011115539667039528, + "loss": 0.8565, + "step": 36110 + }, + { + "epoch": 0.52, + "grad_norm": 0.578125, + "learning_rate": 0.00011113051638592717, + "loss": 0.9049, + "step": 36115 + }, + { + "epoch": 0.52, + "grad_norm": 0.5546875, + "learning_rate": 0.00011110563540378567, + "loss": 0.9627, + "step": 36120 + }, + { + "epoch": 0.52, + "grad_norm": 0.57421875, + "learning_rate": 0.00011108075372553029, + "loss": 0.9632, + "step": 36125 + }, + { + "epoch": 0.52, + "grad_norm": 0.5625, + "learning_rate": 0.00011105587135272064, + "loss": 0.9762, + "step": 36130 + }, + { + "epoch": 0.52, + "grad_norm": 0.494140625, + "learning_rate": 0.00011103098828691639, + "loss": 0.9035, + "step": 36135 + }, + { + "epoch": 0.52, + "grad_norm": 0.59375, + "learning_rate": 0.00011100610452967719, + "loss": 0.9729, + "step": 36140 + }, + { + "epoch": 0.52, + "grad_norm": 0.66015625, + "learning_rate": 0.00011098122008256286, + "loss": 0.9325, + "step": 36145 + }, + { + "epoch": 0.52, + "grad_norm": 0.52734375, + "learning_rate": 0.00011095633494713315, + "loss": 0.9717, + "step": 36150 + }, + { + "epoch": 0.52, + "grad_norm": 0.65625, + "learning_rate": 0.00011093144912494788, + "loss": 1.0258, + "step": 36155 + }, + { + "epoch": 0.52, + "grad_norm": 0.546875, + "learning_rate": 0.00011090656261756694, + "loss": 0.946, + "step": 36160 + }, + { + "epoch": 0.52, + "grad_norm": 0.56640625, + "learning_rate": 0.00011088167542655022, + "loss": 0.8508, + "step": 36165 + }, + { + "epoch": 0.52, + "grad_norm": 0.609375, + "learning_rate": 0.00011085678755345768, + "loss": 0.9069, + "step": 36170 + }, + { + "epoch": 0.52, + "grad_norm": 0.55859375, + "learning_rate": 0.00011083189899984933, + "loss": 0.8887, + "step": 36175 + }, + { + "epoch": 0.52, + "grad_norm": 0.50390625, + "learning_rate": 0.00011080700976728525, + "loss": 1.1359, + "step": 36180 + }, + { + "epoch": 0.52, + "grad_norm": 0.58203125, + "learning_rate": 0.00011078211985732545, + "loss": 0.9425, + "step": 36185 + }, + { + "epoch": 0.52, + "grad_norm": 0.6015625, + "learning_rate": 0.00011075722927153012, + "loss": 0.8805, + "step": 36190 + }, + { + "epoch": 0.52, + "grad_norm": 0.61328125, + "learning_rate": 0.0001107323380114594, + "loss": 1.0638, + "step": 36195 + }, + { + "epoch": 0.52, + "grad_norm": 0.55078125, + "learning_rate": 0.00011070744607867352, + "loss": 0.8841, + "step": 36200 + }, + { + "epoch": 0.52, + "grad_norm": 0.482421875, + "learning_rate": 0.00011068255347473271, + "loss": 0.8163, + "step": 36205 + }, + { + "epoch": 0.52, + "grad_norm": 0.486328125, + "learning_rate": 0.0001106576602011973, + "loss": 0.9477, + "step": 36210 + }, + { + "epoch": 0.52, + "grad_norm": 0.6796875, + "learning_rate": 0.0001106327662596276, + "loss": 0.9684, + "step": 36215 + }, + { + "epoch": 0.52, + "grad_norm": 0.55859375, + "learning_rate": 0.000110607871651584, + "loss": 0.9523, + "step": 36220 + }, + { + "epoch": 0.52, + "grad_norm": 0.5078125, + "learning_rate": 0.00011058297637862691, + "loss": 0.8194, + "step": 36225 + }, + { + "epoch": 0.52, + "grad_norm": 0.6015625, + "learning_rate": 0.00011055808044231686, + "loss": 1.1832, + "step": 36230 + }, + { + "epoch": 0.52, + "grad_norm": 0.51171875, + "learning_rate": 0.00011053318384421423, + "loss": 1.05, + "step": 36235 + }, + { + "epoch": 0.52, + "grad_norm": 0.57421875, + "learning_rate": 0.00011050828658587968, + "loss": 0.959, + "step": 36240 + }, + { + "epoch": 0.52, + "grad_norm": 0.6328125, + "learning_rate": 0.00011048338866887376, + "loss": 0.8375, + "step": 36245 + }, + { + "epoch": 0.52, + "grad_norm": 0.6953125, + "learning_rate": 0.00011045849009475709, + "loss": 1.179, + "step": 36250 + }, + { + "epoch": 0.52, + "grad_norm": 0.5625, + "learning_rate": 0.00011043359086509038, + "loss": 0.9546, + "step": 36255 + }, + { + "epoch": 0.52, + "grad_norm": 0.55859375, + "learning_rate": 0.00011040869098143425, + "loss": 0.9842, + "step": 36260 + }, + { + "epoch": 0.52, + "grad_norm": 0.6015625, + "learning_rate": 0.00011038379044534957, + "loss": 0.865, + "step": 36265 + }, + { + "epoch": 0.52, + "grad_norm": 0.5390625, + "learning_rate": 0.00011035888925839705, + "loss": 0.958, + "step": 36270 + }, + { + "epoch": 0.52, + "grad_norm": 0.5703125, + "learning_rate": 0.00011033398742213755, + "loss": 0.9291, + "step": 36275 + }, + { + "epoch": 0.52, + "grad_norm": 0.53515625, + "learning_rate": 0.00011030908493813197, + "loss": 0.9853, + "step": 36280 + }, + { + "epoch": 0.52, + "grad_norm": 0.6015625, + "learning_rate": 0.00011028418180794122, + "loss": 0.982, + "step": 36285 + }, + { + "epoch": 0.52, + "grad_norm": 0.5546875, + "learning_rate": 0.00011025927803312619, + "loss": 0.9879, + "step": 36290 + }, + { + "epoch": 0.52, + "grad_norm": 0.6796875, + "learning_rate": 0.00011023437361524795, + "loss": 0.9501, + "step": 36295 + }, + { + "epoch": 0.52, + "grad_norm": 0.53515625, + "learning_rate": 0.00011020946855586753, + "loss": 1.0892, + "step": 36300 + }, + { + "epoch": 0.52, + "grad_norm": 0.5078125, + "learning_rate": 0.00011018456285654597, + "loss": 0.9984, + "step": 36305 + }, + { + "epoch": 0.52, + "grad_norm": 0.61328125, + "learning_rate": 0.00011015965651884446, + "loss": 1.0251, + "step": 36310 + }, + { + "epoch": 0.52, + "grad_norm": 0.58984375, + "learning_rate": 0.00011013474954432405, + "loss": 0.9086, + "step": 36315 + }, + { + "epoch": 0.52, + "grad_norm": 0.546875, + "learning_rate": 0.00011010984193454603, + "loss": 0.9331, + "step": 36320 + }, + { + "epoch": 0.52, + "grad_norm": 0.515625, + "learning_rate": 0.0001100849336910716, + "loss": 0.7747, + "step": 36325 + }, + { + "epoch": 0.52, + "grad_norm": 0.6328125, + "learning_rate": 0.00011006002481546201, + "loss": 1.2171, + "step": 36330 + }, + { + "epoch": 0.52, + "grad_norm": 0.48046875, + "learning_rate": 0.00011003511530927866, + "loss": 1.0336, + "step": 36335 + }, + { + "epoch": 0.52, + "grad_norm": 0.515625, + "learning_rate": 0.00011001020517408283, + "loss": 0.9015, + "step": 36340 + }, + { + "epoch": 0.52, + "grad_norm": 0.5625, + "learning_rate": 0.00010998529441143593, + "loss": 0.9482, + "step": 36345 + }, + { + "epoch": 0.52, + "grad_norm": 0.56640625, + "learning_rate": 0.00010996038302289939, + "loss": 0.9363, + "step": 36350 + }, + { + "epoch": 0.52, + "grad_norm": 0.64453125, + "learning_rate": 0.00010993547101003474, + "loss": 1.0001, + "step": 36355 + }, + { + "epoch": 0.52, + "grad_norm": 0.53515625, + "learning_rate": 0.00010991055837440343, + "loss": 1.002, + "step": 36360 + }, + { + "epoch": 0.52, + "grad_norm": 0.515625, + "learning_rate": 0.00010988564511756703, + "loss": 0.8675, + "step": 36365 + }, + { + "epoch": 0.52, + "grad_norm": 0.57421875, + "learning_rate": 0.00010986073124108711, + "loss": 1.0402, + "step": 36370 + }, + { + "epoch": 0.52, + "grad_norm": 0.5234375, + "learning_rate": 0.00010983581674652536, + "loss": 0.952, + "step": 36375 + }, + { + "epoch": 0.52, + "grad_norm": 0.58203125, + "learning_rate": 0.00010981090163544341, + "loss": 0.9668, + "step": 36380 + }, + { + "epoch": 0.52, + "grad_norm": 0.53515625, + "learning_rate": 0.00010978598590940294, + "loss": 0.9341, + "step": 36385 + }, + { + "epoch": 0.52, + "grad_norm": 0.640625, + "learning_rate": 0.00010976106956996579, + "loss": 0.7929, + "step": 36390 + }, + { + "epoch": 0.52, + "grad_norm": 0.578125, + "learning_rate": 0.00010973615261869365, + "loss": 0.9433, + "step": 36395 + }, + { + "epoch": 0.52, + "grad_norm": 0.578125, + "learning_rate": 0.00010971123505714835, + "loss": 0.843, + "step": 36400 + }, + { + "epoch": 0.52, + "grad_norm": 0.6484375, + "learning_rate": 0.00010968631688689181, + "loss": 1.108, + "step": 36405 + }, + { + "epoch": 0.52, + "grad_norm": 0.52734375, + "learning_rate": 0.00010966139810948591, + "loss": 0.8542, + "step": 36410 + }, + { + "epoch": 0.52, + "grad_norm": 0.54296875, + "learning_rate": 0.00010963647872649255, + "loss": 1.1927, + "step": 36415 + }, + { + "epoch": 0.52, + "grad_norm": 0.56640625, + "learning_rate": 0.00010961155873947373, + "loss": 0.9521, + "step": 36420 + }, + { + "epoch": 0.52, + "grad_norm": 0.5078125, + "learning_rate": 0.00010958663814999145, + "loss": 0.8526, + "step": 36425 + }, + { + "epoch": 0.52, + "grad_norm": 0.60546875, + "learning_rate": 0.00010956171695960778, + "loss": 0.9811, + "step": 36430 + }, + { + "epoch": 0.52, + "grad_norm": 0.50390625, + "learning_rate": 0.0001095367951698848, + "loss": 0.8424, + "step": 36435 + }, + { + "epoch": 0.52, + "grad_norm": 0.53515625, + "learning_rate": 0.00010951187278238464, + "loss": 0.8957, + "step": 36440 + }, + { + "epoch": 0.52, + "grad_norm": 0.62109375, + "learning_rate": 0.00010948694979866947, + "loss": 0.9119, + "step": 36445 + }, + { + "epoch": 0.52, + "grad_norm": 0.5625, + "learning_rate": 0.00010946202622030149, + "loss": 0.9794, + "step": 36450 + }, + { + "epoch": 0.52, + "grad_norm": 0.52734375, + "learning_rate": 0.00010943710204884288, + "loss": 0.8748, + "step": 36455 + }, + { + "epoch": 0.52, + "grad_norm": 0.61328125, + "learning_rate": 0.000109412177285856, + "loss": 0.9861, + "step": 36460 + }, + { + "epoch": 0.52, + "grad_norm": 0.75390625, + "learning_rate": 0.00010938725193290313, + "loss": 1.0779, + "step": 36465 + }, + { + "epoch": 0.52, + "grad_norm": 0.462890625, + "learning_rate": 0.00010936232599154664, + "loss": 0.8525, + "step": 36470 + }, + { + "epoch": 0.52, + "grad_norm": 0.58203125, + "learning_rate": 0.00010933739946334886, + "loss": 0.953, + "step": 36475 + }, + { + "epoch": 0.52, + "grad_norm": 0.470703125, + "learning_rate": 0.00010931247234987223, + "loss": 1.0332, + "step": 36480 + }, + { + "epoch": 0.52, + "grad_norm": 0.5390625, + "learning_rate": 0.00010928754465267925, + "loss": 0.9057, + "step": 36485 + }, + { + "epoch": 0.52, + "grad_norm": 0.5390625, + "learning_rate": 0.00010926261637333242, + "loss": 1.0217, + "step": 36490 + }, + { + "epoch": 0.52, + "grad_norm": 0.5703125, + "learning_rate": 0.00010923768751339422, + "loss": 0.9859, + "step": 36495 + }, + { + "epoch": 0.52, + "grad_norm": 0.5546875, + "learning_rate": 0.00010921275807442726, + "loss": 0.9719, + "step": 36500 + }, + { + "epoch": 0.52, + "grad_norm": 0.6015625, + "learning_rate": 0.00010918782805799412, + "loss": 0.9264, + "step": 36505 + }, + { + "epoch": 0.52, + "grad_norm": 0.5703125, + "learning_rate": 0.00010916289746565742, + "loss": 0.9057, + "step": 36510 + }, + { + "epoch": 0.52, + "grad_norm": 0.49609375, + "learning_rate": 0.00010913796629897993, + "loss": 0.9249, + "step": 36515 + }, + { + "epoch": 0.52, + "grad_norm": 0.625, + "learning_rate": 0.00010911303455952427, + "loss": 0.9619, + "step": 36520 + }, + { + "epoch": 0.52, + "grad_norm": 0.484375, + "learning_rate": 0.00010908810224885325, + "loss": 0.9483, + "step": 36525 + }, + { + "epoch": 0.52, + "grad_norm": 0.5625, + "learning_rate": 0.0001090631693685296, + "loss": 1.0105, + "step": 36530 + }, + { + "epoch": 0.52, + "grad_norm": 0.5546875, + "learning_rate": 0.00010903823592011617, + "loss": 1.0082, + "step": 36535 + }, + { + "epoch": 0.52, + "grad_norm": 0.59765625, + "learning_rate": 0.00010901330190517585, + "loss": 0.9012, + "step": 36540 + }, + { + "epoch": 0.52, + "grad_norm": 0.54296875, + "learning_rate": 0.00010898836732527148, + "loss": 0.8343, + "step": 36545 + }, + { + "epoch": 0.52, + "grad_norm": 0.55859375, + "learning_rate": 0.00010896343218196603, + "loss": 0.8575, + "step": 36550 + }, + { + "epoch": 0.52, + "grad_norm": 0.52734375, + "learning_rate": 0.00010893849647682244, + "loss": 0.9455, + "step": 36555 + }, + { + "epoch": 0.52, + "grad_norm": 0.63671875, + "learning_rate": 0.0001089135602114037, + "loss": 0.9434, + "step": 36560 + }, + { + "epoch": 0.52, + "grad_norm": 0.5390625, + "learning_rate": 0.00010888862338727284, + "loss": 0.8242, + "step": 36565 + }, + { + "epoch": 0.52, + "grad_norm": 0.515625, + "learning_rate": 0.00010886368600599297, + "loss": 0.8579, + "step": 36570 + }, + { + "epoch": 0.52, + "grad_norm": 0.5546875, + "learning_rate": 0.00010883874806912715, + "loss": 0.9151, + "step": 36575 + }, + { + "epoch": 0.52, + "grad_norm": 0.5859375, + "learning_rate": 0.00010881380957823853, + "loss": 0.8738, + "step": 36580 + }, + { + "epoch": 0.52, + "grad_norm": 0.59375, + "learning_rate": 0.00010878887053489031, + "loss": 0.9275, + "step": 36585 + }, + { + "epoch": 0.52, + "grad_norm": 0.58984375, + "learning_rate": 0.00010876393094064563, + "loss": 0.957, + "step": 36590 + }, + { + "epoch": 0.52, + "grad_norm": 0.51953125, + "learning_rate": 0.00010873899079706782, + "loss": 0.9025, + "step": 36595 + }, + { + "epoch": 0.53, + "grad_norm": 0.56640625, + "learning_rate": 0.00010871405010572009, + "loss": 1.0292, + "step": 36600 + }, + { + "epoch": 0.53, + "grad_norm": 0.6015625, + "learning_rate": 0.00010868910886816579, + "loss": 1.007, + "step": 36605 + }, + { + "epoch": 0.53, + "grad_norm": 0.5234375, + "learning_rate": 0.00010866416708596827, + "loss": 0.8085, + "step": 36610 + }, + { + "epoch": 0.53, + "grad_norm": 0.5390625, + "learning_rate": 0.00010863922476069084, + "loss": 0.9712, + "step": 36615 + }, + { + "epoch": 0.53, + "grad_norm": 0.55859375, + "learning_rate": 0.00010861428189389699, + "loss": 0.8855, + "step": 36620 + }, + { + "epoch": 0.53, + "grad_norm": 0.5078125, + "learning_rate": 0.00010858933848715016, + "loss": 1.0071, + "step": 36625 + }, + { + "epoch": 0.53, + "grad_norm": 0.5703125, + "learning_rate": 0.00010856439454201376, + "loss": 1.0599, + "step": 36630 + }, + { + "epoch": 0.53, + "grad_norm": 0.61328125, + "learning_rate": 0.0001085394500600514, + "loss": 1.0069, + "step": 36635 + }, + { + "epoch": 0.53, + "grad_norm": 0.6640625, + "learning_rate": 0.00010851450504282658, + "loss": 1.013, + "step": 36640 + }, + { + "epoch": 0.53, + "grad_norm": 0.5703125, + "learning_rate": 0.00010848955949190286, + "loss": 0.9143, + "step": 36645 + }, + { + "epoch": 0.53, + "grad_norm": 0.6015625, + "learning_rate": 0.00010846461340884393, + "loss": 0.9171, + "step": 36650 + }, + { + "epoch": 0.53, + "grad_norm": 0.515625, + "learning_rate": 0.00010843966679521333, + "loss": 1.0172, + "step": 36655 + }, + { + "epoch": 0.53, + "grad_norm": 0.58984375, + "learning_rate": 0.00010841471965257483, + "loss": 0.8871, + "step": 36660 + }, + { + "epoch": 0.53, + "grad_norm": 0.5625, + "learning_rate": 0.00010838977198249214, + "loss": 0.9314, + "step": 36665 + }, + { + "epoch": 0.53, + "grad_norm": 0.578125, + "learning_rate": 0.00010836482378652895, + "loss": 0.9426, + "step": 36670 + }, + { + "epoch": 0.53, + "grad_norm": 0.59765625, + "learning_rate": 0.00010833987506624912, + "loss": 0.9492, + "step": 36675 + }, + { + "epoch": 0.53, + "grad_norm": 0.92578125, + "learning_rate": 0.00010831492582321641, + "loss": 1.0501, + "step": 36680 + }, + { + "epoch": 0.53, + "grad_norm": 0.58203125, + "learning_rate": 0.00010828997605899465, + "loss": 0.898, + "step": 36685 + }, + { + "epoch": 0.53, + "grad_norm": 0.49609375, + "learning_rate": 0.00010826502577514777, + "loss": 0.8869, + "step": 36690 + }, + { + "epoch": 0.53, + "grad_norm": 0.58203125, + "learning_rate": 0.00010824007497323967, + "loss": 1.022, + "step": 36695 + }, + { + "epoch": 0.53, + "grad_norm": 0.5625, + "learning_rate": 0.00010821512365483426, + "loss": 0.9217, + "step": 36700 + }, + { + "epoch": 0.53, + "grad_norm": 0.59765625, + "learning_rate": 0.00010819017182149558, + "loss": 0.9814, + "step": 36705 + }, + { + "epoch": 0.53, + "grad_norm": 0.56640625, + "learning_rate": 0.00010816521947478757, + "loss": 1.0245, + "step": 36710 + }, + { + "epoch": 0.53, + "grad_norm": 0.61328125, + "learning_rate": 0.00010814026661627432, + "loss": 0.909, + "step": 36715 + }, + { + "epoch": 0.53, + "grad_norm": 0.51171875, + "learning_rate": 0.00010811531324751989, + "loss": 1.0503, + "step": 36720 + }, + { + "epoch": 0.53, + "grad_norm": 0.5078125, + "learning_rate": 0.00010809035937008835, + "loss": 1.059, + "step": 36725 + }, + { + "epoch": 0.53, + "grad_norm": 0.55078125, + "learning_rate": 0.00010806540498554392, + "loss": 0.9477, + "step": 36730 + }, + { + "epoch": 0.53, + "grad_norm": 0.5703125, + "learning_rate": 0.0001080404500954507, + "loss": 0.8944, + "step": 36735 + }, + { + "epoch": 0.53, + "grad_norm": 0.60546875, + "learning_rate": 0.00010801549470137289, + "loss": 1.0347, + "step": 36740 + }, + { + "epoch": 0.53, + "grad_norm": 0.52734375, + "learning_rate": 0.00010799053880487477, + "loss": 0.997, + "step": 36745 + }, + { + "epoch": 0.53, + "grad_norm": 0.59765625, + "learning_rate": 0.00010796558240752057, + "loss": 1.064, + "step": 36750 + }, + { + "epoch": 0.53, + "grad_norm": 0.76953125, + "learning_rate": 0.00010794062551087461, + "loss": 1.0009, + "step": 36755 + }, + { + "epoch": 0.53, + "grad_norm": 0.7421875, + "learning_rate": 0.00010791566811650118, + "loss": 1.0184, + "step": 36760 + }, + { + "epoch": 0.53, + "grad_norm": 0.4921875, + "learning_rate": 0.00010789071022596461, + "loss": 0.9217, + "step": 36765 + }, + { + "epoch": 0.53, + "grad_norm": 0.56640625, + "learning_rate": 0.0001078657518408294, + "loss": 0.9953, + "step": 36770 + }, + { + "epoch": 0.53, + "grad_norm": 0.5546875, + "learning_rate": 0.0001078407929626599, + "loss": 0.9915, + "step": 36775 + }, + { + "epoch": 0.53, + "grad_norm": 0.59765625, + "learning_rate": 0.00010781583359302056, + "loss": 0.8871, + "step": 36780 + }, + { + "epoch": 0.53, + "grad_norm": 0.57421875, + "learning_rate": 0.00010779087373347587, + "loss": 1.0015, + "step": 36785 + }, + { + "epoch": 0.53, + "grad_norm": 0.70703125, + "learning_rate": 0.00010776591338559034, + "loss": 1.1607, + "step": 36790 + }, + { + "epoch": 0.53, + "grad_norm": 0.640625, + "learning_rate": 0.00010774095255092848, + "loss": 0.9473, + "step": 36795 + }, + { + "epoch": 0.53, + "grad_norm": 0.515625, + "learning_rate": 0.00010771599123105495, + "loss": 1.0485, + "step": 36800 + }, + { + "epoch": 0.53, + "grad_norm": 0.52734375, + "learning_rate": 0.00010769102942753429, + "loss": 1.1098, + "step": 36805 + }, + { + "epoch": 0.53, + "grad_norm": 0.53515625, + "learning_rate": 0.00010766606714193113, + "loss": 0.9416, + "step": 36810 + }, + { + "epoch": 0.53, + "grad_norm": 0.56640625, + "learning_rate": 0.00010764110437581017, + "loss": 0.9518, + "step": 36815 + }, + { + "epoch": 0.53, + "grad_norm": 0.51171875, + "learning_rate": 0.00010761614113073605, + "loss": 0.9433, + "step": 36820 + }, + { + "epoch": 0.53, + "grad_norm": 0.69140625, + "learning_rate": 0.00010759117740827356, + "loss": 0.9649, + "step": 36825 + }, + { + "epoch": 0.53, + "grad_norm": 0.58203125, + "learning_rate": 0.00010756621320998743, + "loss": 0.9854, + "step": 36830 + }, + { + "epoch": 0.53, + "grad_norm": 0.62109375, + "learning_rate": 0.00010754124853744242, + "loss": 1.0369, + "step": 36835 + }, + { + "epoch": 0.53, + "grad_norm": 0.6484375, + "learning_rate": 0.00010751628339220336, + "loss": 0.9787, + "step": 36840 + }, + { + "epoch": 0.53, + "grad_norm": 0.4921875, + "learning_rate": 0.00010749131777583512, + "loss": 0.8924, + "step": 36845 + }, + { + "epoch": 0.53, + "grad_norm": 0.5625, + "learning_rate": 0.0001074663516899025, + "loss": 0.8231, + "step": 36850 + }, + { + "epoch": 0.53, + "grad_norm": 0.51953125, + "learning_rate": 0.00010744138513597051, + "loss": 1.0391, + "step": 36855 + }, + { + "epoch": 0.53, + "grad_norm": 0.498046875, + "learning_rate": 0.000107416418115604, + "loss": 0.983, + "step": 36860 + }, + { + "epoch": 0.53, + "grad_norm": 0.55859375, + "learning_rate": 0.00010739145063036797, + "loss": 0.9169, + "step": 36865 + }, + { + "epoch": 0.53, + "grad_norm": 0.51953125, + "learning_rate": 0.00010736648268182738, + "loss": 0.9787, + "step": 36870 + }, + { + "epoch": 0.53, + "grad_norm": 0.51953125, + "learning_rate": 0.00010734151427154726, + "loss": 0.7626, + "step": 36875 + }, + { + "epoch": 0.53, + "grad_norm": 0.62890625, + "learning_rate": 0.00010731654540109268, + "loss": 0.9184, + "step": 36880 + }, + { + "epoch": 0.53, + "grad_norm": 0.65625, + "learning_rate": 0.00010729157607202873, + "loss": 1.0985, + "step": 36885 + }, + { + "epoch": 0.53, + "grad_norm": 0.6640625, + "learning_rate": 0.00010726660628592045, + "loss": 0.9564, + "step": 36890 + }, + { + "epoch": 0.53, + "grad_norm": 0.578125, + "learning_rate": 0.00010724163604433302, + "loss": 0.9134, + "step": 36895 + }, + { + "epoch": 0.53, + "grad_norm": 0.54296875, + "learning_rate": 0.0001072166653488316, + "loss": 0.8505, + "step": 36900 + }, + { + "epoch": 0.53, + "grad_norm": 0.60546875, + "learning_rate": 0.00010719169420098134, + "loss": 0.9296, + "step": 36905 + }, + { + "epoch": 0.53, + "grad_norm": 0.55078125, + "learning_rate": 0.00010716672260234753, + "loss": 0.9877, + "step": 36910 + }, + { + "epoch": 0.53, + "grad_norm": 0.57421875, + "learning_rate": 0.00010714175055449538, + "loss": 0.8699, + "step": 36915 + }, + { + "epoch": 0.53, + "grad_norm": 0.609375, + "learning_rate": 0.00010711677805899017, + "loss": 0.9097, + "step": 36920 + }, + { + "epoch": 0.53, + "grad_norm": 0.478515625, + "learning_rate": 0.0001070918051173972, + "loss": 0.9882, + "step": 36925 + }, + { + "epoch": 0.53, + "grad_norm": 0.55859375, + "learning_rate": 0.0001070668317312818, + "loss": 0.9172, + "step": 36930 + }, + { + "epoch": 0.53, + "grad_norm": 0.5, + "learning_rate": 0.00010704185790220938, + "loss": 0.9566, + "step": 36935 + }, + { + "epoch": 0.53, + "grad_norm": 0.57421875, + "learning_rate": 0.00010701688363174524, + "loss": 1.0493, + "step": 36940 + }, + { + "epoch": 0.53, + "grad_norm": 0.4765625, + "learning_rate": 0.00010699190892145487, + "loss": 0.897, + "step": 36945 + }, + { + "epoch": 0.53, + "grad_norm": 0.578125, + "learning_rate": 0.0001069669337729037, + "loss": 0.9825, + "step": 36950 + }, + { + "epoch": 0.53, + "grad_norm": 0.59765625, + "learning_rate": 0.00010694195818765713, + "loss": 0.893, + "step": 36955 + }, + { + "epoch": 0.53, + "grad_norm": 0.55078125, + "learning_rate": 0.00010691698216728076, + "loss": 0.9239, + "step": 36960 + }, + { + "epoch": 0.53, + "grad_norm": 0.54296875, + "learning_rate": 0.00010689200571334009, + "loss": 0.8994, + "step": 36965 + }, + { + "epoch": 0.53, + "grad_norm": 0.5234375, + "learning_rate": 0.00010686702882740061, + "loss": 0.8999, + "step": 36970 + }, + { + "epoch": 0.53, + "grad_norm": 0.51171875, + "learning_rate": 0.00010684205151102795, + "loss": 0.9204, + "step": 36975 + }, + { + "epoch": 0.53, + "grad_norm": 0.56640625, + "learning_rate": 0.00010681707376578771, + "loss": 0.9035, + "step": 36980 + }, + { + "epoch": 0.53, + "grad_norm": 0.498046875, + "learning_rate": 0.00010679209559324552, + "loss": 1.1007, + "step": 36985 + }, + { + "epoch": 0.53, + "grad_norm": 0.60546875, + "learning_rate": 0.00010676711699496706, + "loss": 1.1832, + "step": 36990 + }, + { + "epoch": 0.53, + "grad_norm": 0.5234375, + "learning_rate": 0.00010674213797251798, + "loss": 0.9332, + "step": 36995 + }, + { + "epoch": 0.53, + "grad_norm": 0.55078125, + "learning_rate": 0.00010671715852746403, + "loss": 0.9942, + "step": 37000 + }, + { + "epoch": 0.53, + "grad_norm": 0.58203125, + "learning_rate": 0.00010669217866137094, + "loss": 0.9642, + "step": 37005 + }, + { + "epoch": 0.53, + "grad_norm": 0.53515625, + "learning_rate": 0.00010666719837580443, + "loss": 0.8863, + "step": 37010 + }, + { + "epoch": 0.53, + "grad_norm": 0.52734375, + "learning_rate": 0.00010664221767233036, + "loss": 0.8984, + "step": 37015 + }, + { + "epoch": 0.53, + "grad_norm": 0.53125, + "learning_rate": 0.00010661723655251454, + "loss": 0.9529, + "step": 37020 + }, + { + "epoch": 0.53, + "grad_norm": 0.55859375, + "learning_rate": 0.00010659225501792277, + "loss": 0.8908, + "step": 37025 + }, + { + "epoch": 0.53, + "grad_norm": 0.55078125, + "learning_rate": 0.00010656727307012096, + "loss": 0.9531, + "step": 37030 + }, + { + "epoch": 0.53, + "grad_norm": 0.55078125, + "learning_rate": 0.00010654229071067499, + "loss": 0.8834, + "step": 37035 + }, + { + "epoch": 0.53, + "grad_norm": 0.546875, + "learning_rate": 0.00010651730794115075, + "loss": 0.9505, + "step": 37040 + }, + { + "epoch": 0.53, + "grad_norm": 0.484375, + "learning_rate": 0.00010649232476311428, + "loss": 0.9008, + "step": 37045 + }, + { + "epoch": 0.53, + "grad_norm": 0.5859375, + "learning_rate": 0.00010646734117813146, + "loss": 0.8971, + "step": 37050 + }, + { + "epoch": 0.53, + "grad_norm": 0.5234375, + "learning_rate": 0.00010644235718776836, + "loss": 1.0166, + "step": 37055 + }, + { + "epoch": 0.53, + "grad_norm": 0.5, + "learning_rate": 0.00010641737279359097, + "loss": 1.0054, + "step": 37060 + }, + { + "epoch": 0.53, + "grad_norm": 0.5390625, + "learning_rate": 0.00010639238799716533, + "loss": 0.9391, + "step": 37065 + }, + { + "epoch": 0.53, + "grad_norm": 0.6328125, + "learning_rate": 0.00010636740280005754, + "loss": 0.9439, + "step": 37070 + }, + { + "epoch": 0.53, + "grad_norm": 0.55859375, + "learning_rate": 0.00010634241720383372, + "loss": 1.0102, + "step": 37075 + }, + { + "epoch": 0.53, + "grad_norm": 0.6171875, + "learning_rate": 0.0001063174312100599, + "loss": 0.9331, + "step": 37080 + }, + { + "epoch": 0.53, + "grad_norm": 0.6328125, + "learning_rate": 0.00010629244482030234, + "loss": 1.0283, + "step": 37085 + }, + { + "epoch": 0.53, + "grad_norm": 0.53515625, + "learning_rate": 0.00010626745803612717, + "loss": 0.8927, + "step": 37090 + }, + { + "epoch": 0.53, + "grad_norm": 0.58203125, + "learning_rate": 0.0001062424708591006, + "loss": 0.9256, + "step": 37095 + }, + { + "epoch": 0.53, + "grad_norm": 0.5546875, + "learning_rate": 0.00010621748329078882, + "loss": 0.9316, + "step": 37100 + }, + { + "epoch": 0.53, + "grad_norm": 0.578125, + "learning_rate": 0.0001061924953327581, + "loss": 0.7805, + "step": 37105 + }, + { + "epoch": 0.53, + "grad_norm": 0.55078125, + "learning_rate": 0.00010616750698657474, + "loss": 0.9389, + "step": 37110 + }, + { + "epoch": 0.53, + "grad_norm": 0.5859375, + "learning_rate": 0.00010614251825380504, + "loss": 0.9466, + "step": 37115 + }, + { + "epoch": 0.53, + "grad_norm": 0.60546875, + "learning_rate": 0.00010611752913601525, + "loss": 1.0314, + "step": 37120 + }, + { + "epoch": 0.53, + "grad_norm": 0.50390625, + "learning_rate": 0.00010609253963477182, + "loss": 0.903, + "step": 37125 + }, + { + "epoch": 0.53, + "grad_norm": 0.5546875, + "learning_rate": 0.00010606754975164103, + "loss": 0.8865, + "step": 37130 + }, + { + "epoch": 0.53, + "grad_norm": 0.5859375, + "learning_rate": 0.00010604255948818931, + "loss": 0.8133, + "step": 37135 + }, + { + "epoch": 0.53, + "grad_norm": 0.60546875, + "learning_rate": 0.0001060175688459831, + "loss": 0.925, + "step": 37140 + }, + { + "epoch": 0.53, + "grad_norm": 0.5625, + "learning_rate": 0.00010599257782658882, + "loss": 1.0755, + "step": 37145 + }, + { + "epoch": 0.53, + "grad_norm": 0.609375, + "learning_rate": 0.00010596758643157294, + "loss": 1.0568, + "step": 37150 + }, + { + "epoch": 0.53, + "grad_norm": 0.59765625, + "learning_rate": 0.00010594259466250194, + "loss": 1.0214, + "step": 37155 + }, + { + "epoch": 0.53, + "grad_norm": 0.5390625, + "learning_rate": 0.0001059176025209423, + "loss": 0.9462, + "step": 37160 + }, + { + "epoch": 0.53, + "grad_norm": 0.65234375, + "learning_rate": 0.00010589261000846065, + "loss": 0.9693, + "step": 37165 + }, + { + "epoch": 0.53, + "grad_norm": 0.61328125, + "learning_rate": 0.00010586761712662345, + "loss": 1.0498, + "step": 37170 + }, + { + "epoch": 0.53, + "grad_norm": 0.65234375, + "learning_rate": 0.00010584262387699737, + "loss": 1.0542, + "step": 37175 + }, + { + "epoch": 0.53, + "grad_norm": 0.5, + "learning_rate": 0.00010581763026114894, + "loss": 0.9571, + "step": 37180 + }, + { + "epoch": 0.53, + "grad_norm": 0.5546875, + "learning_rate": 0.00010579263628064484, + "loss": 1.0827, + "step": 37185 + }, + { + "epoch": 0.53, + "grad_norm": 0.6171875, + "learning_rate": 0.00010576764193705167, + "loss": 0.9859, + "step": 37190 + }, + { + "epoch": 0.53, + "grad_norm": 0.60546875, + "learning_rate": 0.00010574264723193617, + "loss": 1.0388, + "step": 37195 + }, + { + "epoch": 0.53, + "grad_norm": 0.52734375, + "learning_rate": 0.00010571765216686498, + "loss": 0.9622, + "step": 37200 + }, + { + "epoch": 0.53, + "grad_norm": 0.5078125, + "learning_rate": 0.00010569265674340485, + "loss": 0.8824, + "step": 37205 + }, + { + "epoch": 0.53, + "grad_norm": 0.53515625, + "learning_rate": 0.00010566766096312251, + "loss": 0.8486, + "step": 37210 + }, + { + "epoch": 0.53, + "grad_norm": 0.640625, + "learning_rate": 0.0001056426648275847, + "loss": 1.0735, + "step": 37215 + }, + { + "epoch": 0.53, + "grad_norm": 0.5234375, + "learning_rate": 0.00010561766833835826, + "loss": 0.9757, + "step": 37220 + }, + { + "epoch": 0.53, + "grad_norm": 0.53515625, + "learning_rate": 0.00010559267149700998, + "loss": 0.9932, + "step": 37225 + }, + { + "epoch": 0.53, + "grad_norm": 0.5703125, + "learning_rate": 0.00010556767430510665, + "loss": 0.9202, + "step": 37230 + }, + { + "epoch": 0.53, + "grad_norm": 0.6015625, + "learning_rate": 0.00010554267676421518, + "loss": 0.8795, + "step": 37235 + }, + { + "epoch": 0.53, + "grad_norm": 0.5, + "learning_rate": 0.0001055176788759024, + "loss": 0.8951, + "step": 37240 + }, + { + "epoch": 0.53, + "grad_norm": 0.625, + "learning_rate": 0.00010549268064173523, + "loss": 0.9054, + "step": 37245 + }, + { + "epoch": 0.53, + "grad_norm": 0.515625, + "learning_rate": 0.0001054676820632806, + "loss": 0.8105, + "step": 37250 + }, + { + "epoch": 0.53, + "grad_norm": 0.5546875, + "learning_rate": 0.00010544268314210541, + "loss": 1.041, + "step": 37255 + }, + { + "epoch": 0.53, + "grad_norm": 0.5703125, + "learning_rate": 0.00010541768387977664, + "loss": 1.0327, + "step": 37260 + }, + { + "epoch": 0.53, + "grad_norm": 0.546875, + "learning_rate": 0.00010539268427786129, + "loss": 0.8874, + "step": 37265 + }, + { + "epoch": 0.53, + "grad_norm": 0.5859375, + "learning_rate": 0.00010536768433792632, + "loss": 0.9032, + "step": 37270 + }, + { + "epoch": 0.53, + "grad_norm": 0.5078125, + "learning_rate": 0.0001053426840615388, + "loss": 0.9239, + "step": 37275 + }, + { + "epoch": 0.53, + "grad_norm": 0.61328125, + "learning_rate": 0.00010531768345026576, + "loss": 0.9904, + "step": 37280 + }, + { + "epoch": 0.53, + "grad_norm": 0.515625, + "learning_rate": 0.00010529268250567427, + "loss": 0.8399, + "step": 37285 + }, + { + "epoch": 0.53, + "grad_norm": 0.51171875, + "learning_rate": 0.00010526768122933142, + "loss": 0.9121, + "step": 37290 + }, + { + "epoch": 0.53, + "grad_norm": 0.53515625, + "learning_rate": 0.0001052426796228043, + "loss": 0.9694, + "step": 37295 + }, + { + "epoch": 0.54, + "grad_norm": 0.69140625, + "learning_rate": 0.00010521767768766002, + "loss": 0.8676, + "step": 37300 + }, + { + "epoch": 0.54, + "grad_norm": 0.6328125, + "learning_rate": 0.0001051926754254658, + "loss": 0.9611, + "step": 37305 + }, + { + "epoch": 0.54, + "grad_norm": 0.5546875, + "learning_rate": 0.00010516767283778877, + "loss": 1.1587, + "step": 37310 + }, + { + "epoch": 0.54, + "grad_norm": 0.59375, + "learning_rate": 0.0001051426699261961, + "loss": 0.9311, + "step": 37315 + }, + { + "epoch": 0.54, + "grad_norm": 0.55078125, + "learning_rate": 0.00010511766669225505, + "loss": 0.9896, + "step": 37320 + }, + { + "epoch": 0.54, + "grad_norm": 0.53125, + "learning_rate": 0.00010509266313753279, + "loss": 0.9138, + "step": 37325 + }, + { + "epoch": 0.54, + "grad_norm": 0.52734375, + "learning_rate": 0.00010506765926359661, + "loss": 0.8587, + "step": 37330 + }, + { + "epoch": 0.54, + "grad_norm": 0.56640625, + "learning_rate": 0.00010504265507201378, + "loss": 0.999, + "step": 37335 + }, + { + "epoch": 0.54, + "grad_norm": 0.55078125, + "learning_rate": 0.0001050176505643516, + "loss": 0.8369, + "step": 37340 + }, + { + "epoch": 0.54, + "grad_norm": 0.58984375, + "learning_rate": 0.00010499264574217738, + "loss": 0.9698, + "step": 37345 + }, + { + "epoch": 0.54, + "grad_norm": 0.61328125, + "learning_rate": 0.0001049676406070584, + "loss": 1.0164, + "step": 37350 + }, + { + "epoch": 0.54, + "grad_norm": 0.5859375, + "learning_rate": 0.00010494263516056206, + "loss": 1.0847, + "step": 37355 + }, + { + "epoch": 0.54, + "grad_norm": 0.54296875, + "learning_rate": 0.00010491762940425576, + "loss": 0.9465, + "step": 37360 + }, + { + "epoch": 0.54, + "grad_norm": 0.5390625, + "learning_rate": 0.0001048926233397068, + "loss": 0.9101, + "step": 37365 + }, + { + "epoch": 0.54, + "grad_norm": 0.59375, + "learning_rate": 0.00010486761696848263, + "loss": 0.9026, + "step": 37370 + }, + { + "epoch": 0.54, + "grad_norm": 0.62890625, + "learning_rate": 0.00010484261029215073, + "loss": 1.0973, + "step": 37375 + }, + { + "epoch": 0.54, + "grad_norm": 0.54296875, + "learning_rate": 0.00010481760331227845, + "loss": 1.0309, + "step": 37380 + }, + { + "epoch": 0.54, + "grad_norm": 0.6171875, + "learning_rate": 0.00010479259603043336, + "loss": 0.9052, + "step": 37385 + }, + { + "epoch": 0.54, + "grad_norm": 0.5546875, + "learning_rate": 0.00010476758844818286, + "loss": 0.9943, + "step": 37390 + }, + { + "epoch": 0.54, + "grad_norm": 0.5390625, + "learning_rate": 0.00010474258056709449, + "loss": 0.9287, + "step": 37395 + }, + { + "epoch": 0.54, + "grad_norm": 0.466796875, + "learning_rate": 0.00010471757238873578, + "loss": 1.0231, + "step": 37400 + }, + { + "epoch": 0.54, + "grad_norm": 0.54296875, + "learning_rate": 0.00010469256391467424, + "loss": 1.0086, + "step": 37405 + }, + { + "epoch": 0.54, + "grad_norm": 0.55859375, + "learning_rate": 0.00010466755514647749, + "loss": 0.9275, + "step": 37410 + }, + { + "epoch": 0.54, + "grad_norm": 0.6171875, + "learning_rate": 0.00010464254608571304, + "loss": 1.0473, + "step": 37415 + }, + { + "epoch": 0.54, + "grad_norm": 0.54296875, + "learning_rate": 0.0001046175367339485, + "loss": 0.9569, + "step": 37420 + }, + { + "epoch": 0.54, + "grad_norm": 0.57421875, + "learning_rate": 0.00010459252709275152, + "loss": 1.0617, + "step": 37425 + }, + { + "epoch": 0.54, + "grad_norm": 0.49609375, + "learning_rate": 0.00010456751716368971, + "loss": 0.9215, + "step": 37430 + }, + { + "epoch": 0.54, + "grad_norm": 0.68359375, + "learning_rate": 0.0001045425069483307, + "loss": 0.9922, + "step": 37435 + }, + { + "epoch": 0.54, + "grad_norm": 0.59765625, + "learning_rate": 0.00010451749644824222, + "loss": 0.9558, + "step": 37440 + }, + { + "epoch": 0.54, + "grad_norm": 0.546875, + "learning_rate": 0.00010449248566499188, + "loss": 0.8627, + "step": 37445 + }, + { + "epoch": 0.54, + "grad_norm": 0.59375, + "learning_rate": 0.00010446747460014743, + "loss": 1.0908, + "step": 37450 + }, + { + "epoch": 0.54, + "grad_norm": 0.58984375, + "learning_rate": 0.0001044424632552766, + "loss": 0.9257, + "step": 37455 + }, + { + "epoch": 0.54, + "grad_norm": 0.6328125, + "learning_rate": 0.00010441745163194709, + "loss": 0.8122, + "step": 37460 + }, + { + "epoch": 0.54, + "grad_norm": 0.5625, + "learning_rate": 0.00010439243973172673, + "loss": 1.0586, + "step": 37465 + }, + { + "epoch": 0.54, + "grad_norm": 0.6015625, + "learning_rate": 0.0001043674275561832, + "loss": 0.9671, + "step": 37470 + }, + { + "epoch": 0.54, + "grad_norm": 0.65234375, + "learning_rate": 0.00010434241510688434, + "loss": 0.9491, + "step": 37475 + }, + { + "epoch": 0.54, + "grad_norm": 0.56640625, + "learning_rate": 0.00010431740238539796, + "loss": 0.9244, + "step": 37480 + }, + { + "epoch": 0.54, + "grad_norm": 0.515625, + "learning_rate": 0.00010429238939329189, + "loss": 0.8859, + "step": 37485 + }, + { + "epoch": 0.54, + "grad_norm": 0.53515625, + "learning_rate": 0.00010426737613213395, + "loss": 0.9897, + "step": 37490 + }, + { + "epoch": 0.54, + "grad_norm": 0.578125, + "learning_rate": 0.00010424236260349203, + "loss": 0.9511, + "step": 37495 + }, + { + "epoch": 0.54, + "grad_norm": 0.63671875, + "learning_rate": 0.00010421734880893396, + "loss": 0.8938, + "step": 37500 + }, + { + "epoch": 0.54, + "grad_norm": 0.5390625, + "learning_rate": 0.0001041923347500277, + "loss": 0.9602, + "step": 37505 + }, + { + "epoch": 0.54, + "grad_norm": 0.62890625, + "learning_rate": 0.00010416732042834112, + "loss": 0.9406, + "step": 37510 + }, + { + "epoch": 0.54, + "grad_norm": 0.58203125, + "learning_rate": 0.0001041423058454421, + "loss": 0.9569, + "step": 37515 + }, + { + "epoch": 0.54, + "grad_norm": 0.462890625, + "learning_rate": 0.0001041172910028987, + "loss": 0.8252, + "step": 37520 + }, + { + "epoch": 0.54, + "grad_norm": 0.56640625, + "learning_rate": 0.00010409227590227879, + "loss": 1.0166, + "step": 37525 + }, + { + "epoch": 0.54, + "grad_norm": 0.490234375, + "learning_rate": 0.00010406726054515035, + "loss": 0.9833, + "step": 37530 + }, + { + "epoch": 0.54, + "grad_norm": 0.515625, + "learning_rate": 0.00010404224493308139, + "loss": 1.1919, + "step": 37535 + }, + { + "epoch": 0.54, + "grad_norm": 0.49609375, + "learning_rate": 0.00010401722906763993, + "loss": 0.8367, + "step": 37540 + }, + { + "epoch": 0.54, + "grad_norm": 0.48828125, + "learning_rate": 0.00010399221295039396, + "loss": 1.0228, + "step": 37545 + }, + { + "epoch": 0.54, + "grad_norm": 0.8125, + "learning_rate": 0.00010396719658291155, + "loss": 1.0634, + "step": 37550 + }, + { + "epoch": 0.54, + "grad_norm": 0.515625, + "learning_rate": 0.0001039421799667607, + "loss": 0.9437, + "step": 37555 + }, + { + "epoch": 0.54, + "grad_norm": 0.59765625, + "learning_rate": 0.00010391716310350957, + "loss": 0.9024, + "step": 37560 + }, + { + "epoch": 0.54, + "grad_norm": 0.578125, + "learning_rate": 0.00010389214599472617, + "loss": 0.9552, + "step": 37565 + }, + { + "epoch": 0.54, + "grad_norm": 0.62109375, + "learning_rate": 0.00010386712864197863, + "loss": 1.1085, + "step": 37570 + }, + { + "epoch": 0.54, + "grad_norm": 0.55859375, + "learning_rate": 0.00010384211104683508, + "loss": 0.8691, + "step": 37575 + }, + { + "epoch": 0.54, + "grad_norm": 0.63671875, + "learning_rate": 0.00010381709321086361, + "loss": 1.0081, + "step": 37580 + }, + { + "epoch": 0.54, + "grad_norm": 0.60546875, + "learning_rate": 0.00010379207513563239, + "loss": 0.951, + "step": 37585 + }, + { + "epoch": 0.54, + "grad_norm": 0.63671875, + "learning_rate": 0.00010376705682270958, + "loss": 1.0732, + "step": 37590 + }, + { + "epoch": 0.54, + "grad_norm": 0.58203125, + "learning_rate": 0.00010374203827366338, + "loss": 1.0202, + "step": 37595 + }, + { + "epoch": 0.54, + "grad_norm": 0.5703125, + "learning_rate": 0.00010371701949006195, + "loss": 0.9479, + "step": 37600 + }, + { + "epoch": 0.54, + "grad_norm": 0.5078125, + "learning_rate": 0.0001036920004734735, + "loss": 0.8367, + "step": 37605 + }, + { + "epoch": 0.54, + "grad_norm": 0.58203125, + "learning_rate": 0.00010366698122546623, + "loss": 0.891, + "step": 37610 + }, + { + "epoch": 0.54, + "grad_norm": 0.54296875, + "learning_rate": 0.00010364196174760845, + "loss": 0.8879, + "step": 37615 + }, + { + "epoch": 0.54, + "grad_norm": 0.6015625, + "learning_rate": 0.00010361694204146833, + "loss": 1.0175, + "step": 37620 + }, + { + "epoch": 0.54, + "grad_norm": 0.56640625, + "learning_rate": 0.00010359192210861417, + "loss": 0.9633, + "step": 37625 + }, + { + "epoch": 0.54, + "grad_norm": 0.62890625, + "learning_rate": 0.00010356690195061424, + "loss": 1.1454, + "step": 37630 + }, + { + "epoch": 0.54, + "grad_norm": 0.63671875, + "learning_rate": 0.00010354188156903686, + "loss": 0.8291, + "step": 37635 + }, + { + "epoch": 0.54, + "grad_norm": 0.64453125, + "learning_rate": 0.00010351686096545026, + "loss": 0.9924, + "step": 37640 + }, + { + "epoch": 0.54, + "grad_norm": 0.57421875, + "learning_rate": 0.00010349184014142284, + "loss": 0.8741, + "step": 37645 + }, + { + "epoch": 0.54, + "grad_norm": 0.474609375, + "learning_rate": 0.0001034668190985229, + "loss": 0.8813, + "step": 37650 + }, + { + "epoch": 0.54, + "grad_norm": 0.486328125, + "learning_rate": 0.0001034417978383188, + "loss": 0.822, + "step": 37655 + }, + { + "epoch": 0.54, + "grad_norm": 0.5703125, + "learning_rate": 0.00010341677636237887, + "loss": 0.9509, + "step": 37660 + }, + { + "epoch": 0.54, + "grad_norm": 0.546875, + "learning_rate": 0.0001033917546722715, + "loss": 0.81, + "step": 37665 + }, + { + "epoch": 0.54, + "grad_norm": 0.53515625, + "learning_rate": 0.0001033667327695651, + "loss": 0.9395, + "step": 37670 + }, + { + "epoch": 0.54, + "grad_norm": 0.53125, + "learning_rate": 0.00010334171065582807, + "loss": 0.8975, + "step": 37675 + }, + { + "epoch": 0.54, + "grad_norm": 0.55078125, + "learning_rate": 0.0001033166883326288, + "loss": 0.9838, + "step": 37680 + }, + { + "epoch": 0.54, + "grad_norm": 0.58984375, + "learning_rate": 0.00010329166580153573, + "loss": 0.7565, + "step": 37685 + }, + { + "epoch": 0.54, + "grad_norm": 0.56640625, + "learning_rate": 0.00010326664306411727, + "loss": 0.9554, + "step": 37690 + }, + { + "epoch": 0.54, + "grad_norm": 0.5234375, + "learning_rate": 0.00010324162012194194, + "loss": 1.0028, + "step": 37695 + }, + { + "epoch": 0.54, + "grad_norm": 0.5390625, + "learning_rate": 0.00010321659697657818, + "loss": 0.9018, + "step": 37700 + }, + { + "epoch": 0.54, + "grad_norm": 0.5234375, + "learning_rate": 0.0001031915736295944, + "loss": 0.943, + "step": 37705 + }, + { + "epoch": 0.54, + "grad_norm": 0.490234375, + "learning_rate": 0.0001031665500825592, + "loss": 0.8955, + "step": 37710 + }, + { + "epoch": 0.54, + "grad_norm": 0.58203125, + "learning_rate": 0.00010314152633704102, + "loss": 1.1076, + "step": 37715 + }, + { + "epoch": 0.54, + "grad_norm": 0.53515625, + "learning_rate": 0.00010311650239460834, + "loss": 1.0408, + "step": 37720 + }, + { + "epoch": 0.54, + "grad_norm": 0.5859375, + "learning_rate": 0.00010309147825682982, + "loss": 0.8527, + "step": 37725 + }, + { + "epoch": 0.54, + "grad_norm": 0.59765625, + "learning_rate": 0.00010306645392527388, + "loss": 0.9127, + "step": 37730 + }, + { + "epoch": 0.54, + "grad_norm": 0.498046875, + "learning_rate": 0.00010304142940150913, + "loss": 0.8388, + "step": 37735 + }, + { + "epoch": 0.54, + "grad_norm": 0.498046875, + "learning_rate": 0.00010301640468710412, + "loss": 1.0603, + "step": 37740 + }, + { + "epoch": 0.54, + "grad_norm": 0.56640625, + "learning_rate": 0.0001029913797836274, + "loss": 1.1031, + "step": 37745 + }, + { + "epoch": 0.54, + "grad_norm": 0.58984375, + "learning_rate": 0.00010296635469264764, + "loss": 0.9181, + "step": 37750 + }, + { + "epoch": 0.54, + "grad_norm": 0.56640625, + "learning_rate": 0.00010294132941573338, + "loss": 0.8225, + "step": 37755 + }, + { + "epoch": 0.54, + "grad_norm": 0.5859375, + "learning_rate": 0.0001029163039544532, + "loss": 1.0889, + "step": 37760 + }, + { + "epoch": 0.54, + "grad_norm": 0.494140625, + "learning_rate": 0.00010289127831037579, + "loss": 0.8887, + "step": 37765 + }, + { + "epoch": 0.54, + "grad_norm": 0.57421875, + "learning_rate": 0.00010286625248506979, + "loss": 1.0263, + "step": 37770 + }, + { + "epoch": 0.54, + "grad_norm": 0.53125, + "learning_rate": 0.00010284122648010377, + "loss": 0.9872, + "step": 37775 + }, + { + "epoch": 0.54, + "grad_norm": 0.5546875, + "learning_rate": 0.00010281620029704649, + "loss": 0.977, + "step": 37780 + }, + { + "epoch": 0.54, + "grad_norm": 0.5546875, + "learning_rate": 0.0001027911739374665, + "loss": 1.1302, + "step": 37785 + }, + { + "epoch": 0.54, + "grad_norm": 0.56640625, + "learning_rate": 0.00010276614740293262, + "loss": 1.0363, + "step": 37790 + }, + { + "epoch": 0.54, + "grad_norm": 0.49609375, + "learning_rate": 0.00010274112069501344, + "loss": 0.9864, + "step": 37795 + }, + { + "epoch": 0.54, + "grad_norm": 0.51953125, + "learning_rate": 0.00010271609381527767, + "loss": 0.8938, + "step": 37800 + }, + { + "epoch": 0.54, + "grad_norm": 0.50390625, + "learning_rate": 0.0001026910667652941, + "loss": 0.9837, + "step": 37805 + }, + { + "epoch": 0.54, + "grad_norm": 0.66796875, + "learning_rate": 0.00010266603954663136, + "loss": 1.0605, + "step": 37810 + }, + { + "epoch": 0.54, + "grad_norm": 0.6015625, + "learning_rate": 0.00010264101216085821, + "loss": 0.9841, + "step": 37815 + }, + { + "epoch": 0.54, + "grad_norm": 0.59375, + "learning_rate": 0.00010261598460954345, + "loss": 0.8508, + "step": 37820 + }, + { + "epoch": 0.54, + "grad_norm": 0.7578125, + "learning_rate": 0.0001025909568942558, + "loss": 0.9525, + "step": 37825 + }, + { + "epoch": 0.54, + "grad_norm": 0.490234375, + "learning_rate": 0.00010256592901656397, + "loss": 0.9262, + "step": 37830 + }, + { + "epoch": 0.54, + "grad_norm": 0.5625, + "learning_rate": 0.00010254090097803685, + "loss": 0.94, + "step": 37835 + }, + { + "epoch": 0.54, + "grad_norm": 0.5390625, + "learning_rate": 0.0001025158727802431, + "loss": 0.8998, + "step": 37840 + }, + { + "epoch": 0.54, + "grad_norm": 0.52734375, + "learning_rate": 0.00010249084442475163, + "loss": 0.9269, + "step": 37845 + }, + { + "epoch": 0.54, + "grad_norm": 0.5234375, + "learning_rate": 0.00010246581591313118, + "loss": 0.8688, + "step": 37850 + }, + { + "epoch": 0.54, + "grad_norm": 0.52734375, + "learning_rate": 0.00010244078724695055, + "loss": 1.0001, + "step": 37855 + }, + { + "epoch": 0.54, + "grad_norm": 0.60546875, + "learning_rate": 0.00010241575842777864, + "loss": 0.8315, + "step": 37860 + }, + { + "epoch": 0.54, + "grad_norm": 0.5703125, + "learning_rate": 0.00010239072945718422, + "loss": 0.882, + "step": 37865 + }, + { + "epoch": 0.54, + "grad_norm": 0.5, + "learning_rate": 0.00010236570033673614, + "loss": 1.1545, + "step": 37870 + }, + { + "epoch": 0.54, + "grad_norm": 0.50390625, + "learning_rate": 0.00010234067106800329, + "loss": 0.9951, + "step": 37875 + }, + { + "epoch": 0.54, + "grad_norm": 0.5546875, + "learning_rate": 0.00010231564165255452, + "loss": 0.9681, + "step": 37880 + }, + { + "epoch": 0.54, + "grad_norm": 0.56640625, + "learning_rate": 0.00010229061209195867, + "loss": 0.9191, + "step": 37885 + }, + { + "epoch": 0.54, + "grad_norm": 0.57421875, + "learning_rate": 0.00010226558238778466, + "loss": 0.9133, + "step": 37890 + }, + { + "epoch": 0.54, + "grad_norm": 0.57421875, + "learning_rate": 0.00010224055254160134, + "loss": 0.9152, + "step": 37895 + }, + { + "epoch": 0.54, + "grad_norm": 0.50390625, + "learning_rate": 0.00010221552255497767, + "loss": 1.0462, + "step": 37900 + }, + { + "epoch": 0.54, + "grad_norm": 0.5546875, + "learning_rate": 0.00010219049242948251, + "loss": 1.047, + "step": 37905 + }, + { + "epoch": 0.54, + "grad_norm": 0.6640625, + "learning_rate": 0.00010216546216668479, + "loss": 1.0363, + "step": 37910 + }, + { + "epoch": 0.54, + "grad_norm": 0.58203125, + "learning_rate": 0.00010214043176815344, + "loss": 1.0124, + "step": 37915 + }, + { + "epoch": 0.54, + "grad_norm": 0.52734375, + "learning_rate": 0.00010211540123545737, + "loss": 1.0344, + "step": 37920 + }, + { + "epoch": 0.54, + "grad_norm": 0.6171875, + "learning_rate": 0.00010209037057016556, + "loss": 0.9466, + "step": 37925 + }, + { + "epoch": 0.54, + "grad_norm": 0.53125, + "learning_rate": 0.00010206533977384694, + "loss": 1.0074, + "step": 37930 + }, + { + "epoch": 0.54, + "grad_norm": 0.50390625, + "learning_rate": 0.00010204030884807046, + "loss": 0.982, + "step": 37935 + }, + { + "epoch": 0.54, + "grad_norm": 0.625, + "learning_rate": 0.00010201527779440509, + "loss": 0.8657, + "step": 37940 + }, + { + "epoch": 0.54, + "grad_norm": 0.5, + "learning_rate": 0.00010199024661441985, + "loss": 1.0115, + "step": 37945 + }, + { + "epoch": 0.54, + "grad_norm": 0.52734375, + "learning_rate": 0.00010196521530968363, + "loss": 0.7979, + "step": 37950 + }, + { + "epoch": 0.54, + "grad_norm": 0.57421875, + "learning_rate": 0.00010194018388176552, + "loss": 0.8499, + "step": 37955 + }, + { + "epoch": 0.54, + "grad_norm": 0.515625, + "learning_rate": 0.00010191515233223448, + "loss": 1.0006, + "step": 37960 + }, + { + "epoch": 0.54, + "grad_norm": 0.51953125, + "learning_rate": 0.00010189012066265949, + "loss": 0.8995, + "step": 37965 + }, + { + "epoch": 0.54, + "grad_norm": 0.49609375, + "learning_rate": 0.00010186508887460959, + "loss": 1.0087, + "step": 37970 + }, + { + "epoch": 0.54, + "grad_norm": 0.52734375, + "learning_rate": 0.0001018400569696538, + "loss": 0.8561, + "step": 37975 + }, + { + "epoch": 0.54, + "grad_norm": 0.58203125, + "learning_rate": 0.0001018150249493611, + "loss": 1.0744, + "step": 37980 + }, + { + "epoch": 0.54, + "grad_norm": 0.55078125, + "learning_rate": 0.00010178999281530062, + "loss": 0.9463, + "step": 37985 + }, + { + "epoch": 0.54, + "grad_norm": 0.494140625, + "learning_rate": 0.00010176496056904135, + "loss": 0.8571, + "step": 37990 + }, + { + "epoch": 0.55, + "grad_norm": 0.5703125, + "learning_rate": 0.00010173992821215232, + "loss": 0.7871, + "step": 37995 + }, + { + "epoch": 0.55, + "grad_norm": 0.64453125, + "learning_rate": 0.00010171489574620263, + "loss": 1.1391, + "step": 38000 + }, + { + "epoch": 0.55, + "grad_norm": 0.5546875, + "learning_rate": 0.00010168986317276128, + "loss": 1.0766, + "step": 38005 + }, + { + "epoch": 0.55, + "grad_norm": 0.5078125, + "learning_rate": 0.00010166483049339741, + "loss": 0.9119, + "step": 38010 + }, + { + "epoch": 0.55, + "grad_norm": 0.546875, + "learning_rate": 0.00010163979770968008, + "loss": 0.8999, + "step": 38015 + }, + { + "epoch": 0.55, + "grad_norm": 0.5546875, + "learning_rate": 0.00010161476482317835, + "loss": 1.039, + "step": 38020 + }, + { + "epoch": 0.55, + "grad_norm": 0.578125, + "learning_rate": 0.00010158973183546132, + "loss": 1.0054, + "step": 38025 + }, + { + "epoch": 0.55, + "grad_norm": 0.52734375, + "learning_rate": 0.00010156469874809808, + "loss": 0.891, + "step": 38030 + }, + { + "epoch": 0.55, + "grad_norm": 0.55859375, + "learning_rate": 0.00010153966556265775, + "loss": 1.0695, + "step": 38035 + }, + { + "epoch": 0.55, + "grad_norm": 0.5625, + "learning_rate": 0.00010151463228070943, + "loss": 0.8359, + "step": 38040 + }, + { + "epoch": 0.55, + "grad_norm": 0.578125, + "learning_rate": 0.00010148959890382224, + "loss": 0.9787, + "step": 38045 + }, + { + "epoch": 0.55, + "grad_norm": 0.515625, + "learning_rate": 0.0001014645654335653, + "loss": 0.9187, + "step": 38050 + }, + { + "epoch": 0.55, + "grad_norm": 0.5078125, + "learning_rate": 0.00010143953187150772, + "loss": 0.9577, + "step": 38055 + }, + { + "epoch": 0.55, + "grad_norm": 0.58203125, + "learning_rate": 0.00010141449821921862, + "loss": 1.0231, + "step": 38060 + }, + { + "epoch": 0.55, + "grad_norm": 0.5, + "learning_rate": 0.00010138946447826718, + "loss": 0.936, + "step": 38065 + }, + { + "epoch": 0.55, + "grad_norm": 0.51953125, + "learning_rate": 0.00010136443065022254, + "loss": 0.8882, + "step": 38070 + }, + { + "epoch": 0.55, + "grad_norm": 0.6640625, + "learning_rate": 0.00010133939673665382, + "loss": 0.8248, + "step": 38075 + }, + { + "epoch": 0.55, + "grad_norm": 0.56640625, + "learning_rate": 0.0001013143627391302, + "loss": 0.9949, + "step": 38080 + }, + { + "epoch": 0.55, + "grad_norm": 0.5546875, + "learning_rate": 0.00010128932865922078, + "loss": 1.0271, + "step": 38085 + }, + { + "epoch": 0.55, + "grad_norm": 0.66796875, + "learning_rate": 0.00010126429449849482, + "loss": 0.9705, + "step": 38090 + }, + { + "epoch": 0.55, + "grad_norm": 0.53515625, + "learning_rate": 0.00010123926025852144, + "loss": 0.8858, + "step": 38095 + }, + { + "epoch": 0.55, + "grad_norm": 0.6015625, + "learning_rate": 0.00010121422594086978, + "loss": 0.9708, + "step": 38100 + }, + { + "epoch": 0.55, + "grad_norm": 0.6171875, + "learning_rate": 0.00010118919154710909, + "loss": 0.9252, + "step": 38105 + }, + { + "epoch": 0.55, + "grad_norm": 0.578125, + "learning_rate": 0.00010116415707880848, + "loss": 0.9082, + "step": 38110 + }, + { + "epoch": 0.55, + "grad_norm": 0.60546875, + "learning_rate": 0.00010113912253753719, + "loss": 0.9346, + "step": 38115 + }, + { + "epoch": 0.55, + "grad_norm": 0.63671875, + "learning_rate": 0.00010111408792486446, + "loss": 0.8487, + "step": 38120 + }, + { + "epoch": 0.55, + "grad_norm": 0.58203125, + "learning_rate": 0.00010108905324235935, + "loss": 0.8382, + "step": 38125 + }, + { + "epoch": 0.55, + "grad_norm": 0.546875, + "learning_rate": 0.0001010640184915912, + "loss": 0.9948, + "step": 38130 + }, + { + "epoch": 0.55, + "grad_norm": 0.6953125, + "learning_rate": 0.00010103898367412913, + "loss": 1.1, + "step": 38135 + }, + { + "epoch": 0.55, + "grad_norm": 0.55859375, + "learning_rate": 0.00010101394879154238, + "loss": 1.0851, + "step": 38140 + }, + { + "epoch": 0.55, + "grad_norm": 0.5078125, + "learning_rate": 0.00010098891384540017, + "loss": 1.0185, + "step": 38145 + }, + { + "epoch": 0.55, + "grad_norm": 0.5703125, + "learning_rate": 0.00010096387883727174, + "loss": 1.0252, + "step": 38150 + }, + { + "epoch": 0.55, + "grad_norm": 0.55859375, + "learning_rate": 0.00010093884376872625, + "loss": 1.0436, + "step": 38155 + }, + { + "epoch": 0.55, + "grad_norm": 0.486328125, + "learning_rate": 0.00010091380864133297, + "loss": 0.9197, + "step": 38160 + }, + { + "epoch": 0.55, + "grad_norm": 0.53125, + "learning_rate": 0.00010088877345666112, + "loss": 0.9213, + "step": 38165 + }, + { + "epoch": 0.55, + "grad_norm": 0.5625, + "learning_rate": 0.00010086373821627995, + "loss": 1.0176, + "step": 38170 + }, + { + "epoch": 0.55, + "grad_norm": 0.59375, + "learning_rate": 0.0001008387029217587, + "loss": 0.9032, + "step": 38175 + }, + { + "epoch": 0.55, + "grad_norm": 0.53515625, + "learning_rate": 0.00010081366757466655, + "loss": 0.9536, + "step": 38180 + }, + { + "epoch": 0.55, + "grad_norm": 0.59765625, + "learning_rate": 0.00010078863217657282, + "loss": 0.8605, + "step": 38185 + }, + { + "epoch": 0.55, + "grad_norm": 0.52734375, + "learning_rate": 0.00010076359672904673, + "loss": 1.0488, + "step": 38190 + }, + { + "epoch": 0.55, + "grad_norm": 0.609375, + "learning_rate": 0.0001007385612336575, + "loss": 1.0442, + "step": 38195 + }, + { + "epoch": 0.55, + "grad_norm": 0.62890625, + "learning_rate": 0.00010071352569197446, + "loss": 1.0319, + "step": 38200 + }, + { + "epoch": 0.55, + "grad_norm": 0.5859375, + "learning_rate": 0.0001006884901055668, + "loss": 0.9562, + "step": 38205 + }, + { + "epoch": 0.55, + "grad_norm": 0.55859375, + "learning_rate": 0.00010066345447600375, + "loss": 0.9968, + "step": 38210 + }, + { + "epoch": 0.55, + "grad_norm": 0.578125, + "learning_rate": 0.00010063841880485467, + "loss": 0.9077, + "step": 38215 + }, + { + "epoch": 0.55, + "grad_norm": 0.625, + "learning_rate": 0.00010061338309368875, + "loss": 1.001, + "step": 38220 + }, + { + "epoch": 0.55, + "grad_norm": 0.58203125, + "learning_rate": 0.00010058834734407529, + "loss": 0.966, + "step": 38225 + }, + { + "epoch": 0.55, + "grad_norm": 0.52734375, + "learning_rate": 0.00010056331155758356, + "loss": 0.8654, + "step": 38230 + }, + { + "epoch": 0.55, + "grad_norm": 0.54296875, + "learning_rate": 0.00010053827573578278, + "loss": 1.0531, + "step": 38235 + }, + { + "epoch": 0.55, + "grad_norm": 0.59765625, + "learning_rate": 0.00010051323988024229, + "loss": 0.9787, + "step": 38240 + }, + { + "epoch": 0.55, + "grad_norm": 0.61328125, + "learning_rate": 0.00010048820399253134, + "loss": 0.9843, + "step": 38245 + }, + { + "epoch": 0.55, + "grad_norm": 0.62109375, + "learning_rate": 0.00010046316807421918, + "loss": 0.9878, + "step": 38250 + }, + { + "epoch": 0.55, + "grad_norm": 0.55078125, + "learning_rate": 0.00010043813212687516, + "loss": 0.8344, + "step": 38255 + }, + { + "epoch": 0.55, + "grad_norm": 0.5, + "learning_rate": 0.00010041309615206851, + "loss": 0.8606, + "step": 38260 + }, + { + "epoch": 0.55, + "grad_norm": 0.625, + "learning_rate": 0.00010038806015136851, + "loss": 1.0538, + "step": 38265 + }, + { + "epoch": 0.55, + "grad_norm": 0.54296875, + "learning_rate": 0.00010036302412634446, + "loss": 0.8955, + "step": 38270 + }, + { + "epoch": 0.55, + "grad_norm": 0.50390625, + "learning_rate": 0.00010033798807856565, + "loss": 1.0416, + "step": 38275 + }, + { + "epoch": 0.55, + "grad_norm": 0.59765625, + "learning_rate": 0.00010031295200960136, + "loss": 0.88, + "step": 38280 + }, + { + "epoch": 0.55, + "grad_norm": 0.63671875, + "learning_rate": 0.00010028791592102087, + "loss": 0.9125, + "step": 38285 + }, + { + "epoch": 0.55, + "grad_norm": 0.5703125, + "learning_rate": 0.00010026287981439348, + "loss": 0.9274, + "step": 38290 + }, + { + "epoch": 0.55, + "grad_norm": 0.51953125, + "learning_rate": 0.0001002378436912885, + "loss": 0.9349, + "step": 38295 + }, + { + "epoch": 0.55, + "grad_norm": 0.5390625, + "learning_rate": 0.0001002128075532752, + "loss": 0.834, + "step": 38300 + }, + { + "epoch": 0.55, + "grad_norm": 0.58203125, + "learning_rate": 0.00010018777140192288, + "loss": 0.9952, + "step": 38305 + }, + { + "epoch": 0.55, + "grad_norm": 0.53125, + "learning_rate": 0.00010016273523880084, + "loss": 0.9502, + "step": 38310 + }, + { + "epoch": 0.55, + "grad_norm": 0.66015625, + "learning_rate": 0.00010013769906547839, + "loss": 0.9224, + "step": 38315 + }, + { + "epoch": 0.55, + "grad_norm": 0.62890625, + "learning_rate": 0.00010011266288352477, + "loss": 1.001, + "step": 38320 + }, + { + "epoch": 0.55, + "grad_norm": 0.5625, + "learning_rate": 0.00010008762669450931, + "loss": 0.9386, + "step": 38325 + }, + { + "epoch": 0.55, + "grad_norm": 0.462890625, + "learning_rate": 0.00010006259050000133, + "loss": 0.8958, + "step": 38330 + }, + { + "epoch": 0.55, + "grad_norm": 0.5234375, + "learning_rate": 0.00010003755430157012, + "loss": 0.8092, + "step": 38335 + }, + { + "epoch": 0.55, + "grad_norm": 0.5, + "learning_rate": 0.00010001251810078493, + "loss": 0.9878, + "step": 38340 + }, + { + "epoch": 0.55, + "grad_norm": 0.515625, + "learning_rate": 9.998748189921509e-05, + "loss": 1.0938, + "step": 38345 + }, + { + "epoch": 0.55, + "grad_norm": 0.6015625, + "learning_rate": 9.996244569842992e-05, + "loss": 0.9138, + "step": 38350 + }, + { + "epoch": 0.55, + "grad_norm": 0.5546875, + "learning_rate": 9.993740949999869e-05, + "loss": 0.9054, + "step": 38355 + }, + { + "epoch": 0.55, + "grad_norm": 0.6640625, + "learning_rate": 9.991237330549067e-05, + "loss": 0.9356, + "step": 38360 + }, + { + "epoch": 0.55, + "grad_norm": 0.55859375, + "learning_rate": 9.988733711647524e-05, + "loss": 0.8874, + "step": 38365 + }, + { + "epoch": 0.55, + "grad_norm": 0.64453125, + "learning_rate": 9.986230093452166e-05, + "loss": 0.9673, + "step": 38370 + }, + { + "epoch": 0.55, + "grad_norm": 0.6953125, + "learning_rate": 9.983726476119918e-05, + "loss": 0.9727, + "step": 38375 + }, + { + "epoch": 0.55, + "grad_norm": 0.5625, + "learning_rate": 9.981222859807715e-05, + "loss": 0.9835, + "step": 38380 + }, + { + "epoch": 0.55, + "grad_norm": 0.5234375, + "learning_rate": 9.978719244672481e-05, + "loss": 0.9609, + "step": 38385 + }, + { + "epoch": 0.55, + "grad_norm": 0.61328125, + "learning_rate": 9.976215630871152e-05, + "loss": 0.9405, + "step": 38390 + }, + { + "epoch": 0.55, + "grad_norm": 0.578125, + "learning_rate": 9.973712018560654e-05, + "loss": 0.8642, + "step": 38395 + }, + { + "epoch": 0.55, + "grad_norm": 0.6484375, + "learning_rate": 9.971208407897914e-05, + "loss": 1.0028, + "step": 38400 + }, + { + "epoch": 0.55, + "grad_norm": 0.5625, + "learning_rate": 9.968704799039867e-05, + "loss": 0.9833, + "step": 38405 + }, + { + "epoch": 0.55, + "grad_norm": 0.55078125, + "learning_rate": 9.966201192143439e-05, + "loss": 0.991, + "step": 38410 + }, + { + "epoch": 0.55, + "grad_norm": 0.53125, + "learning_rate": 9.963697587365555e-05, + "loss": 0.8559, + "step": 38415 + }, + { + "epoch": 0.55, + "grad_norm": 0.56640625, + "learning_rate": 9.96119398486315e-05, + "loss": 1.0733, + "step": 38420 + }, + { + "epoch": 0.55, + "grad_norm": 0.5859375, + "learning_rate": 9.958690384793154e-05, + "loss": 0.8843, + "step": 38425 + }, + { + "epoch": 0.55, + "grad_norm": 0.5625, + "learning_rate": 9.956186787312488e-05, + "loss": 0.9691, + "step": 38430 + }, + { + "epoch": 0.55, + "grad_norm": 0.50390625, + "learning_rate": 9.953683192578083e-05, + "loss": 0.9404, + "step": 38435 + }, + { + "epoch": 0.55, + "grad_norm": 0.546875, + "learning_rate": 9.951179600746868e-05, + "loss": 1.0152, + "step": 38440 + }, + { + "epoch": 0.55, + "grad_norm": 0.515625, + "learning_rate": 9.948676011975773e-05, + "loss": 1.011, + "step": 38445 + }, + { + "epoch": 0.55, + "grad_norm": 0.7421875, + "learning_rate": 9.946172426421725e-05, + "loss": 1.028, + "step": 38450 + }, + { + "epoch": 0.55, + "grad_norm": 0.49609375, + "learning_rate": 9.943668844241647e-05, + "loss": 1.0303, + "step": 38455 + }, + { + "epoch": 0.55, + "grad_norm": 0.56640625, + "learning_rate": 9.941165265592472e-05, + "loss": 0.9126, + "step": 38460 + }, + { + "epoch": 0.55, + "grad_norm": 0.51171875, + "learning_rate": 9.938661690631127e-05, + "loss": 0.991, + "step": 38465 + }, + { + "epoch": 0.55, + "grad_norm": 0.5546875, + "learning_rate": 9.936158119514533e-05, + "loss": 0.9697, + "step": 38470 + }, + { + "epoch": 0.55, + "grad_norm": 0.5, + "learning_rate": 9.933654552399628e-05, + "loss": 0.8526, + "step": 38475 + }, + { + "epoch": 0.55, + "grad_norm": 0.53125, + "learning_rate": 9.931150989443325e-05, + "loss": 0.9171, + "step": 38480 + }, + { + "epoch": 0.55, + "grad_norm": 0.546875, + "learning_rate": 9.928647430802558e-05, + "loss": 1.0696, + "step": 38485 + }, + { + "epoch": 0.55, + "grad_norm": 0.57421875, + "learning_rate": 9.926143876634252e-05, + "loss": 0.9272, + "step": 38490 + }, + { + "epoch": 0.55, + "grad_norm": 0.5625, + "learning_rate": 9.923640327095329e-05, + "loss": 0.9579, + "step": 38495 + }, + { + "epoch": 0.55, + "grad_norm": 0.51953125, + "learning_rate": 9.921136782342719e-05, + "loss": 0.9735, + "step": 38500 + }, + { + "epoch": 0.55, + "grad_norm": 0.55078125, + "learning_rate": 9.918633242533347e-05, + "loss": 0.8839, + "step": 38505 + }, + { + "epoch": 0.55, + "grad_norm": 0.61328125, + "learning_rate": 9.916129707824133e-05, + "loss": 1.0197, + "step": 38510 + }, + { + "epoch": 0.55, + "grad_norm": 0.59765625, + "learning_rate": 9.913626178372006e-05, + "loss": 1.0029, + "step": 38515 + }, + { + "epoch": 0.55, + "grad_norm": 0.63671875, + "learning_rate": 9.911122654333889e-05, + "loss": 0.9344, + "step": 38520 + }, + { + "epoch": 0.55, + "grad_norm": 0.54296875, + "learning_rate": 9.908619135866704e-05, + "loss": 0.8733, + "step": 38525 + }, + { + "epoch": 0.55, + "grad_norm": 0.6015625, + "learning_rate": 9.906115623127381e-05, + "loss": 1.0772, + "step": 38530 + }, + { + "epoch": 0.55, + "grad_norm": 0.59765625, + "learning_rate": 9.90361211627283e-05, + "loss": 1.0062, + "step": 38535 + }, + { + "epoch": 0.55, + "grad_norm": 0.51171875, + "learning_rate": 9.901108615459986e-05, + "loss": 0.8779, + "step": 38540 + }, + { + "epoch": 0.55, + "grad_norm": 0.546875, + "learning_rate": 9.898605120845766e-05, + "loss": 0.9713, + "step": 38545 + }, + { + "epoch": 0.55, + "grad_norm": 0.5859375, + "learning_rate": 9.896101632587089e-05, + "loss": 0.971, + "step": 38550 + }, + { + "epoch": 0.55, + "grad_norm": 0.578125, + "learning_rate": 9.893598150840884e-05, + "loss": 1.0255, + "step": 38555 + }, + { + "epoch": 0.55, + "grad_norm": 0.54296875, + "learning_rate": 9.891094675764067e-05, + "loss": 0.9138, + "step": 38560 + }, + { + "epoch": 0.55, + "grad_norm": 0.640625, + "learning_rate": 9.888591207513556e-05, + "loss": 0.9943, + "step": 38565 + }, + { + "epoch": 0.55, + "grad_norm": 0.515625, + "learning_rate": 9.88608774624628e-05, + "loss": 0.8789, + "step": 38570 + }, + { + "epoch": 0.55, + "grad_norm": 0.5625, + "learning_rate": 9.883584292119153e-05, + "loss": 1.0116, + "step": 38575 + }, + { + "epoch": 0.55, + "grad_norm": 0.60546875, + "learning_rate": 9.881080845289097e-05, + "loss": 0.8831, + "step": 38580 + }, + { + "epoch": 0.55, + "grad_norm": 0.6171875, + "learning_rate": 9.878577405913027e-05, + "loss": 1.0177, + "step": 38585 + }, + { + "epoch": 0.55, + "grad_norm": 0.5546875, + "learning_rate": 9.87607397414786e-05, + "loss": 1.1244, + "step": 38590 + }, + { + "epoch": 0.55, + "grad_norm": 0.5625, + "learning_rate": 9.873570550150522e-05, + "loss": 0.9846, + "step": 38595 + }, + { + "epoch": 0.55, + "grad_norm": 0.59375, + "learning_rate": 9.871067134077924e-05, + "loss": 0.9154, + "step": 38600 + }, + { + "epoch": 0.55, + "grad_norm": 0.6484375, + "learning_rate": 9.868563726086983e-05, + "loss": 1.0878, + "step": 38605 + }, + { + "epoch": 0.55, + "grad_norm": 0.55078125, + "learning_rate": 9.866060326334621e-05, + "loss": 0.8965, + "step": 38610 + }, + { + "epoch": 0.55, + "grad_norm": 0.53515625, + "learning_rate": 9.86355693497775e-05, + "loss": 0.8671, + "step": 38615 + }, + { + "epoch": 0.55, + "grad_norm": 0.55078125, + "learning_rate": 9.861053552173281e-05, + "loss": 0.8291, + "step": 38620 + }, + { + "epoch": 0.55, + "grad_norm": 0.65625, + "learning_rate": 9.858550178078137e-05, + "loss": 0.9788, + "step": 38625 + }, + { + "epoch": 0.55, + "grad_norm": 0.57421875, + "learning_rate": 9.85604681284923e-05, + "loss": 1.0503, + "step": 38630 + }, + { + "epoch": 0.55, + "grad_norm": 0.58984375, + "learning_rate": 9.853543456643475e-05, + "loss": 1.1138, + "step": 38635 + }, + { + "epoch": 0.55, + "grad_norm": 0.5625, + "learning_rate": 9.851040109617777e-05, + "loss": 1.1198, + "step": 38640 + }, + { + "epoch": 0.55, + "grad_norm": 0.546875, + "learning_rate": 9.84853677192906e-05, + "loss": 0.9442, + "step": 38645 + }, + { + "epoch": 0.55, + "grad_norm": 0.578125, + "learning_rate": 9.846033443734227e-05, + "loss": 0.863, + "step": 38650 + }, + { + "epoch": 0.55, + "grad_norm": 0.546875, + "learning_rate": 9.843530125190194e-05, + "loss": 1.0133, + "step": 38655 + }, + { + "epoch": 0.55, + "grad_norm": 0.6015625, + "learning_rate": 9.841026816453869e-05, + "loss": 1.0073, + "step": 38660 + }, + { + "epoch": 0.55, + "grad_norm": 0.54296875, + "learning_rate": 9.838523517682166e-05, + "loss": 1.006, + "step": 38665 + }, + { + "epoch": 0.55, + "grad_norm": 0.5703125, + "learning_rate": 9.836020229031995e-05, + "loss": 0.9199, + "step": 38670 + }, + { + "epoch": 0.55, + "grad_norm": 0.5859375, + "learning_rate": 9.833516950660259e-05, + "loss": 1.0166, + "step": 38675 + }, + { + "epoch": 0.55, + "grad_norm": 0.54296875, + "learning_rate": 9.831013682723872e-05, + "loss": 0.9378, + "step": 38680 + }, + { + "epoch": 0.55, + "grad_norm": 0.60546875, + "learning_rate": 9.828510425379742e-05, + "loss": 1.1848, + "step": 38685 + }, + { + "epoch": 0.55, + "grad_norm": 0.58203125, + "learning_rate": 9.826007178784772e-05, + "loss": 0.9421, + "step": 38690 + }, + { + "epoch": 0.56, + "grad_norm": 0.59375, + "learning_rate": 9.823503943095869e-05, + "loss": 0.9063, + "step": 38695 + }, + { + "epoch": 0.56, + "grad_norm": 0.51953125, + "learning_rate": 9.82100071846994e-05, + "loss": 0.9038, + "step": 38700 + }, + { + "epoch": 0.56, + "grad_norm": 0.5703125, + "learning_rate": 9.818497505063891e-05, + "loss": 1.0482, + "step": 38705 + }, + { + "epoch": 0.56, + "grad_norm": 0.5390625, + "learning_rate": 9.815994303034623e-05, + "loss": 1.0177, + "step": 38710 + }, + { + "epoch": 0.56, + "grad_norm": 0.53125, + "learning_rate": 9.813491112539043e-05, + "loss": 0.9322, + "step": 38715 + }, + { + "epoch": 0.56, + "grad_norm": 0.546875, + "learning_rate": 9.810987933734054e-05, + "loss": 0.85, + "step": 38720 + }, + { + "epoch": 0.56, + "grad_norm": 0.60546875, + "learning_rate": 9.808484766776556e-05, + "loss": 1.0295, + "step": 38725 + }, + { + "epoch": 0.56, + "grad_norm": 0.578125, + "learning_rate": 9.805981611823448e-05, + "loss": 1.066, + "step": 38730 + }, + { + "epoch": 0.56, + "grad_norm": 0.59765625, + "learning_rate": 9.803478469031636e-05, + "loss": 1.0679, + "step": 38735 + }, + { + "epoch": 0.56, + "grad_norm": 0.50390625, + "learning_rate": 9.80097533855802e-05, + "loss": 0.9112, + "step": 38740 + }, + { + "epoch": 0.56, + "grad_norm": 0.51953125, + "learning_rate": 9.798472220559493e-05, + "loss": 1.1377, + "step": 38745 + }, + { + "epoch": 0.56, + "grad_norm": 0.52734375, + "learning_rate": 9.795969115192957e-05, + "loss": 0.955, + "step": 38750 + }, + { + "epoch": 0.56, + "grad_norm": 0.48828125, + "learning_rate": 9.79346602261531e-05, + "loss": 0.9294, + "step": 38755 + }, + { + "epoch": 0.56, + "grad_norm": 0.49609375, + "learning_rate": 9.790962942983447e-05, + "loss": 0.827, + "step": 38760 + }, + { + "epoch": 0.56, + "grad_norm": 0.61328125, + "learning_rate": 9.788459876454264e-05, + "loss": 0.9328, + "step": 38765 + }, + { + "epoch": 0.56, + "grad_norm": 0.58203125, + "learning_rate": 9.785956823184659e-05, + "loss": 0.9699, + "step": 38770 + }, + { + "epoch": 0.56, + "grad_norm": 0.498046875, + "learning_rate": 9.783453783331524e-05, + "loss": 0.8915, + "step": 38775 + }, + { + "epoch": 0.56, + "grad_norm": 0.66015625, + "learning_rate": 9.780950757051749e-05, + "loss": 0.9012, + "step": 38780 + }, + { + "epoch": 0.56, + "grad_norm": 0.5390625, + "learning_rate": 9.778447744502234e-05, + "loss": 0.8871, + "step": 38785 + }, + { + "epoch": 0.56, + "grad_norm": 0.51953125, + "learning_rate": 9.775944745839867e-05, + "loss": 1.0527, + "step": 38790 + }, + { + "epoch": 0.56, + "grad_norm": 0.50390625, + "learning_rate": 9.773441761221538e-05, + "loss": 0.9303, + "step": 38795 + }, + { + "epoch": 0.56, + "grad_norm": 0.51171875, + "learning_rate": 9.770938790804138e-05, + "loss": 0.8257, + "step": 38800 + }, + { + "epoch": 0.56, + "grad_norm": 0.52734375, + "learning_rate": 9.768435834744552e-05, + "loss": 0.9429, + "step": 38805 + }, + { + "epoch": 0.56, + "grad_norm": 0.53125, + "learning_rate": 9.765932893199673e-05, + "loss": 0.9351, + "step": 38810 + }, + { + "epoch": 0.56, + "grad_norm": 0.55078125, + "learning_rate": 9.763429966326387e-05, + "loss": 0.9168, + "step": 38815 + }, + { + "epoch": 0.56, + "grad_norm": 0.55078125, + "learning_rate": 9.760927054281579e-05, + "loss": 0.9941, + "step": 38820 + }, + { + "epoch": 0.56, + "grad_norm": 0.55078125, + "learning_rate": 9.758424157222138e-05, + "loss": 0.9234, + "step": 38825 + }, + { + "epoch": 0.56, + "grad_norm": 0.59765625, + "learning_rate": 9.755921275304945e-05, + "loss": 0.8665, + "step": 38830 + }, + { + "epoch": 0.56, + "grad_norm": 0.5859375, + "learning_rate": 9.753418408686883e-05, + "loss": 1.0219, + "step": 38835 + }, + { + "epoch": 0.56, + "grad_norm": 0.6171875, + "learning_rate": 9.750915557524838e-05, + "loss": 0.9246, + "step": 38840 + }, + { + "epoch": 0.56, + "grad_norm": 0.46484375, + "learning_rate": 9.748412721975691e-05, + "loss": 1.0578, + "step": 38845 + }, + { + "epoch": 0.56, + "grad_norm": 0.52734375, + "learning_rate": 9.74590990219632e-05, + "loss": 0.8982, + "step": 38850 + }, + { + "epoch": 0.56, + "grad_norm": 0.58984375, + "learning_rate": 9.743407098343604e-05, + "loss": 0.8119, + "step": 38855 + }, + { + "epoch": 0.56, + "grad_norm": 0.5703125, + "learning_rate": 9.740904310574424e-05, + "loss": 1.0587, + "step": 38860 + }, + { + "epoch": 0.56, + "grad_norm": 0.5859375, + "learning_rate": 9.738401539045656e-05, + "loss": 0.9207, + "step": 38865 + }, + { + "epoch": 0.56, + "grad_norm": 0.6171875, + "learning_rate": 9.73589878391418e-05, + "loss": 0.9301, + "step": 38870 + }, + { + "epoch": 0.56, + "grad_norm": 0.5078125, + "learning_rate": 9.733396045336865e-05, + "loss": 0.8845, + "step": 38875 + }, + { + "epoch": 0.56, + "grad_norm": 0.59765625, + "learning_rate": 9.730893323470593e-05, + "loss": 1.0785, + "step": 38880 + }, + { + "epoch": 0.56, + "grad_norm": 0.53515625, + "learning_rate": 9.728390618472232e-05, + "loss": 0.9223, + "step": 38885 + }, + { + "epoch": 0.56, + "grad_norm": 0.54296875, + "learning_rate": 9.725887930498657e-05, + "loss": 0.9278, + "step": 38890 + }, + { + "epoch": 0.56, + "grad_norm": 0.56640625, + "learning_rate": 9.723385259706743e-05, + "loss": 1.0404, + "step": 38895 + }, + { + "epoch": 0.56, + "grad_norm": 0.65625, + "learning_rate": 9.72088260625335e-05, + "loss": 1.114, + "step": 38900 + }, + { + "epoch": 0.56, + "grad_norm": 0.55078125, + "learning_rate": 9.718379970295356e-05, + "loss": 0.9147, + "step": 38905 + }, + { + "epoch": 0.56, + "grad_norm": 0.55078125, + "learning_rate": 9.715877351989625e-05, + "loss": 0.9504, + "step": 38910 + }, + { + "epoch": 0.56, + "grad_norm": 0.6171875, + "learning_rate": 9.713374751493024e-05, + "loss": 0.9138, + "step": 38915 + }, + { + "epoch": 0.56, + "grad_norm": 0.58203125, + "learning_rate": 9.710872168962422e-05, + "loss": 1.0339, + "step": 38920 + }, + { + "epoch": 0.56, + "grad_norm": 0.57421875, + "learning_rate": 9.708369604554681e-05, + "loss": 0.977, + "step": 38925 + }, + { + "epoch": 0.56, + "grad_norm": 0.5625, + "learning_rate": 9.705867058426664e-05, + "loss": 0.9473, + "step": 38930 + }, + { + "epoch": 0.56, + "grad_norm": 0.62109375, + "learning_rate": 9.703364530735237e-05, + "loss": 0.8984, + "step": 38935 + }, + { + "epoch": 0.56, + "grad_norm": 0.578125, + "learning_rate": 9.70086202163726e-05, + "loss": 0.961, + "step": 38940 + }, + { + "epoch": 0.56, + "grad_norm": 0.5625, + "learning_rate": 9.698359531289588e-05, + "loss": 0.9424, + "step": 38945 + }, + { + "epoch": 0.56, + "grad_norm": 0.50390625, + "learning_rate": 9.695857059849092e-05, + "loss": 0.9191, + "step": 38950 + }, + { + "epoch": 0.56, + "grad_norm": 0.578125, + "learning_rate": 9.693354607472613e-05, + "loss": 0.8682, + "step": 38955 + }, + { + "epoch": 0.56, + "grad_norm": 0.671875, + "learning_rate": 9.690852174317021e-05, + "loss": 1.0405, + "step": 38960 + }, + { + "epoch": 0.56, + "grad_norm": 0.486328125, + "learning_rate": 9.688349760539167e-05, + "loss": 1.0359, + "step": 38965 + }, + { + "epoch": 0.56, + "grad_norm": 0.51171875, + "learning_rate": 9.685847366295902e-05, + "loss": 0.9881, + "step": 38970 + }, + { + "epoch": 0.56, + "grad_norm": 0.5, + "learning_rate": 9.683344991744083e-05, + "loss": 0.9328, + "step": 38975 + }, + { + "epoch": 0.56, + "grad_norm": 0.6328125, + "learning_rate": 9.680842637040561e-05, + "loss": 0.8895, + "step": 38980 + }, + { + "epoch": 0.56, + "grad_norm": 0.56640625, + "learning_rate": 9.678340302342184e-05, + "loss": 0.9235, + "step": 38985 + }, + { + "epoch": 0.56, + "grad_norm": 0.54296875, + "learning_rate": 9.675837987805807e-05, + "loss": 1.0094, + "step": 38990 + }, + { + "epoch": 0.56, + "grad_norm": 0.50390625, + "learning_rate": 9.673335693588273e-05, + "loss": 0.9143, + "step": 38995 + }, + { + "epoch": 0.56, + "grad_norm": 0.5859375, + "learning_rate": 9.670833419846432e-05, + "loss": 0.9175, + "step": 39000 + }, + { + "epoch": 0.56, + "grad_norm": 0.5390625, + "learning_rate": 9.668331166737124e-05, + "loss": 0.8666, + "step": 39005 + }, + { + "epoch": 0.56, + "grad_norm": 0.63671875, + "learning_rate": 9.665828934417196e-05, + "loss": 0.9682, + "step": 39010 + }, + { + "epoch": 0.56, + "grad_norm": 0.5234375, + "learning_rate": 9.66332672304349e-05, + "loss": 0.9945, + "step": 39015 + }, + { + "epoch": 0.56, + "grad_norm": 0.60546875, + "learning_rate": 9.660824532772852e-05, + "loss": 0.9521, + "step": 39020 + }, + { + "epoch": 0.56, + "grad_norm": 0.5625, + "learning_rate": 9.658322363762115e-05, + "loss": 0.969, + "step": 39025 + }, + { + "epoch": 0.56, + "grad_norm": 0.625, + "learning_rate": 9.655820216168123e-05, + "loss": 0.9418, + "step": 39030 + }, + { + "epoch": 0.56, + "grad_norm": 0.60546875, + "learning_rate": 9.653318090147711e-05, + "loss": 0.9502, + "step": 39035 + }, + { + "epoch": 0.56, + "grad_norm": 0.54296875, + "learning_rate": 9.650815985857716e-05, + "loss": 0.9308, + "step": 39040 + }, + { + "epoch": 0.56, + "grad_norm": 0.5078125, + "learning_rate": 9.648313903454975e-05, + "loss": 1.0048, + "step": 39045 + }, + { + "epoch": 0.56, + "grad_norm": 0.546875, + "learning_rate": 9.645811843096316e-05, + "loss": 1.0241, + "step": 39050 + }, + { + "epoch": 0.56, + "grad_norm": 0.5390625, + "learning_rate": 9.643309804938578e-05, + "loss": 0.8488, + "step": 39055 + }, + { + "epoch": 0.56, + "grad_norm": 0.55859375, + "learning_rate": 9.640807789138586e-05, + "loss": 0.9414, + "step": 39060 + }, + { + "epoch": 0.56, + "grad_norm": 0.50390625, + "learning_rate": 9.638305795853168e-05, + "loss": 0.888, + "step": 39065 + }, + { + "epoch": 0.56, + "grad_norm": 0.55078125, + "learning_rate": 9.635803825239158e-05, + "loss": 1.0104, + "step": 39070 + }, + { + "epoch": 0.56, + "grad_norm": 0.60546875, + "learning_rate": 9.633301877453378e-05, + "loss": 1.0704, + "step": 39075 + }, + { + "epoch": 0.56, + "grad_norm": 0.57421875, + "learning_rate": 9.630799952652651e-05, + "loss": 0.8919, + "step": 39080 + }, + { + "epoch": 0.56, + "grad_norm": 0.58203125, + "learning_rate": 9.628298050993806e-05, + "loss": 1.0273, + "step": 39085 + }, + { + "epoch": 0.56, + "grad_norm": 0.54296875, + "learning_rate": 9.625796172633664e-05, + "loss": 1.0297, + "step": 39090 + }, + { + "epoch": 0.56, + "grad_norm": 0.5625, + "learning_rate": 9.623294317729042e-05, + "loss": 0.7903, + "step": 39095 + }, + { + "epoch": 0.56, + "grad_norm": 0.58984375, + "learning_rate": 9.620792486436762e-05, + "loss": 0.9657, + "step": 39100 + }, + { + "epoch": 0.56, + "grad_norm": 0.55078125, + "learning_rate": 9.618290678913642e-05, + "loss": 0.932, + "step": 39105 + }, + { + "epoch": 0.56, + "grad_norm": 0.5234375, + "learning_rate": 9.615788895316498e-05, + "loss": 1.0416, + "step": 39110 + }, + { + "epoch": 0.56, + "grad_norm": 0.53515625, + "learning_rate": 9.613287135802142e-05, + "loss": 0.8526, + "step": 39115 + }, + { + "epoch": 0.56, + "grad_norm": 0.69140625, + "learning_rate": 9.610785400527385e-05, + "loss": 0.9282, + "step": 39120 + }, + { + "epoch": 0.56, + "grad_norm": 0.50390625, + "learning_rate": 9.608283689649047e-05, + "loss": 0.9398, + "step": 39125 + }, + { + "epoch": 0.56, + "grad_norm": 0.5703125, + "learning_rate": 9.605782003323932e-05, + "loss": 0.8503, + "step": 39130 + }, + { + "epoch": 0.56, + "grad_norm": 0.51171875, + "learning_rate": 9.603280341708848e-05, + "loss": 0.8561, + "step": 39135 + }, + { + "epoch": 0.56, + "grad_norm": 0.51953125, + "learning_rate": 9.600778704960606e-05, + "loss": 1.0332, + "step": 39140 + }, + { + "epoch": 0.56, + "grad_norm": 0.6015625, + "learning_rate": 9.59827709323601e-05, + "loss": 0.9378, + "step": 39145 + }, + { + "epoch": 0.56, + "grad_norm": 0.5703125, + "learning_rate": 9.59577550669186e-05, + "loss": 0.9466, + "step": 39150 + }, + { + "epoch": 0.56, + "grad_norm": 0.546875, + "learning_rate": 9.593273945484966e-05, + "loss": 0.9579, + "step": 39155 + }, + { + "epoch": 0.56, + "grad_norm": 0.57421875, + "learning_rate": 9.590772409772125e-05, + "loss": 0.9372, + "step": 39160 + }, + { + "epoch": 0.56, + "grad_norm": 0.6015625, + "learning_rate": 9.588270899710133e-05, + "loss": 0.8365, + "step": 39165 + }, + { + "epoch": 0.56, + "grad_norm": 0.494140625, + "learning_rate": 9.58576941545579e-05, + "loss": 0.8629, + "step": 39170 + }, + { + "epoch": 0.56, + "grad_norm": 0.5234375, + "learning_rate": 9.583267957165891e-05, + "loss": 0.9038, + "step": 39175 + }, + { + "epoch": 0.56, + "grad_norm": 0.58984375, + "learning_rate": 9.580766524997232e-05, + "loss": 0.9896, + "step": 39180 + }, + { + "epoch": 0.56, + "grad_norm": 0.6171875, + "learning_rate": 9.578265119106605e-05, + "loss": 0.9699, + "step": 39185 + }, + { + "epoch": 0.56, + "grad_norm": 0.56640625, + "learning_rate": 9.575763739650798e-05, + "loss": 0.9685, + "step": 39190 + }, + { + "epoch": 0.56, + "grad_norm": 0.6171875, + "learning_rate": 9.573262386786607e-05, + "loss": 0.9458, + "step": 39195 + }, + { + "epoch": 0.56, + "grad_norm": 0.5703125, + "learning_rate": 9.570761060670814e-05, + "loss": 0.9869, + "step": 39200 + }, + { + "epoch": 0.56, + "grad_norm": 0.53515625, + "learning_rate": 9.568259761460205e-05, + "loss": 0.9797, + "step": 39205 + }, + { + "epoch": 0.56, + "grad_norm": 0.6171875, + "learning_rate": 9.565758489311572e-05, + "loss": 1.0223, + "step": 39210 + }, + { + "epoch": 0.56, + "grad_norm": 0.66015625, + "learning_rate": 9.563257244381683e-05, + "loss": 0.9996, + "step": 39215 + }, + { + "epoch": 0.56, + "grad_norm": 0.5078125, + "learning_rate": 9.560756026827333e-05, + "loss": 0.9554, + "step": 39220 + }, + { + "epoch": 0.56, + "grad_norm": 0.6484375, + "learning_rate": 9.558254836805293e-05, + "loss": 0.8866, + "step": 39225 + }, + { + "epoch": 0.56, + "grad_norm": 0.5703125, + "learning_rate": 9.555753674472342e-05, + "loss": 0.8414, + "step": 39230 + }, + { + "epoch": 0.56, + "grad_norm": 0.55078125, + "learning_rate": 9.553252539985258e-05, + "loss": 0.9681, + "step": 39235 + }, + { + "epoch": 0.56, + "grad_norm": 0.58203125, + "learning_rate": 9.550751433500814e-05, + "loss": 1.0373, + "step": 39240 + }, + { + "epoch": 0.56, + "grad_norm": 0.5078125, + "learning_rate": 9.54825035517578e-05, + "loss": 0.9632, + "step": 39245 + }, + { + "epoch": 0.56, + "grad_norm": 0.58984375, + "learning_rate": 9.54574930516693e-05, + "loss": 0.9035, + "step": 39250 + }, + { + "epoch": 0.56, + "grad_norm": 0.5703125, + "learning_rate": 9.543248283631031e-05, + "loss": 0.8037, + "step": 39255 + }, + { + "epoch": 0.56, + "grad_norm": 0.59375, + "learning_rate": 9.540747290724848e-05, + "loss": 1.0928, + "step": 39260 + }, + { + "epoch": 0.56, + "grad_norm": 0.63671875, + "learning_rate": 9.538246326605154e-05, + "loss": 1.0113, + "step": 39265 + }, + { + "epoch": 0.56, + "grad_norm": 0.609375, + "learning_rate": 9.535745391428699e-05, + "loss": 0.8858, + "step": 39270 + }, + { + "epoch": 0.56, + "grad_norm": 0.515625, + "learning_rate": 9.533244485352255e-05, + "loss": 0.9432, + "step": 39275 + }, + { + "epoch": 0.56, + "grad_norm": 0.51953125, + "learning_rate": 9.530743608532577e-05, + "loss": 0.859, + "step": 39280 + }, + { + "epoch": 0.56, + "grad_norm": 0.5625, + "learning_rate": 9.528242761126424e-05, + "loss": 0.9839, + "step": 39285 + }, + { + "epoch": 0.56, + "grad_norm": 0.54296875, + "learning_rate": 9.525741943290552e-05, + "loss": 1.0186, + "step": 39290 + }, + { + "epoch": 0.56, + "grad_norm": 0.61328125, + "learning_rate": 9.523241155181716e-05, + "loss": 0.8606, + "step": 39295 + }, + { + "epoch": 0.56, + "grad_norm": 0.50390625, + "learning_rate": 9.520740396956665e-05, + "loss": 0.9145, + "step": 39300 + }, + { + "epoch": 0.56, + "grad_norm": 0.51953125, + "learning_rate": 9.518239668772154e-05, + "loss": 0.9839, + "step": 39305 + }, + { + "epoch": 0.56, + "grad_norm": 0.58203125, + "learning_rate": 9.515738970784928e-05, + "loss": 0.8016, + "step": 39310 + }, + { + "epoch": 0.56, + "grad_norm": 0.54296875, + "learning_rate": 9.513238303151739e-05, + "loss": 0.9934, + "step": 39315 + }, + { + "epoch": 0.56, + "grad_norm": 0.56640625, + "learning_rate": 9.510737666029323e-05, + "loss": 0.8347, + "step": 39320 + }, + { + "epoch": 0.56, + "grad_norm": 0.59375, + "learning_rate": 9.508237059574429e-05, + "loss": 0.9688, + "step": 39325 + }, + { + "epoch": 0.56, + "grad_norm": 0.498046875, + "learning_rate": 9.505736483943795e-05, + "loss": 1.0757, + "step": 39330 + }, + { + "epoch": 0.56, + "grad_norm": 0.60546875, + "learning_rate": 9.503235939294163e-05, + "loss": 0.9708, + "step": 39335 + }, + { + "epoch": 0.56, + "grad_norm": 0.5234375, + "learning_rate": 9.500735425782266e-05, + "loss": 0.8411, + "step": 39340 + }, + { + "epoch": 0.56, + "grad_norm": 0.55078125, + "learning_rate": 9.49823494356484e-05, + "loss": 1.0257, + "step": 39345 + }, + { + "epoch": 0.56, + "grad_norm": 0.474609375, + "learning_rate": 9.495734492798623e-05, + "loss": 0.885, + "step": 39350 + }, + { + "epoch": 0.56, + "grad_norm": 0.55078125, + "learning_rate": 9.493234073640339e-05, + "loss": 1.0719, + "step": 39355 + }, + { + "epoch": 0.56, + "grad_norm": 0.52734375, + "learning_rate": 9.490733686246722e-05, + "loss": 0.9743, + "step": 39360 + }, + { + "epoch": 0.56, + "grad_norm": 0.6484375, + "learning_rate": 9.488233330774497e-05, + "loss": 0.8661, + "step": 39365 + }, + { + "epoch": 0.56, + "grad_norm": 0.54296875, + "learning_rate": 9.485733007380395e-05, + "loss": 0.955, + "step": 39370 + }, + { + "epoch": 0.56, + "grad_norm": 0.546875, + "learning_rate": 9.483232716221127e-05, + "loss": 0.8181, + "step": 39375 + }, + { + "epoch": 0.56, + "grad_norm": 0.6796875, + "learning_rate": 9.480732457453422e-05, + "loss": 0.9923, + "step": 39380 + }, + { + "epoch": 0.56, + "grad_norm": 0.57421875, + "learning_rate": 9.478232231234e-05, + "loss": 1.033, + "step": 39385 + }, + { + "epoch": 0.57, + "grad_norm": 0.515625, + "learning_rate": 9.475732037719572e-05, + "loss": 0.8905, + "step": 39390 + }, + { + "epoch": 0.57, + "grad_norm": 0.625, + "learning_rate": 9.473231877066861e-05, + "loss": 0.9827, + "step": 39395 + }, + { + "epoch": 0.57, + "grad_norm": 0.5234375, + "learning_rate": 9.470731749432574e-05, + "loss": 0.9783, + "step": 39400 + }, + { + "epoch": 0.57, + "grad_norm": 0.54296875, + "learning_rate": 9.468231654973425e-05, + "loss": 0.9423, + "step": 39405 + }, + { + "epoch": 0.57, + "grad_norm": 0.515625, + "learning_rate": 9.46573159384612e-05, + "loss": 0.9276, + "step": 39410 + }, + { + "epoch": 0.57, + "grad_norm": 0.5390625, + "learning_rate": 9.463231566207368e-05, + "loss": 0.992, + "step": 39415 + }, + { + "epoch": 0.57, + "grad_norm": 0.58203125, + "learning_rate": 9.460731572213875e-05, + "loss": 0.9216, + "step": 39420 + }, + { + "epoch": 0.57, + "grad_norm": 0.5390625, + "learning_rate": 9.45823161202234e-05, + "loss": 0.95, + "step": 39425 + }, + { + "epoch": 0.57, + "grad_norm": 0.5390625, + "learning_rate": 9.455731685789461e-05, + "loss": 0.9028, + "step": 39430 + }, + { + "epoch": 0.57, + "grad_norm": 0.57421875, + "learning_rate": 9.453231793671944e-05, + "loss": 0.9488, + "step": 39435 + }, + { + "epoch": 0.57, + "grad_norm": 0.71484375, + "learning_rate": 9.450731935826479e-05, + "loss": 1.054, + "step": 39440 + }, + { + "epoch": 0.57, + "grad_norm": 0.494140625, + "learning_rate": 9.44823211240976e-05, + "loss": 0.9122, + "step": 39445 + }, + { + "epoch": 0.57, + "grad_norm": 0.546875, + "learning_rate": 9.445732323578484e-05, + "loss": 0.877, + "step": 39450 + }, + { + "epoch": 0.57, + "grad_norm": 0.61328125, + "learning_rate": 9.443232569489337e-05, + "loss": 0.9831, + "step": 39455 + }, + { + "epoch": 0.57, + "grad_norm": 0.59765625, + "learning_rate": 9.440732850299003e-05, + "loss": 1.0377, + "step": 39460 + }, + { + "epoch": 0.57, + "grad_norm": 0.439453125, + "learning_rate": 9.438233166164175e-05, + "loss": 1.0571, + "step": 39465 + }, + { + "epoch": 0.57, + "grad_norm": 0.57421875, + "learning_rate": 9.43573351724153e-05, + "loss": 1.04, + "step": 39470 + }, + { + "epoch": 0.57, + "grad_norm": 0.5078125, + "learning_rate": 9.433233903687754e-05, + "loss": 1.0514, + "step": 39475 + }, + { + "epoch": 0.57, + "grad_norm": 0.52734375, + "learning_rate": 9.43073432565952e-05, + "loss": 0.8964, + "step": 39480 + }, + { + "epoch": 0.57, + "grad_norm": 0.54296875, + "learning_rate": 9.428234783313505e-05, + "loss": 1.0734, + "step": 39485 + }, + { + "epoch": 0.57, + "grad_norm": 0.515625, + "learning_rate": 9.425735276806387e-05, + "loss": 0.8907, + "step": 39490 + }, + { + "epoch": 0.57, + "grad_norm": 0.50390625, + "learning_rate": 9.423235806294835e-05, + "loss": 0.9568, + "step": 39495 + }, + { + "epoch": 0.57, + "grad_norm": 0.55859375, + "learning_rate": 9.420736371935519e-05, + "loss": 0.9042, + "step": 39500 + }, + { + "epoch": 0.57, + "grad_norm": 0.609375, + "learning_rate": 9.418236973885107e-05, + "loss": 0.8978, + "step": 39505 + }, + { + "epoch": 0.57, + "grad_norm": 0.5390625, + "learning_rate": 9.415737612300266e-05, + "loss": 0.9227, + "step": 39510 + }, + { + "epoch": 0.57, + "grad_norm": 0.5703125, + "learning_rate": 9.413238287337653e-05, + "loss": 1.0013, + "step": 39515 + }, + { + "epoch": 0.57, + "grad_norm": 0.52734375, + "learning_rate": 9.410738999153936e-05, + "loss": 0.8893, + "step": 39520 + }, + { + "epoch": 0.57, + "grad_norm": 0.52734375, + "learning_rate": 9.408239747905771e-05, + "loss": 0.8983, + "step": 39525 + }, + { + "epoch": 0.57, + "grad_norm": 0.498046875, + "learning_rate": 9.405740533749811e-05, + "loss": 0.9171, + "step": 39530 + }, + { + "epoch": 0.57, + "grad_norm": 0.5625, + "learning_rate": 9.403241356842711e-05, + "loss": 0.9745, + "step": 39535 + }, + { + "epoch": 0.57, + "grad_norm": 0.58203125, + "learning_rate": 9.40074221734112e-05, + "loss": 1.0574, + "step": 39540 + }, + { + "epoch": 0.57, + "grad_norm": 0.578125, + "learning_rate": 9.398243115401693e-05, + "loss": 1.023, + "step": 39545 + }, + { + "epoch": 0.57, + "grad_norm": 0.57421875, + "learning_rate": 9.39574405118107e-05, + "loss": 0.9277, + "step": 39550 + }, + { + "epoch": 0.57, + "grad_norm": 0.70703125, + "learning_rate": 9.393245024835898e-05, + "loss": 1.0421, + "step": 39555 + }, + { + "epoch": 0.57, + "grad_norm": 0.56640625, + "learning_rate": 9.39074603652282e-05, + "loss": 0.939, + "step": 39560 + }, + { + "epoch": 0.57, + "grad_norm": 0.56640625, + "learning_rate": 9.388247086398475e-05, + "loss": 0.826, + "step": 39565 + }, + { + "epoch": 0.57, + "grad_norm": 0.57421875, + "learning_rate": 9.385748174619497e-05, + "loss": 0.9839, + "step": 39570 + }, + { + "epoch": 0.57, + "grad_norm": 0.46484375, + "learning_rate": 9.383249301342524e-05, + "loss": 0.9497, + "step": 39575 + }, + { + "epoch": 0.57, + "grad_norm": 0.5625, + "learning_rate": 9.380750466724192e-05, + "loss": 0.9422, + "step": 39580 + }, + { + "epoch": 0.57, + "grad_norm": 0.546875, + "learning_rate": 9.378251670921122e-05, + "loss": 0.7498, + "step": 39585 + }, + { + "epoch": 0.57, + "grad_norm": 0.73828125, + "learning_rate": 9.375752914089946e-05, + "loss": 1.0139, + "step": 39590 + }, + { + "epoch": 0.57, + "grad_norm": 0.490234375, + "learning_rate": 9.373254196387286e-05, + "loss": 0.8377, + "step": 39595 + }, + { + "epoch": 0.57, + "grad_norm": 0.5390625, + "learning_rate": 9.370755517969768e-05, + "loss": 0.8605, + "step": 39600 + }, + { + "epoch": 0.57, + "grad_norm": 0.54296875, + "learning_rate": 9.368256878994012e-05, + "loss": 0.8669, + "step": 39605 + }, + { + "epoch": 0.57, + "grad_norm": 0.515625, + "learning_rate": 9.365758279616631e-05, + "loss": 0.9177, + "step": 39610 + }, + { + "epoch": 0.57, + "grad_norm": 0.59765625, + "learning_rate": 9.363259719994247e-05, + "loss": 0.9787, + "step": 39615 + }, + { + "epoch": 0.57, + "grad_norm": 0.671875, + "learning_rate": 9.360761200283468e-05, + "loss": 0.948, + "step": 39620 + }, + { + "epoch": 0.57, + "grad_norm": 0.58203125, + "learning_rate": 9.358262720640903e-05, + "loss": 1.0305, + "step": 39625 + }, + { + "epoch": 0.57, + "grad_norm": 0.54296875, + "learning_rate": 9.355764281223168e-05, + "loss": 0.9977, + "step": 39630 + }, + { + "epoch": 0.57, + "grad_norm": 0.58984375, + "learning_rate": 9.353265882186855e-05, + "loss": 0.7942, + "step": 39635 + }, + { + "epoch": 0.57, + "grad_norm": 0.625, + "learning_rate": 9.350767523688574e-05, + "loss": 0.902, + "step": 39640 + }, + { + "epoch": 0.57, + "grad_norm": 0.52734375, + "learning_rate": 9.348269205884926e-05, + "loss": 0.8388, + "step": 39645 + }, + { + "epoch": 0.57, + "grad_norm": 0.5703125, + "learning_rate": 9.345770928932505e-05, + "loss": 1.0024, + "step": 39650 + }, + { + "epoch": 0.57, + "grad_norm": 0.61328125, + "learning_rate": 9.343272692987908e-05, + "loss": 0.8947, + "step": 39655 + }, + { + "epoch": 0.57, + "grad_norm": 0.458984375, + "learning_rate": 9.340774498207726e-05, + "loss": 1.0304, + "step": 39660 + }, + { + "epoch": 0.57, + "grad_norm": 0.81640625, + "learning_rate": 9.338276344748548e-05, + "loss": 0.9795, + "step": 39665 + }, + { + "epoch": 0.57, + "grad_norm": 0.6015625, + "learning_rate": 9.335778232766964e-05, + "loss": 1.0649, + "step": 39670 + }, + { + "epoch": 0.57, + "grad_norm": 0.50390625, + "learning_rate": 9.333280162419558e-05, + "loss": 0.9476, + "step": 39675 + }, + { + "epoch": 0.57, + "grad_norm": 0.609375, + "learning_rate": 9.330782133862907e-05, + "loss": 1.0906, + "step": 39680 + }, + { + "epoch": 0.57, + "grad_norm": 0.65234375, + "learning_rate": 9.328284147253601e-05, + "loss": 0.9704, + "step": 39685 + }, + { + "epoch": 0.57, + "grad_norm": 0.56640625, + "learning_rate": 9.325786202748203e-05, + "loss": 0.9293, + "step": 39690 + }, + { + "epoch": 0.57, + "grad_norm": 0.64453125, + "learning_rate": 9.323288300503296e-05, + "loss": 1.004, + "step": 39695 + }, + { + "epoch": 0.57, + "grad_norm": 0.52734375, + "learning_rate": 9.32079044067545e-05, + "loss": 0.9423, + "step": 39700 + }, + { + "epoch": 0.57, + "grad_norm": 0.6171875, + "learning_rate": 9.31829262342123e-05, + "loss": 0.9661, + "step": 39705 + }, + { + "epoch": 0.57, + "grad_norm": 0.5859375, + "learning_rate": 9.315794848897207e-05, + "loss": 0.8995, + "step": 39710 + }, + { + "epoch": 0.57, + "grad_norm": 0.609375, + "learning_rate": 9.313297117259941e-05, + "loss": 0.9261, + "step": 39715 + }, + { + "epoch": 0.57, + "grad_norm": 0.5859375, + "learning_rate": 9.310799428665992e-05, + "loss": 0.9131, + "step": 39720 + }, + { + "epoch": 0.57, + "grad_norm": 0.66796875, + "learning_rate": 9.308301783271923e-05, + "loss": 1.0272, + "step": 39725 + }, + { + "epoch": 0.57, + "grad_norm": 0.57421875, + "learning_rate": 9.305804181234286e-05, + "loss": 0.9359, + "step": 39730 + }, + { + "epoch": 0.57, + "grad_norm": 0.484375, + "learning_rate": 9.303306622709636e-05, + "loss": 1.0216, + "step": 39735 + }, + { + "epoch": 0.57, + "grad_norm": 0.8828125, + "learning_rate": 9.300809107854517e-05, + "loss": 0.9969, + "step": 39740 + }, + { + "epoch": 0.57, + "grad_norm": 0.546875, + "learning_rate": 9.298311636825477e-05, + "loss": 1.057, + "step": 39745 + }, + { + "epoch": 0.57, + "grad_norm": 0.4921875, + "learning_rate": 9.295814209779066e-05, + "loss": 0.9669, + "step": 39750 + }, + { + "epoch": 0.57, + "grad_norm": 0.51953125, + "learning_rate": 9.293316826871821e-05, + "loss": 0.7833, + "step": 39755 + }, + { + "epoch": 0.57, + "grad_norm": 0.5078125, + "learning_rate": 9.290819488260281e-05, + "loss": 0.8582, + "step": 39760 + }, + { + "epoch": 0.57, + "grad_norm": 0.57421875, + "learning_rate": 9.288322194100985e-05, + "loss": 0.994, + "step": 39765 + }, + { + "epoch": 0.57, + "grad_norm": 0.578125, + "learning_rate": 9.285824944550465e-05, + "loss": 1.081, + "step": 39770 + }, + { + "epoch": 0.57, + "grad_norm": 0.58203125, + "learning_rate": 9.283327739765248e-05, + "loss": 0.9136, + "step": 39775 + }, + { + "epoch": 0.57, + "grad_norm": 0.5859375, + "learning_rate": 9.280830579901867e-05, + "loss": 1.1627, + "step": 39780 + }, + { + "epoch": 0.57, + "grad_norm": 0.62890625, + "learning_rate": 9.278333465116844e-05, + "loss": 1.0138, + "step": 39785 + }, + { + "epoch": 0.57, + "grad_norm": 0.55859375, + "learning_rate": 9.275836395566703e-05, + "loss": 0.9436, + "step": 39790 + }, + { + "epoch": 0.57, + "grad_norm": 0.62109375, + "learning_rate": 9.27333937140796e-05, + "loss": 0.8221, + "step": 39795 + }, + { + "epoch": 0.57, + "grad_norm": 0.5234375, + "learning_rate": 9.270842392797131e-05, + "loss": 0.9202, + "step": 39800 + }, + { + "epoch": 0.57, + "grad_norm": 0.609375, + "learning_rate": 9.268345459890734e-05, + "loss": 0.9205, + "step": 39805 + }, + { + "epoch": 0.57, + "grad_norm": 0.54296875, + "learning_rate": 9.265848572845275e-05, + "loss": 0.8917, + "step": 39810 + }, + { + "epoch": 0.57, + "grad_norm": 0.57421875, + "learning_rate": 9.263351731817263e-05, + "loss": 0.9382, + "step": 39815 + }, + { + "epoch": 0.57, + "grad_norm": 0.828125, + "learning_rate": 9.260854936963205e-05, + "loss": 0.9602, + "step": 39820 + }, + { + "epoch": 0.57, + "grad_norm": 0.5390625, + "learning_rate": 9.2583581884396e-05, + "loss": 0.9374, + "step": 39825 + }, + { + "epoch": 0.57, + "grad_norm": 0.56640625, + "learning_rate": 9.255861486402949e-05, + "loss": 1.0302, + "step": 39830 + }, + { + "epoch": 0.57, + "grad_norm": 0.52734375, + "learning_rate": 9.253364831009748e-05, + "loss": 0.7649, + "step": 39835 + }, + { + "epoch": 0.57, + "grad_norm": 0.58984375, + "learning_rate": 9.250868222416493e-05, + "loss": 0.8398, + "step": 39840 + }, + { + "epoch": 0.57, + "grad_norm": 0.58203125, + "learning_rate": 9.248371660779666e-05, + "loss": 0.9781, + "step": 39845 + }, + { + "epoch": 0.57, + "grad_norm": 0.5703125, + "learning_rate": 9.245875146255763e-05, + "loss": 1.0139, + "step": 39850 + }, + { + "epoch": 0.57, + "grad_norm": 0.578125, + "learning_rate": 9.24337867900126e-05, + "loss": 0.8359, + "step": 39855 + }, + { + "epoch": 0.57, + "grad_norm": 0.625, + "learning_rate": 9.240882259172647e-05, + "loss": 1.1231, + "step": 39860 + }, + { + "epoch": 0.57, + "grad_norm": 0.51171875, + "learning_rate": 9.238385886926397e-05, + "loss": 0.8595, + "step": 39865 + }, + { + "epoch": 0.57, + "grad_norm": 0.5703125, + "learning_rate": 9.235889562418986e-05, + "loss": 1.133, + "step": 39870 + }, + { + "epoch": 0.57, + "grad_norm": 0.48828125, + "learning_rate": 9.233393285806888e-05, + "loss": 0.9109, + "step": 39875 + }, + { + "epoch": 0.57, + "grad_norm": 0.5078125, + "learning_rate": 9.230897057246574e-05, + "loss": 0.924, + "step": 39880 + }, + { + "epoch": 0.57, + "grad_norm": 0.48828125, + "learning_rate": 9.228400876894506e-05, + "loss": 0.8275, + "step": 39885 + }, + { + "epoch": 0.57, + "grad_norm": 0.54296875, + "learning_rate": 9.22590474490715e-05, + "loss": 0.8016, + "step": 39890 + }, + { + "epoch": 0.57, + "grad_norm": 0.56640625, + "learning_rate": 9.22340866144097e-05, + "loss": 0.9906, + "step": 39895 + }, + { + "epoch": 0.57, + "grad_norm": 0.5625, + "learning_rate": 9.220912626652417e-05, + "loss": 0.9459, + "step": 39900 + }, + { + "epoch": 0.57, + "grad_norm": 0.54296875, + "learning_rate": 9.218416640697947e-05, + "loss": 0.92, + "step": 39905 + }, + { + "epoch": 0.57, + "grad_norm": 0.61328125, + "learning_rate": 9.215920703734012e-05, + "loss": 1.059, + "step": 39910 + }, + { + "epoch": 0.57, + "grad_norm": 0.578125, + "learning_rate": 9.213424815917062e-05, + "loss": 1.1374, + "step": 39915 + }, + { + "epoch": 0.57, + "grad_norm": 0.546875, + "learning_rate": 9.21092897740354e-05, + "loss": 1.0304, + "step": 39920 + }, + { + "epoch": 0.57, + "grad_norm": 0.53515625, + "learning_rate": 9.208433188349885e-05, + "loss": 0.8611, + "step": 39925 + }, + { + "epoch": 0.57, + "grad_norm": 0.6015625, + "learning_rate": 9.205937448912543e-05, + "loss": 0.9039, + "step": 39930 + }, + { + "epoch": 0.57, + "grad_norm": 0.50390625, + "learning_rate": 9.203441759247946e-05, + "loss": 0.8528, + "step": 39935 + }, + { + "epoch": 0.57, + "grad_norm": 0.5078125, + "learning_rate": 9.200946119512523e-05, + "loss": 0.764, + "step": 39940 + }, + { + "epoch": 0.57, + "grad_norm": 0.59375, + "learning_rate": 9.198450529862714e-05, + "loss": 0.9309, + "step": 39945 + }, + { + "epoch": 0.57, + "grad_norm": 0.59765625, + "learning_rate": 9.195954990454934e-05, + "loss": 1.0455, + "step": 39950 + }, + { + "epoch": 0.57, + "grad_norm": 0.5390625, + "learning_rate": 9.193459501445611e-05, + "loss": 1.173, + "step": 39955 + }, + { + "epoch": 0.57, + "grad_norm": 0.515625, + "learning_rate": 9.190964062991166e-05, + "loss": 0.7986, + "step": 39960 + }, + { + "epoch": 0.57, + "grad_norm": 0.53515625, + "learning_rate": 9.188468675248014e-05, + "loss": 0.8808, + "step": 39965 + }, + { + "epoch": 0.57, + "grad_norm": 0.53515625, + "learning_rate": 9.18597333837257e-05, + "loss": 0.862, + "step": 39970 + }, + { + "epoch": 0.57, + "grad_norm": 0.5390625, + "learning_rate": 9.183478052521244e-05, + "loss": 0.893, + "step": 39975 + }, + { + "epoch": 0.57, + "grad_norm": 0.52734375, + "learning_rate": 9.180982817850443e-05, + "loss": 1.0495, + "step": 39980 + }, + { + "epoch": 0.57, + "grad_norm": 0.54296875, + "learning_rate": 9.178487634516573e-05, + "loss": 0.9471, + "step": 39985 + }, + { + "epoch": 0.57, + "grad_norm": 0.52734375, + "learning_rate": 9.175992502676035e-05, + "loss": 0.9318, + "step": 39990 + }, + { + "epoch": 0.57, + "grad_norm": 0.6015625, + "learning_rate": 9.173497422485222e-05, + "loss": 0.956, + "step": 39995 + }, + { + "epoch": 0.57, + "grad_norm": 0.5859375, + "learning_rate": 9.171002394100539e-05, + "loss": 0.9119, + "step": 40000 + }, + { + "epoch": 0.57, + "grad_norm": 0.51171875, + "learning_rate": 9.168507417678364e-05, + "loss": 0.9412, + "step": 40005 + }, + { + "epoch": 0.57, + "grad_norm": 0.75390625, + "learning_rate": 9.166012493375091e-05, + "loss": 1.026, + "step": 40010 + }, + { + "epoch": 0.57, + "grad_norm": 0.5234375, + "learning_rate": 9.163517621347106e-05, + "loss": 1.0485, + "step": 40015 + }, + { + "epoch": 0.57, + "grad_norm": 0.56640625, + "learning_rate": 9.161022801750788e-05, + "loss": 0.9197, + "step": 40020 + }, + { + "epoch": 0.57, + "grad_norm": 0.703125, + "learning_rate": 9.158528034742518e-05, + "loss": 0.9262, + "step": 40025 + }, + { + "epoch": 0.57, + "grad_norm": 0.53125, + "learning_rate": 9.156033320478669e-05, + "loss": 0.9726, + "step": 40030 + }, + { + "epoch": 0.57, + "grad_norm": 0.5, + "learning_rate": 9.15353865911561e-05, + "loss": 0.8281, + "step": 40035 + }, + { + "epoch": 0.57, + "grad_norm": 0.65625, + "learning_rate": 9.151044050809715e-05, + "loss": 0.9731, + "step": 40040 + }, + { + "epoch": 0.57, + "grad_norm": 0.5, + "learning_rate": 9.148549495717344e-05, + "loss": 1.0655, + "step": 40045 + }, + { + "epoch": 0.57, + "grad_norm": 0.5546875, + "learning_rate": 9.146054993994864e-05, + "loss": 1.0064, + "step": 40050 + }, + { + "epoch": 0.57, + "grad_norm": 0.56640625, + "learning_rate": 9.143560545798625e-05, + "loss": 0.9116, + "step": 40055 + }, + { + "epoch": 0.57, + "grad_norm": 0.62890625, + "learning_rate": 9.141066151284988e-05, + "loss": 0.9974, + "step": 40060 + }, + { + "epoch": 0.57, + "grad_norm": 0.57421875, + "learning_rate": 9.138571810610303e-05, + "loss": 0.9558, + "step": 40065 + }, + { + "epoch": 0.57, + "grad_norm": 0.55078125, + "learning_rate": 9.136077523930918e-05, + "loss": 0.9115, + "step": 40070 + }, + { + "epoch": 0.57, + "grad_norm": 0.58984375, + "learning_rate": 9.133583291403176e-05, + "loss": 1.0983, + "step": 40075 + }, + { + "epoch": 0.57, + "grad_norm": 0.5234375, + "learning_rate": 9.131089113183422e-05, + "loss": 1.028, + "step": 40080 + }, + { + "epoch": 0.58, + "grad_norm": 0.55859375, + "learning_rate": 9.128594989427992e-05, + "loss": 0.9602, + "step": 40085 + }, + { + "epoch": 0.58, + "grad_norm": 0.57421875, + "learning_rate": 9.126100920293219e-05, + "loss": 0.9662, + "step": 40090 + }, + { + "epoch": 0.58, + "grad_norm": 0.64453125, + "learning_rate": 9.123606905935436e-05, + "loss": 0.944, + "step": 40095 + }, + { + "epoch": 0.58, + "grad_norm": 0.57421875, + "learning_rate": 9.121112946510973e-05, + "loss": 0.9506, + "step": 40100 + }, + { + "epoch": 0.58, + "grad_norm": 0.484375, + "learning_rate": 9.11861904217615e-05, + "loss": 0.9151, + "step": 40105 + }, + { + "epoch": 0.58, + "grad_norm": 0.73828125, + "learning_rate": 9.116125193087288e-05, + "loss": 0.8144, + "step": 40110 + }, + { + "epoch": 0.58, + "grad_norm": 0.5859375, + "learning_rate": 9.113631399400707e-05, + "loss": 0.9939, + "step": 40115 + }, + { + "epoch": 0.58, + "grad_norm": 0.65625, + "learning_rate": 9.11113766127272e-05, + "loss": 0.9773, + "step": 40120 + }, + { + "epoch": 0.58, + "grad_norm": 0.51953125, + "learning_rate": 9.108643978859632e-05, + "loss": 0.9812, + "step": 40125 + }, + { + "epoch": 0.58, + "grad_norm": 0.62109375, + "learning_rate": 9.10615035231776e-05, + "loss": 1.0516, + "step": 40130 + }, + { + "epoch": 0.58, + "grad_norm": 0.54296875, + "learning_rate": 9.103656781803399e-05, + "loss": 1.0237, + "step": 40135 + }, + { + "epoch": 0.58, + "grad_norm": 0.5625, + "learning_rate": 9.101163267472852e-05, + "loss": 0.969, + "step": 40140 + }, + { + "epoch": 0.58, + "grad_norm": 0.515625, + "learning_rate": 9.098669809482415e-05, + "loss": 1.0175, + "step": 40145 + }, + { + "epoch": 0.58, + "grad_norm": 0.58984375, + "learning_rate": 9.096176407988382e-05, + "loss": 0.9175, + "step": 40150 + }, + { + "epoch": 0.58, + "grad_norm": 0.625, + "learning_rate": 9.093683063147044e-05, + "loss": 1.1157, + "step": 40155 + }, + { + "epoch": 0.58, + "grad_norm": 0.5390625, + "learning_rate": 9.09118977511468e-05, + "loss": 1.0574, + "step": 40160 + }, + { + "epoch": 0.58, + "grad_norm": 0.54296875, + "learning_rate": 9.088696544047574e-05, + "loss": 0.8854, + "step": 40165 + }, + { + "epoch": 0.58, + "grad_norm": 0.52734375, + "learning_rate": 9.08620337010201e-05, + "loss": 0.8979, + "step": 40170 + }, + { + "epoch": 0.58, + "grad_norm": 0.58984375, + "learning_rate": 9.083710253434259e-05, + "loss": 1.1431, + "step": 40175 + }, + { + "epoch": 0.58, + "grad_norm": 0.55078125, + "learning_rate": 9.08121719420059e-05, + "loss": 0.9464, + "step": 40180 + }, + { + "epoch": 0.58, + "grad_norm": 0.51171875, + "learning_rate": 9.078724192557278e-05, + "loss": 0.9012, + "step": 40185 + }, + { + "epoch": 0.58, + "grad_norm": 0.58203125, + "learning_rate": 9.07623124866058e-05, + "loss": 0.9206, + "step": 40190 + }, + { + "epoch": 0.58, + "grad_norm": 0.60546875, + "learning_rate": 9.073738362666759e-05, + "loss": 0.9284, + "step": 40195 + }, + { + "epoch": 0.58, + "grad_norm": 0.5625, + "learning_rate": 9.071245534732073e-05, + "loss": 0.864, + "step": 40200 + }, + { + "epoch": 0.58, + "grad_norm": 0.53515625, + "learning_rate": 9.068752765012777e-05, + "loss": 0.9126, + "step": 40205 + }, + { + "epoch": 0.58, + "grad_norm": 0.5, + "learning_rate": 9.066260053665119e-05, + "loss": 0.9227, + "step": 40210 + }, + { + "epoch": 0.58, + "grad_norm": 0.61328125, + "learning_rate": 9.063767400845343e-05, + "loss": 1.0837, + "step": 40215 + }, + { + "epoch": 0.58, + "grad_norm": 0.5078125, + "learning_rate": 9.061274806709688e-05, + "loss": 1.0174, + "step": 40220 + }, + { + "epoch": 0.58, + "grad_norm": 0.51171875, + "learning_rate": 9.058782271414402e-05, + "loss": 0.8127, + "step": 40225 + }, + { + "epoch": 0.58, + "grad_norm": 0.58203125, + "learning_rate": 9.056289795115715e-05, + "loss": 0.84, + "step": 40230 + }, + { + "epoch": 0.58, + "grad_norm": 0.546875, + "learning_rate": 9.053797377969855e-05, + "loss": 1.0014, + "step": 40235 + }, + { + "epoch": 0.58, + "grad_norm": 0.625, + "learning_rate": 9.051305020133055e-05, + "loss": 1.0487, + "step": 40240 + }, + { + "epoch": 0.58, + "grad_norm": 0.56640625, + "learning_rate": 9.048812721761538e-05, + "loss": 0.9631, + "step": 40245 + }, + { + "epoch": 0.58, + "grad_norm": 0.5546875, + "learning_rate": 9.04632048301152e-05, + "loss": 0.9664, + "step": 40250 + }, + { + "epoch": 0.58, + "grad_norm": 0.546875, + "learning_rate": 9.043828304039222e-05, + "loss": 0.8728, + "step": 40255 + }, + { + "epoch": 0.58, + "grad_norm": 0.49609375, + "learning_rate": 9.041336185000858e-05, + "loss": 0.9733, + "step": 40260 + }, + { + "epoch": 0.58, + "grad_norm": 0.62890625, + "learning_rate": 9.038844126052633e-05, + "loss": 1.0681, + "step": 40265 + }, + { + "epoch": 0.58, + "grad_norm": 0.5078125, + "learning_rate": 9.03635212735075e-05, + "loss": 0.8276, + "step": 40270 + }, + { + "epoch": 0.58, + "grad_norm": 0.5234375, + "learning_rate": 9.033860189051412e-05, + "loss": 0.979, + "step": 40275 + }, + { + "epoch": 0.58, + "grad_norm": 0.5625, + "learning_rate": 9.031368311310821e-05, + "loss": 1.0012, + "step": 40280 + }, + { + "epoch": 0.58, + "grad_norm": 0.54296875, + "learning_rate": 9.028876494285166e-05, + "loss": 0.8916, + "step": 40285 + }, + { + "epoch": 0.58, + "grad_norm": 0.55078125, + "learning_rate": 9.026384738130637e-05, + "loss": 1.0582, + "step": 40290 + }, + { + "epoch": 0.58, + "grad_norm": 0.54296875, + "learning_rate": 9.023893043003423e-05, + "loss": 0.9211, + "step": 40295 + }, + { + "epoch": 0.58, + "grad_norm": 0.59375, + "learning_rate": 9.021401409059704e-05, + "loss": 0.902, + "step": 40300 + }, + { + "epoch": 0.58, + "grad_norm": 0.55078125, + "learning_rate": 9.018909836455659e-05, + "loss": 0.9903, + "step": 40305 + }, + { + "epoch": 0.58, + "grad_norm": 0.54296875, + "learning_rate": 9.016418325347464e-05, + "loss": 0.8672, + "step": 40310 + }, + { + "epoch": 0.58, + "grad_norm": 0.4921875, + "learning_rate": 9.013926875891291e-05, + "loss": 0.9729, + "step": 40315 + }, + { + "epoch": 0.58, + "grad_norm": 0.49609375, + "learning_rate": 9.011435488243302e-05, + "loss": 0.8721, + "step": 40320 + }, + { + "epoch": 0.58, + "grad_norm": 0.53125, + "learning_rate": 9.008944162559663e-05, + "loss": 0.9681, + "step": 40325 + }, + { + "epoch": 0.58, + "grad_norm": 0.494140625, + "learning_rate": 9.006452898996529e-05, + "loss": 0.9349, + "step": 40330 + }, + { + "epoch": 0.58, + "grad_norm": 0.5390625, + "learning_rate": 9.003961697710062e-05, + "loss": 1.0404, + "step": 40335 + }, + { + "epoch": 0.58, + "grad_norm": 0.5234375, + "learning_rate": 9.001470558856411e-05, + "loss": 0.9361, + "step": 40340 + }, + { + "epoch": 0.58, + "grad_norm": 0.482421875, + "learning_rate": 8.998979482591718e-05, + "loss": 0.8448, + "step": 40345 + }, + { + "epoch": 0.58, + "grad_norm": 0.64453125, + "learning_rate": 8.996488469072136e-05, + "loss": 1.0374, + "step": 40350 + }, + { + "epoch": 0.58, + "grad_norm": 0.5625, + "learning_rate": 8.993997518453799e-05, + "loss": 0.797, + "step": 40355 + }, + { + "epoch": 0.58, + "grad_norm": 0.5546875, + "learning_rate": 8.99150663089284e-05, + "loss": 0.7895, + "step": 40360 + }, + { + "epoch": 0.58, + "grad_norm": 0.6328125, + "learning_rate": 8.989015806545402e-05, + "loss": 0.9464, + "step": 40365 + }, + { + "epoch": 0.58, + "grad_norm": 0.55859375, + "learning_rate": 8.986525045567597e-05, + "loss": 0.9364, + "step": 40370 + }, + { + "epoch": 0.58, + "grad_norm": 0.55859375, + "learning_rate": 8.984034348115558e-05, + "loss": 0.9559, + "step": 40375 + }, + { + "epoch": 0.58, + "grad_norm": 0.66015625, + "learning_rate": 8.981543714345404e-05, + "loss": 1.109, + "step": 40380 + }, + { + "epoch": 0.58, + "grad_norm": 0.69921875, + "learning_rate": 8.979053144413248e-05, + "loss": 1.0065, + "step": 40385 + }, + { + "epoch": 0.58, + "grad_norm": 0.55859375, + "learning_rate": 8.976562638475206e-05, + "loss": 1.0162, + "step": 40390 + }, + { + "epoch": 0.58, + "grad_norm": 0.6328125, + "learning_rate": 8.974072196687384e-05, + "loss": 1.0205, + "step": 40395 + }, + { + "epoch": 0.58, + "grad_norm": 0.58203125, + "learning_rate": 8.971581819205881e-05, + "loss": 1.1037, + "step": 40400 + }, + { + "epoch": 0.58, + "grad_norm": 0.578125, + "learning_rate": 8.969091506186804e-05, + "loss": 0.8065, + "step": 40405 + }, + { + "epoch": 0.58, + "grad_norm": 0.5625, + "learning_rate": 8.966601257786246e-05, + "loss": 0.9767, + "step": 40410 + }, + { + "epoch": 0.58, + "grad_norm": 0.71484375, + "learning_rate": 8.964111074160296e-05, + "loss": 1.0224, + "step": 40415 + }, + { + "epoch": 0.58, + "grad_norm": 0.56640625, + "learning_rate": 8.961620955465049e-05, + "loss": 0.9845, + "step": 40420 + }, + { + "epoch": 0.58, + "grad_norm": 0.7109375, + "learning_rate": 8.959130901856576e-05, + "loss": 1.135, + "step": 40425 + }, + { + "epoch": 0.58, + "grad_norm": 0.55859375, + "learning_rate": 8.956640913490968e-05, + "loss": 1.0147, + "step": 40430 + }, + { + "epoch": 0.58, + "grad_norm": 0.59765625, + "learning_rate": 8.954150990524294e-05, + "loss": 1.1181, + "step": 40435 + }, + { + "epoch": 0.58, + "grad_norm": 0.5390625, + "learning_rate": 8.951661133112625e-05, + "loss": 0.8177, + "step": 40440 + }, + { + "epoch": 0.58, + "grad_norm": 0.55078125, + "learning_rate": 8.949171341412034e-05, + "loss": 0.9945, + "step": 40445 + }, + { + "epoch": 0.58, + "grad_norm": 0.62109375, + "learning_rate": 8.946681615578578e-05, + "loss": 1.0107, + "step": 40450 + }, + { + "epoch": 0.58, + "grad_norm": 0.51953125, + "learning_rate": 8.944191955768317e-05, + "loss": 0.9096, + "step": 40455 + }, + { + "epoch": 0.58, + "grad_norm": 0.5234375, + "learning_rate": 8.941702362137309e-05, + "loss": 1.0481, + "step": 40460 + }, + { + "epoch": 0.58, + "grad_norm": 0.5859375, + "learning_rate": 8.939212834841602e-05, + "loss": 0.885, + "step": 40465 + }, + { + "epoch": 0.58, + "grad_norm": 0.53515625, + "learning_rate": 8.936723374037245e-05, + "loss": 0.8906, + "step": 40470 + }, + { + "epoch": 0.58, + "grad_norm": 0.46875, + "learning_rate": 8.934233979880276e-05, + "loss": 1.0109, + "step": 40475 + }, + { + "epoch": 0.58, + "grad_norm": 0.609375, + "learning_rate": 8.931744652526731e-05, + "loss": 1.0306, + "step": 40480 + }, + { + "epoch": 0.58, + "grad_norm": 0.53125, + "learning_rate": 8.929255392132652e-05, + "loss": 0.7733, + "step": 40485 + }, + { + "epoch": 0.58, + "grad_norm": 0.51953125, + "learning_rate": 8.926766198854063e-05, + "loss": 0.9697, + "step": 40490 + }, + { + "epoch": 0.58, + "grad_norm": 0.62890625, + "learning_rate": 8.92427707284699e-05, + "loss": 0.9151, + "step": 40495 + }, + { + "epoch": 0.58, + "grad_norm": 0.625, + "learning_rate": 8.921788014267456e-05, + "loss": 1.1148, + "step": 40500 + }, + { + "epoch": 0.58, + "grad_norm": 0.59375, + "learning_rate": 8.919299023271479e-05, + "loss": 0.9216, + "step": 40505 + }, + { + "epoch": 0.58, + "grad_norm": 0.60546875, + "learning_rate": 8.916810100015066e-05, + "loss": 0.8619, + "step": 40510 + }, + { + "epoch": 0.58, + "grad_norm": 0.69921875, + "learning_rate": 8.914321244654233e-05, + "loss": 1.042, + "step": 40515 + }, + { + "epoch": 0.58, + "grad_norm": 0.51953125, + "learning_rate": 8.91183245734498e-05, + "loss": 0.9905, + "step": 40520 + }, + { + "epoch": 0.58, + "grad_norm": 0.55078125, + "learning_rate": 8.909343738243312e-05, + "loss": 0.8287, + "step": 40525 + }, + { + "epoch": 0.58, + "grad_norm": 0.61328125, + "learning_rate": 8.906855087505217e-05, + "loss": 0.9968, + "step": 40530 + }, + { + "epoch": 0.58, + "grad_norm": 0.5546875, + "learning_rate": 8.904366505286687e-05, + "loss": 0.944, + "step": 40535 + }, + { + "epoch": 0.58, + "grad_norm": 0.515625, + "learning_rate": 8.901877991743717e-05, + "loss": 0.8778, + "step": 40540 + }, + { + "epoch": 0.58, + "grad_norm": 0.56640625, + "learning_rate": 8.899389547032283e-05, + "loss": 0.9997, + "step": 40545 + }, + { + "epoch": 0.58, + "grad_norm": 0.50390625, + "learning_rate": 8.896901171308364e-05, + "loss": 1.0573, + "step": 40550 + }, + { + "epoch": 0.58, + "grad_norm": 0.58984375, + "learning_rate": 8.894412864727937e-05, + "loss": 0.8705, + "step": 40555 + }, + { + "epoch": 0.58, + "grad_norm": 0.59765625, + "learning_rate": 8.891924627446974e-05, + "loss": 0.9807, + "step": 40560 + }, + { + "epoch": 0.58, + "grad_norm": 0.4609375, + "learning_rate": 8.889436459621432e-05, + "loss": 1.0162, + "step": 40565 + }, + { + "epoch": 0.58, + "grad_norm": 0.59765625, + "learning_rate": 8.886948361407281e-05, + "loss": 0.9592, + "step": 40570 + }, + { + "epoch": 0.58, + "grad_norm": 0.5859375, + "learning_rate": 8.884460332960477e-05, + "loss": 1.0517, + "step": 40575 + }, + { + "epoch": 0.58, + "grad_norm": 0.60546875, + "learning_rate": 8.881972374436967e-05, + "loss": 1.0649, + "step": 40580 + }, + { + "epoch": 0.58, + "grad_norm": 0.51171875, + "learning_rate": 8.879484485992702e-05, + "loss": 0.85, + "step": 40585 + }, + { + "epoch": 0.58, + "grad_norm": 0.59765625, + "learning_rate": 8.876996667783624e-05, + "loss": 1.0682, + "step": 40590 + }, + { + "epoch": 0.58, + "grad_norm": 0.59375, + "learning_rate": 8.874508919965675e-05, + "loss": 1.009, + "step": 40595 + }, + { + "epoch": 0.58, + "grad_norm": 0.55078125, + "learning_rate": 8.872021242694789e-05, + "loss": 0.9226, + "step": 40600 + }, + { + "epoch": 0.58, + "grad_norm": 0.65234375, + "learning_rate": 8.869533636126895e-05, + "loss": 0.8566, + "step": 40605 + }, + { + "epoch": 0.58, + "grad_norm": 0.5, + "learning_rate": 8.867046100417921e-05, + "loss": 0.949, + "step": 40610 + }, + { + "epoch": 0.58, + "grad_norm": 0.55078125, + "learning_rate": 8.864558635723787e-05, + "loss": 0.9686, + "step": 40615 + }, + { + "epoch": 0.58, + "grad_norm": 0.578125, + "learning_rate": 8.862071242200411e-05, + "loss": 0.9619, + "step": 40620 + }, + { + "epoch": 0.58, + "grad_norm": 0.53515625, + "learning_rate": 8.859583920003707e-05, + "loss": 0.9039, + "step": 40625 + }, + { + "epoch": 0.58, + "grad_norm": 0.51953125, + "learning_rate": 8.857096669289583e-05, + "loss": 0.9528, + "step": 40630 + }, + { + "epoch": 0.58, + "grad_norm": 0.60546875, + "learning_rate": 8.85460949021394e-05, + "loss": 0.912, + "step": 40635 + }, + { + "epoch": 0.58, + "grad_norm": 0.6171875, + "learning_rate": 8.852122382932678e-05, + "loss": 1.0178, + "step": 40640 + }, + { + "epoch": 0.58, + "grad_norm": 0.58984375, + "learning_rate": 8.849635347601692e-05, + "loss": 0.906, + "step": 40645 + }, + { + "epoch": 0.58, + "grad_norm": 0.5234375, + "learning_rate": 8.847148384376872e-05, + "loss": 0.9614, + "step": 40650 + }, + { + "epoch": 0.58, + "grad_norm": 0.59765625, + "learning_rate": 8.844661493414106e-05, + "loss": 0.9905, + "step": 40655 + }, + { + "epoch": 0.58, + "grad_norm": 0.5078125, + "learning_rate": 8.84217467486927e-05, + "loss": 0.8828, + "step": 40660 + }, + { + "epoch": 0.58, + "grad_norm": 0.62890625, + "learning_rate": 8.839687928898248e-05, + "loss": 1.0715, + "step": 40665 + }, + { + "epoch": 0.58, + "grad_norm": 0.51953125, + "learning_rate": 8.837201255656907e-05, + "loss": 0.8497, + "step": 40670 + }, + { + "epoch": 0.58, + "grad_norm": 0.6171875, + "learning_rate": 8.834714655301114e-05, + "loss": 0.9762, + "step": 40675 + }, + { + "epoch": 0.58, + "grad_norm": 0.51953125, + "learning_rate": 8.832228127986734e-05, + "loss": 0.9128, + "step": 40680 + }, + { + "epoch": 0.58, + "grad_norm": 0.5859375, + "learning_rate": 8.82974167386963e-05, + "loss": 1.2287, + "step": 40685 + }, + { + "epoch": 0.58, + "grad_norm": 0.5234375, + "learning_rate": 8.827255293105646e-05, + "loss": 1.03, + "step": 40690 + }, + { + "epoch": 0.58, + "grad_norm": 0.56640625, + "learning_rate": 8.824768985850638e-05, + "loss": 1.0054, + "step": 40695 + }, + { + "epoch": 0.58, + "grad_norm": 0.4921875, + "learning_rate": 8.822282752260445e-05, + "loss": 0.8711, + "step": 40700 + }, + { + "epoch": 0.58, + "grad_norm": 0.482421875, + "learning_rate": 8.819796592490912e-05, + "loss": 0.969, + "step": 40705 + }, + { + "epoch": 0.58, + "grad_norm": 0.546875, + "learning_rate": 8.817310506697873e-05, + "loss": 1.0671, + "step": 40710 + }, + { + "epoch": 0.58, + "grad_norm": 0.53125, + "learning_rate": 8.814824495037157e-05, + "loss": 0.9447, + "step": 40715 + }, + { + "epoch": 0.58, + "grad_norm": 0.5625, + "learning_rate": 8.812338557664594e-05, + "loss": 1.0268, + "step": 40720 + }, + { + "epoch": 0.58, + "grad_norm": 0.55078125, + "learning_rate": 8.809852694736001e-05, + "loss": 1.0194, + "step": 40725 + }, + { + "epoch": 0.58, + "grad_norm": 0.53125, + "learning_rate": 8.807366906407196e-05, + "loss": 0.9151, + "step": 40730 + }, + { + "epoch": 0.58, + "grad_norm": 0.9609375, + "learning_rate": 8.804881192833996e-05, + "loss": 0.7921, + "step": 40735 + }, + { + "epoch": 0.58, + "grad_norm": 0.7109375, + "learning_rate": 8.802395554172201e-05, + "loss": 1.011, + "step": 40740 + }, + { + "epoch": 0.58, + "grad_norm": 0.5625, + "learning_rate": 8.799909990577617e-05, + "loss": 0.915, + "step": 40745 + }, + { + "epoch": 0.58, + "grad_norm": 0.578125, + "learning_rate": 8.79742450220604e-05, + "loss": 1.0229, + "step": 40750 + }, + { + "epoch": 0.58, + "grad_norm": 0.5546875, + "learning_rate": 8.794939089213265e-05, + "loss": 0.9477, + "step": 40755 + }, + { + "epoch": 0.58, + "grad_norm": 0.50390625, + "learning_rate": 8.792453751755081e-05, + "loss": 0.7819, + "step": 40760 + }, + { + "epoch": 0.58, + "grad_norm": 0.89453125, + "learning_rate": 8.789968489987273e-05, + "loss": 0.9665, + "step": 40765 + }, + { + "epoch": 0.58, + "grad_norm": 0.4921875, + "learning_rate": 8.787483304065613e-05, + "loss": 0.7705, + "step": 40770 + }, + { + "epoch": 0.58, + "grad_norm": 0.55078125, + "learning_rate": 8.784998194145885e-05, + "loss": 0.9689, + "step": 40775 + }, + { + "epoch": 0.58, + "grad_norm": 0.546875, + "learning_rate": 8.782513160383852e-05, + "loss": 0.8291, + "step": 40780 + }, + { + "epoch": 0.59, + "grad_norm": 0.625, + "learning_rate": 8.780028202935281e-05, + "loss": 0.9454, + "step": 40785 + }, + { + "epoch": 0.59, + "grad_norm": 0.671875, + "learning_rate": 8.777543321955934e-05, + "loss": 0.9423, + "step": 40790 + }, + { + "epoch": 0.59, + "grad_norm": 0.578125, + "learning_rate": 8.775058517601563e-05, + "loss": 1.0121, + "step": 40795 + }, + { + "epoch": 0.59, + "grad_norm": 0.54296875, + "learning_rate": 8.772573790027919e-05, + "loss": 0.9232, + "step": 40800 + }, + { + "epoch": 0.59, + "grad_norm": 0.56640625, + "learning_rate": 8.770089139390745e-05, + "loss": 1.001, + "step": 40805 + }, + { + "epoch": 0.59, + "grad_norm": 0.56640625, + "learning_rate": 8.767604565845787e-05, + "loss": 0.929, + "step": 40810 + }, + { + "epoch": 0.59, + "grad_norm": 0.61328125, + "learning_rate": 8.76512006954878e-05, + "loss": 0.9996, + "step": 40815 + }, + { + "epoch": 0.59, + "grad_norm": 0.57421875, + "learning_rate": 8.76263565065545e-05, + "loss": 0.9141, + "step": 40820 + }, + { + "epoch": 0.59, + "grad_norm": 0.50390625, + "learning_rate": 8.760151309321527e-05, + "loss": 1.0298, + "step": 40825 + }, + { + "epoch": 0.59, + "grad_norm": 0.5, + "learning_rate": 8.757667045702733e-05, + "loss": 0.8709, + "step": 40830 + }, + { + "epoch": 0.59, + "grad_norm": 0.6328125, + "learning_rate": 8.755182859954784e-05, + "loss": 0.9799, + "step": 40835 + }, + { + "epoch": 0.59, + "grad_norm": 0.5625, + "learning_rate": 8.752698752233394e-05, + "loss": 1.0642, + "step": 40840 + }, + { + "epoch": 0.59, + "grad_norm": 0.490234375, + "learning_rate": 8.750214722694261e-05, + "loss": 0.9883, + "step": 40845 + }, + { + "epoch": 0.59, + "grad_norm": 0.59765625, + "learning_rate": 8.747730771493095e-05, + "loss": 0.9238, + "step": 40850 + }, + { + "epoch": 0.59, + "grad_norm": 0.60546875, + "learning_rate": 8.74524689878559e-05, + "loss": 0.9287, + "step": 40855 + }, + { + "epoch": 0.59, + "grad_norm": 0.55859375, + "learning_rate": 8.742763104727439e-05, + "loss": 0.9518, + "step": 40860 + }, + { + "epoch": 0.59, + "grad_norm": 0.5078125, + "learning_rate": 8.740279389474327e-05, + "loss": 0.8663, + "step": 40865 + }, + { + "epoch": 0.59, + "grad_norm": 0.609375, + "learning_rate": 8.737795753181939e-05, + "loss": 0.9363, + "step": 40870 + }, + { + "epoch": 0.59, + "grad_norm": 0.8046875, + "learning_rate": 8.735312196005949e-05, + "loss": 1.1332, + "step": 40875 + }, + { + "epoch": 0.59, + "grad_norm": 0.59765625, + "learning_rate": 8.732828718102033e-05, + "loss": 1.0223, + "step": 40880 + }, + { + "epoch": 0.59, + "grad_norm": 0.609375, + "learning_rate": 8.730345319625857e-05, + "loss": 1.0087, + "step": 40885 + }, + { + "epoch": 0.59, + "grad_norm": 0.5390625, + "learning_rate": 8.727862000733081e-05, + "loss": 0.9222, + "step": 40890 + }, + { + "epoch": 0.59, + "grad_norm": 0.53515625, + "learning_rate": 8.725378761579366e-05, + "loss": 1.0914, + "step": 40895 + }, + { + "epoch": 0.59, + "grad_norm": 0.53515625, + "learning_rate": 8.722895602320358e-05, + "loss": 1.0208, + "step": 40900 + }, + { + "epoch": 0.59, + "grad_norm": 0.60546875, + "learning_rate": 8.720412523111709e-05, + "loss": 0.8234, + "step": 40905 + }, + { + "epoch": 0.59, + "grad_norm": 0.58984375, + "learning_rate": 8.717929524109062e-05, + "loss": 0.8737, + "step": 40910 + }, + { + "epoch": 0.59, + "grad_norm": 0.48828125, + "learning_rate": 8.71544660546805e-05, + "loss": 1.0277, + "step": 40915 + }, + { + "epoch": 0.59, + "grad_norm": 0.54296875, + "learning_rate": 8.71296376734431e-05, + "loss": 0.9144, + "step": 40920 + }, + { + "epoch": 0.59, + "grad_norm": 0.61328125, + "learning_rate": 8.710481009893467e-05, + "loss": 0.9951, + "step": 40925 + }, + { + "epoch": 0.59, + "grad_norm": 0.6953125, + "learning_rate": 8.70799833327114e-05, + "loss": 0.9455, + "step": 40930 + }, + { + "epoch": 0.59, + "grad_norm": 0.62890625, + "learning_rate": 8.705515737632952e-05, + "loss": 0.9778, + "step": 40935 + }, + { + "epoch": 0.59, + "grad_norm": 0.51171875, + "learning_rate": 8.703033223134511e-05, + "loss": 0.9631, + "step": 40940 + }, + { + "epoch": 0.59, + "grad_norm": 0.47265625, + "learning_rate": 8.700550789931428e-05, + "loss": 0.8185, + "step": 40945 + }, + { + "epoch": 0.59, + "grad_norm": 0.57421875, + "learning_rate": 8.698068438179297e-05, + "loss": 1.0634, + "step": 40950 + }, + { + "epoch": 0.59, + "grad_norm": 0.5859375, + "learning_rate": 8.695586168033719e-05, + "loss": 1.0236, + "step": 40955 + }, + { + "epoch": 0.59, + "grad_norm": 0.5625, + "learning_rate": 8.693103979650288e-05, + "loss": 1.0162, + "step": 40960 + }, + { + "epoch": 0.59, + "grad_norm": 0.5234375, + "learning_rate": 8.690621873184587e-05, + "loss": 0.9561, + "step": 40965 + }, + { + "epoch": 0.59, + "grad_norm": 0.5234375, + "learning_rate": 8.688139848792196e-05, + "loss": 1.0784, + "step": 40970 + }, + { + "epoch": 0.59, + "grad_norm": 0.474609375, + "learning_rate": 8.685657906628696e-05, + "loss": 0.8949, + "step": 40975 + }, + { + "epoch": 0.59, + "grad_norm": 0.546875, + "learning_rate": 8.683176046849655e-05, + "loss": 0.9578, + "step": 40980 + }, + { + "epoch": 0.59, + "grad_norm": 0.46875, + "learning_rate": 8.680694269610637e-05, + "loss": 0.8533, + "step": 40985 + }, + { + "epoch": 0.59, + "grad_norm": 0.54296875, + "learning_rate": 8.678212575067207e-05, + "loss": 0.8894, + "step": 40990 + }, + { + "epoch": 0.59, + "grad_norm": 0.71875, + "learning_rate": 8.675730963374918e-05, + "loss": 0.9729, + "step": 40995 + }, + { + "epoch": 0.59, + "grad_norm": 0.56640625, + "learning_rate": 8.673249434689322e-05, + "loss": 0.9695, + "step": 41000 + }, + { + "epoch": 0.59, + "grad_norm": 0.50390625, + "learning_rate": 8.670767989165962e-05, + "loss": 0.8899, + "step": 41005 + }, + { + "epoch": 0.59, + "grad_norm": 0.5703125, + "learning_rate": 8.668286626960375e-05, + "loss": 0.868, + "step": 41010 + }, + { + "epoch": 0.59, + "grad_norm": 0.60546875, + "learning_rate": 8.6658053482281e-05, + "loss": 0.9941, + "step": 41015 + }, + { + "epoch": 0.59, + "grad_norm": 0.56640625, + "learning_rate": 8.663324153124667e-05, + "loss": 0.9316, + "step": 41020 + }, + { + "epoch": 0.59, + "grad_norm": 0.5078125, + "learning_rate": 8.660843041805597e-05, + "loss": 0.8675, + "step": 41025 + }, + { + "epoch": 0.59, + "grad_norm": 0.55078125, + "learning_rate": 8.658362014426411e-05, + "loss": 0.8628, + "step": 41030 + }, + { + "epoch": 0.59, + "grad_norm": 0.5234375, + "learning_rate": 8.655881071142622e-05, + "loss": 0.8342, + "step": 41035 + }, + { + "epoch": 0.59, + "grad_norm": 0.5703125, + "learning_rate": 8.653400212109737e-05, + "loss": 0.9824, + "step": 41040 + }, + { + "epoch": 0.59, + "grad_norm": 0.55859375, + "learning_rate": 8.650919437483262e-05, + "loss": 1.1338, + "step": 41045 + }, + { + "epoch": 0.59, + "grad_norm": 0.5625, + "learning_rate": 8.648438747418694e-05, + "loss": 0.9745, + "step": 41050 + }, + { + "epoch": 0.59, + "grad_norm": 0.6640625, + "learning_rate": 8.645958142071524e-05, + "loss": 0.9639, + "step": 41055 + }, + { + "epoch": 0.59, + "grad_norm": 0.62890625, + "learning_rate": 8.64347762159724e-05, + "loss": 1.0613, + "step": 41060 + }, + { + "epoch": 0.59, + "grad_norm": 0.5703125, + "learning_rate": 8.64099718615132e-05, + "loss": 0.9962, + "step": 41065 + }, + { + "epoch": 0.59, + "grad_norm": 0.51171875, + "learning_rate": 8.638516835889248e-05, + "loss": 0.8651, + "step": 41070 + }, + { + "epoch": 0.59, + "grad_norm": 0.59375, + "learning_rate": 8.636036570966491e-05, + "loss": 0.8809, + "step": 41075 + }, + { + "epoch": 0.59, + "grad_norm": 0.58203125, + "learning_rate": 8.633556391538511e-05, + "loss": 1.0232, + "step": 41080 + }, + { + "epoch": 0.59, + "grad_norm": 0.53125, + "learning_rate": 8.631076297760776e-05, + "loss": 0.9977, + "step": 41085 + }, + { + "epoch": 0.59, + "grad_norm": 0.55078125, + "learning_rate": 8.628596289788738e-05, + "loss": 1.0693, + "step": 41090 + }, + { + "epoch": 0.59, + "grad_norm": 0.546875, + "learning_rate": 8.626116367777845e-05, + "loss": 0.9539, + "step": 41095 + }, + { + "epoch": 0.59, + "grad_norm": 0.57421875, + "learning_rate": 8.623636531883545e-05, + "loss": 0.894, + "step": 41100 + }, + { + "epoch": 0.59, + "grad_norm": 0.57421875, + "learning_rate": 8.621156782261275e-05, + "loss": 1.0753, + "step": 41105 + }, + { + "epoch": 0.59, + "grad_norm": 0.5390625, + "learning_rate": 8.618677119066468e-05, + "loss": 0.8618, + "step": 41110 + }, + { + "epoch": 0.59, + "grad_norm": 0.61328125, + "learning_rate": 8.616197542454551e-05, + "loss": 1.0041, + "step": 41115 + }, + { + "epoch": 0.59, + "grad_norm": 0.578125, + "learning_rate": 8.613718052580946e-05, + "loss": 0.9626, + "step": 41120 + }, + { + "epoch": 0.59, + "grad_norm": 0.6015625, + "learning_rate": 8.611238649601075e-05, + "loss": 0.7566, + "step": 41125 + }, + { + "epoch": 0.59, + "grad_norm": 0.65234375, + "learning_rate": 8.608759333670349e-05, + "loss": 0.9487, + "step": 41130 + }, + { + "epoch": 0.59, + "grad_norm": 0.578125, + "learning_rate": 8.606280104944168e-05, + "loss": 0.9074, + "step": 41135 + }, + { + "epoch": 0.59, + "grad_norm": 0.6015625, + "learning_rate": 8.603800963577939e-05, + "loss": 1.0181, + "step": 41140 + }, + { + "epoch": 0.59, + "grad_norm": 0.53515625, + "learning_rate": 8.601321909727056e-05, + "loss": 0.8415, + "step": 41145 + }, + { + "epoch": 0.59, + "grad_norm": 0.55078125, + "learning_rate": 8.598842943546908e-05, + "loss": 0.891, + "step": 41150 + }, + { + "epoch": 0.59, + "grad_norm": 0.53125, + "learning_rate": 8.596364065192885e-05, + "loss": 0.9422, + "step": 41155 + }, + { + "epoch": 0.59, + "grad_norm": 0.52734375, + "learning_rate": 8.593885274820356e-05, + "loss": 0.9747, + "step": 41160 + }, + { + "epoch": 0.59, + "grad_norm": 0.5625, + "learning_rate": 8.591406572584701e-05, + "loss": 0.9683, + "step": 41165 + }, + { + "epoch": 0.59, + "grad_norm": 0.625, + "learning_rate": 8.588927958641286e-05, + "loss": 0.9289, + "step": 41170 + }, + { + "epoch": 0.59, + "grad_norm": 0.5390625, + "learning_rate": 8.586449433145472e-05, + "loss": 0.94, + "step": 41175 + }, + { + "epoch": 0.59, + "grad_norm": 0.546875, + "learning_rate": 8.583970996252618e-05, + "loss": 1.0362, + "step": 41180 + }, + { + "epoch": 0.59, + "grad_norm": 0.5625, + "learning_rate": 8.581492648118077e-05, + "loss": 1.1214, + "step": 41185 + }, + { + "epoch": 0.59, + "grad_norm": 0.5625, + "learning_rate": 8.579014388897188e-05, + "loss": 0.9097, + "step": 41190 + }, + { + "epoch": 0.59, + "grad_norm": 0.640625, + "learning_rate": 8.576536218745299e-05, + "loss": 0.9254, + "step": 41195 + }, + { + "epoch": 0.59, + "grad_norm": 0.5390625, + "learning_rate": 8.57405813781774e-05, + "loss": 1.0014, + "step": 41200 + }, + { + "epoch": 0.59, + "grad_norm": 0.578125, + "learning_rate": 8.57158014626984e-05, + "loss": 1.0573, + "step": 41205 + }, + { + "epoch": 0.59, + "grad_norm": 0.66796875, + "learning_rate": 8.569102244256929e-05, + "loss": 0.91, + "step": 41210 + }, + { + "epoch": 0.59, + "grad_norm": 0.6484375, + "learning_rate": 8.566624431934312e-05, + "loss": 0.9182, + "step": 41215 + }, + { + "epoch": 0.59, + "grad_norm": 0.5078125, + "learning_rate": 8.564146709457312e-05, + "loss": 0.9758, + "step": 41220 + }, + { + "epoch": 0.59, + "grad_norm": 0.65234375, + "learning_rate": 8.56166907698123e-05, + "loss": 0.7203, + "step": 41225 + }, + { + "epoch": 0.59, + "grad_norm": 0.52734375, + "learning_rate": 8.559191534661367e-05, + "loss": 0.8752, + "step": 41230 + }, + { + "epoch": 0.59, + "grad_norm": 0.61328125, + "learning_rate": 8.556714082653021e-05, + "loss": 1.0124, + "step": 41235 + }, + { + "epoch": 0.59, + "grad_norm": 0.52734375, + "learning_rate": 8.55423672111148e-05, + "loss": 0.9686, + "step": 41240 + }, + { + "epoch": 0.59, + "grad_norm": 0.55078125, + "learning_rate": 8.551759450192026e-05, + "loss": 1.0235, + "step": 41245 + }, + { + "epoch": 0.59, + "grad_norm": 0.6171875, + "learning_rate": 8.549282270049941e-05, + "loss": 1.0041, + "step": 41250 + }, + { + "epoch": 0.59, + "grad_norm": 0.59375, + "learning_rate": 8.546805180840497e-05, + "loss": 0.9878, + "step": 41255 + }, + { + "epoch": 0.59, + "grad_norm": 0.66796875, + "learning_rate": 8.54432818271896e-05, + "loss": 0.8734, + "step": 41260 + }, + { + "epoch": 0.59, + "grad_norm": 0.50390625, + "learning_rate": 8.541851275840589e-05, + "loss": 0.9486, + "step": 41265 + }, + { + "epoch": 0.59, + "grad_norm": 0.50390625, + "learning_rate": 8.539374460360638e-05, + "loss": 0.7125, + "step": 41270 + }, + { + "epoch": 0.59, + "grad_norm": 0.58984375, + "learning_rate": 8.536897736434362e-05, + "loss": 0.9791, + "step": 41275 + }, + { + "epoch": 0.59, + "grad_norm": 0.72265625, + "learning_rate": 8.534421104217001e-05, + "loss": 1.0311, + "step": 41280 + }, + { + "epoch": 0.59, + "grad_norm": 0.52734375, + "learning_rate": 8.531944563863794e-05, + "loss": 0.9974, + "step": 41285 + }, + { + "epoch": 0.59, + "grad_norm": 0.56640625, + "learning_rate": 8.529468115529975e-05, + "loss": 0.9587, + "step": 41290 + }, + { + "epoch": 0.59, + "grad_norm": 0.59765625, + "learning_rate": 8.52699175937077e-05, + "loss": 0.9041, + "step": 41295 + }, + { + "epoch": 0.59, + "grad_norm": 0.57421875, + "learning_rate": 8.524515495541395e-05, + "loss": 0.8812, + "step": 41300 + }, + { + "epoch": 0.59, + "grad_norm": 0.57421875, + "learning_rate": 8.522039324197073e-05, + "loss": 0.9394, + "step": 41305 + }, + { + "epoch": 0.59, + "grad_norm": 0.53125, + "learning_rate": 8.51956324549301e-05, + "loss": 0.9483, + "step": 41310 + }, + { + "epoch": 0.59, + "grad_norm": 0.5078125, + "learning_rate": 8.517087259584409e-05, + "loss": 0.8897, + "step": 41315 + }, + { + "epoch": 0.59, + "grad_norm": 0.57421875, + "learning_rate": 8.514611366626466e-05, + "loss": 0.9513, + "step": 41320 + }, + { + "epoch": 0.59, + "grad_norm": 0.578125, + "learning_rate": 8.512135566774372e-05, + "loss": 0.9343, + "step": 41325 + }, + { + "epoch": 0.59, + "grad_norm": 0.55078125, + "learning_rate": 8.50965986018332e-05, + "loss": 0.8736, + "step": 41330 + }, + { + "epoch": 0.59, + "grad_norm": 0.61328125, + "learning_rate": 8.507184247008482e-05, + "loss": 1.0342, + "step": 41335 + }, + { + "epoch": 0.59, + "grad_norm": 0.51171875, + "learning_rate": 8.504708727405035e-05, + "loss": 0.7705, + "step": 41340 + }, + { + "epoch": 0.59, + "grad_norm": 0.57421875, + "learning_rate": 8.502233301528149e-05, + "loss": 1.098, + "step": 41345 + }, + { + "epoch": 0.59, + "grad_norm": 0.6171875, + "learning_rate": 8.499757969532986e-05, + "loss": 0.855, + "step": 41350 + }, + { + "epoch": 0.59, + "grad_norm": 0.5625, + "learning_rate": 8.497282731574701e-05, + "loss": 0.9717, + "step": 41355 + }, + { + "epoch": 0.59, + "grad_norm": 0.515625, + "learning_rate": 8.494807587808447e-05, + "loss": 0.9026, + "step": 41360 + }, + { + "epoch": 0.59, + "grad_norm": 0.5234375, + "learning_rate": 8.49233253838937e-05, + "loss": 0.8608, + "step": 41365 + }, + { + "epoch": 0.59, + "grad_norm": 0.58203125, + "learning_rate": 8.489857583472604e-05, + "loss": 0.8898, + "step": 41370 + }, + { + "epoch": 0.59, + "grad_norm": 0.578125, + "learning_rate": 8.487382723213284e-05, + "loss": 1.023, + "step": 41375 + }, + { + "epoch": 0.59, + "grad_norm": 0.57421875, + "learning_rate": 8.484907957766537e-05, + "loss": 0.8763, + "step": 41380 + }, + { + "epoch": 0.59, + "grad_norm": 0.53125, + "learning_rate": 8.482433287287485e-05, + "loss": 0.9427, + "step": 41385 + }, + { + "epoch": 0.59, + "grad_norm": 0.54296875, + "learning_rate": 8.479958711931245e-05, + "loss": 0.8177, + "step": 41390 + }, + { + "epoch": 0.59, + "grad_norm": 0.58203125, + "learning_rate": 8.477484231852921e-05, + "loss": 0.9392, + "step": 41395 + }, + { + "epoch": 0.59, + "grad_norm": 0.609375, + "learning_rate": 8.475009847207622e-05, + "loss": 0.895, + "step": 41400 + }, + { + "epoch": 0.59, + "grad_norm": 0.55859375, + "learning_rate": 8.472535558150441e-05, + "loss": 0.9946, + "step": 41405 + }, + { + "epoch": 0.59, + "grad_norm": 0.546875, + "learning_rate": 8.470061364836472e-05, + "loss": 0.9943, + "step": 41410 + }, + { + "epoch": 0.59, + "grad_norm": 0.6015625, + "learning_rate": 8.467587267420797e-05, + "loss": 0.9164, + "step": 41415 + }, + { + "epoch": 0.59, + "grad_norm": 0.56640625, + "learning_rate": 8.465113266058503e-05, + "loss": 0.9155, + "step": 41420 + }, + { + "epoch": 0.59, + "grad_norm": 0.55859375, + "learning_rate": 8.462639360904653e-05, + "loss": 0.9499, + "step": 41425 + }, + { + "epoch": 0.59, + "grad_norm": 0.62890625, + "learning_rate": 8.460165552114321e-05, + "loss": 0.8514, + "step": 41430 + }, + { + "epoch": 0.59, + "grad_norm": 0.451171875, + "learning_rate": 8.457691839842563e-05, + "loss": 0.8172, + "step": 41435 + }, + { + "epoch": 0.59, + "grad_norm": 0.5859375, + "learning_rate": 8.455218224244439e-05, + "loss": 0.9282, + "step": 41440 + }, + { + "epoch": 0.59, + "grad_norm": 0.64453125, + "learning_rate": 8.452744705474995e-05, + "loss": 0.9488, + "step": 41445 + }, + { + "epoch": 0.59, + "grad_norm": 0.55859375, + "learning_rate": 8.450271283689274e-05, + "loss": 0.9748, + "step": 41450 + }, + { + "epoch": 0.59, + "grad_norm": 0.5234375, + "learning_rate": 8.447797959042317e-05, + "loss": 0.8702, + "step": 41455 + }, + { + "epoch": 0.59, + "grad_norm": 0.54296875, + "learning_rate": 8.44532473168915e-05, + "loss": 0.9403, + "step": 41460 + }, + { + "epoch": 0.59, + "grad_norm": 0.56640625, + "learning_rate": 8.442851601784798e-05, + "loss": 0.8392, + "step": 41465 + }, + { + "epoch": 0.59, + "grad_norm": 0.59375, + "learning_rate": 8.440378569484286e-05, + "loss": 0.9365, + "step": 41470 + }, + { + "epoch": 0.59, + "grad_norm": 0.55859375, + "learning_rate": 8.437905634942618e-05, + "loss": 0.9095, + "step": 41475 + }, + { + "epoch": 0.6, + "grad_norm": 0.60546875, + "learning_rate": 8.435432798314805e-05, + "loss": 1.0542, + "step": 41480 + }, + { + "epoch": 0.6, + "grad_norm": 0.5625, + "learning_rate": 8.432960059755842e-05, + "loss": 0.9465, + "step": 41485 + }, + { + "epoch": 0.6, + "grad_norm": 0.515625, + "learning_rate": 8.430487419420732e-05, + "loss": 0.9138, + "step": 41490 + }, + { + "epoch": 0.6, + "grad_norm": 0.546875, + "learning_rate": 8.428014877464457e-05, + "loss": 0.9988, + "step": 41495 + }, + { + "epoch": 0.6, + "grad_norm": 0.58984375, + "learning_rate": 8.425542434041999e-05, + "loss": 0.983, + "step": 41500 + }, + { + "epoch": 0.6, + "grad_norm": 0.79296875, + "learning_rate": 8.423070089308333e-05, + "loss": 0.9907, + "step": 41505 + }, + { + "epoch": 0.6, + "grad_norm": 0.54296875, + "learning_rate": 8.420597843418432e-05, + "loss": 0.8086, + "step": 41510 + }, + { + "epoch": 0.6, + "grad_norm": 0.55859375, + "learning_rate": 8.418125696527256e-05, + "loss": 1.0055, + "step": 41515 + }, + { + "epoch": 0.6, + "grad_norm": 0.515625, + "learning_rate": 8.415653648789762e-05, + "loss": 0.8668, + "step": 41520 + }, + { + "epoch": 0.6, + "grad_norm": 0.76171875, + "learning_rate": 8.413181700360905e-05, + "loss": 1.0207, + "step": 41525 + }, + { + "epoch": 0.6, + "grad_norm": 0.5625, + "learning_rate": 8.410709851395626e-05, + "loss": 0.8962, + "step": 41530 + }, + { + "epoch": 0.6, + "grad_norm": 0.58203125, + "learning_rate": 8.408238102048861e-05, + "loss": 0.8318, + "step": 41535 + }, + { + "epoch": 0.6, + "grad_norm": 0.466796875, + "learning_rate": 8.405766452475544e-05, + "loss": 0.8854, + "step": 41540 + }, + { + "epoch": 0.6, + "grad_norm": 0.64453125, + "learning_rate": 8.403294902830603e-05, + "loss": 0.9743, + "step": 41545 + }, + { + "epoch": 0.6, + "grad_norm": 0.57421875, + "learning_rate": 8.400823453268956e-05, + "loss": 0.8932, + "step": 41550 + }, + { + "epoch": 0.6, + "grad_norm": 0.62109375, + "learning_rate": 8.398352103945515e-05, + "loss": 0.9847, + "step": 41555 + }, + { + "epoch": 0.6, + "grad_norm": 0.5390625, + "learning_rate": 8.39588085501519e-05, + "loss": 0.8523, + "step": 41560 + }, + { + "epoch": 0.6, + "grad_norm": 0.51953125, + "learning_rate": 8.39340970663288e-05, + "loss": 0.9778, + "step": 41565 + }, + { + "epoch": 0.6, + "grad_norm": 0.60546875, + "learning_rate": 8.390938658953478e-05, + "loss": 0.9594, + "step": 41570 + }, + { + "epoch": 0.6, + "grad_norm": 0.486328125, + "learning_rate": 8.388467712131877e-05, + "loss": 0.8426, + "step": 41575 + }, + { + "epoch": 0.6, + "grad_norm": 0.5859375, + "learning_rate": 8.38599686632295e-05, + "loss": 1.0202, + "step": 41580 + }, + { + "epoch": 0.6, + "grad_norm": 0.55859375, + "learning_rate": 8.383526121681581e-05, + "loss": 1.0363, + "step": 41585 + }, + { + "epoch": 0.6, + "grad_norm": 0.486328125, + "learning_rate": 8.381055478362635e-05, + "loss": 0.9128, + "step": 41590 + }, + { + "epoch": 0.6, + "grad_norm": 0.5546875, + "learning_rate": 8.378584936520973e-05, + "loss": 0.9713, + "step": 41595 + }, + { + "epoch": 0.6, + "grad_norm": 0.5703125, + "learning_rate": 8.376114496311456e-05, + "loss": 0.9081, + "step": 41600 + }, + { + "epoch": 0.6, + "grad_norm": 0.5390625, + "learning_rate": 8.373644157888933e-05, + "loss": 0.993, + "step": 41605 + }, + { + "epoch": 0.6, + "grad_norm": 0.60546875, + "learning_rate": 8.371173921408243e-05, + "loss": 0.9647, + "step": 41610 + }, + { + "epoch": 0.6, + "grad_norm": 0.546875, + "learning_rate": 8.368703787024229e-05, + "loss": 1.16, + "step": 41615 + }, + { + "epoch": 0.6, + "grad_norm": 0.578125, + "learning_rate": 8.36623375489172e-05, + "loss": 0.8755, + "step": 41620 + }, + { + "epoch": 0.6, + "grad_norm": 0.5859375, + "learning_rate": 8.363763825165538e-05, + "loss": 1.1381, + "step": 41625 + }, + { + "epoch": 0.6, + "grad_norm": 0.48046875, + "learning_rate": 8.361293998000509e-05, + "loss": 0.9973, + "step": 41630 + }, + { + "epoch": 0.6, + "grad_norm": 0.51953125, + "learning_rate": 8.35882427355143e-05, + "loss": 0.9467, + "step": 41635 + }, + { + "epoch": 0.6, + "grad_norm": 0.58203125, + "learning_rate": 8.35635465197312e-05, + "loss": 0.8742, + "step": 41640 + }, + { + "epoch": 0.6, + "grad_norm": 0.5078125, + "learning_rate": 8.353885133420372e-05, + "loss": 0.9813, + "step": 41645 + }, + { + "epoch": 0.6, + "grad_norm": 0.546875, + "learning_rate": 8.351415718047976e-05, + "loss": 1.0443, + "step": 41650 + }, + { + "epoch": 0.6, + "grad_norm": 0.62109375, + "learning_rate": 8.348946406010721e-05, + "loss": 0.9129, + "step": 41655 + }, + { + "epoch": 0.6, + "grad_norm": 0.55078125, + "learning_rate": 8.346477197463387e-05, + "loss": 0.938, + "step": 41660 + }, + { + "epoch": 0.6, + "grad_norm": 0.5390625, + "learning_rate": 8.344008092560743e-05, + "loss": 0.9234, + "step": 41665 + }, + { + "epoch": 0.6, + "grad_norm": 0.5625, + "learning_rate": 8.341539091457561e-05, + "loss": 1.1227, + "step": 41670 + }, + { + "epoch": 0.6, + "grad_norm": 0.625, + "learning_rate": 8.339070194308597e-05, + "loss": 0.9144, + "step": 41675 + }, + { + "epoch": 0.6, + "grad_norm": 0.6171875, + "learning_rate": 8.336601401268607e-05, + "loss": 0.9558, + "step": 41680 + }, + { + "epoch": 0.6, + "grad_norm": 0.58984375, + "learning_rate": 8.334132712492333e-05, + "loss": 0.8646, + "step": 41685 + }, + { + "epoch": 0.6, + "grad_norm": 0.5703125, + "learning_rate": 8.331664128134515e-05, + "loss": 1.0444, + "step": 41690 + }, + { + "epoch": 0.6, + "grad_norm": 0.50390625, + "learning_rate": 8.329195648349892e-05, + "loss": 0.9609, + "step": 41695 + }, + { + "epoch": 0.6, + "grad_norm": 0.5078125, + "learning_rate": 8.32672727329319e-05, + "loss": 1.0872, + "step": 41700 + }, + { + "epoch": 0.6, + "grad_norm": 0.57421875, + "learning_rate": 8.324259003119126e-05, + "loss": 1.0415, + "step": 41705 + }, + { + "epoch": 0.6, + "grad_norm": 0.65625, + "learning_rate": 8.321790837982417e-05, + "loss": 0.9616, + "step": 41710 + }, + { + "epoch": 0.6, + "grad_norm": 0.58984375, + "learning_rate": 8.31932277803777e-05, + "loss": 0.7862, + "step": 41715 + }, + { + "epoch": 0.6, + "grad_norm": 0.6796875, + "learning_rate": 8.316854823439884e-05, + "loss": 0.9196, + "step": 41720 + }, + { + "epoch": 0.6, + "grad_norm": 0.5546875, + "learning_rate": 8.314386974343455e-05, + "loss": 0.8995, + "step": 41725 + }, + { + "epoch": 0.6, + "grad_norm": 0.5625, + "learning_rate": 8.311919230903172e-05, + "loss": 0.9577, + "step": 41730 + }, + { + "epoch": 0.6, + "grad_norm": 0.462890625, + "learning_rate": 8.309451593273717e-05, + "loss": 0.8472, + "step": 41735 + }, + { + "epoch": 0.6, + "grad_norm": 0.52734375, + "learning_rate": 8.306984061609758e-05, + "loss": 0.9422, + "step": 41740 + }, + { + "epoch": 0.6, + "grad_norm": 0.5546875, + "learning_rate": 8.304516636065964e-05, + "loss": 1.1044, + "step": 41745 + }, + { + "epoch": 0.6, + "grad_norm": 0.61328125, + "learning_rate": 8.302049316796999e-05, + "loss": 0.8926, + "step": 41750 + }, + { + "epoch": 0.6, + "grad_norm": 0.486328125, + "learning_rate": 8.29958210395752e-05, + "loss": 0.8397, + "step": 41755 + }, + { + "epoch": 0.6, + "grad_norm": 0.51171875, + "learning_rate": 8.297114997702166e-05, + "loss": 0.9482, + "step": 41760 + }, + { + "epoch": 0.6, + "grad_norm": 0.51953125, + "learning_rate": 8.294647998185587e-05, + "loss": 0.8748, + "step": 41765 + }, + { + "epoch": 0.6, + "grad_norm": 0.53125, + "learning_rate": 8.292181105562414e-05, + "loss": 0.9345, + "step": 41770 + }, + { + "epoch": 0.6, + "grad_norm": 0.83984375, + "learning_rate": 8.289714319987271e-05, + "loss": 1.0754, + "step": 41775 + }, + { + "epoch": 0.6, + "grad_norm": 0.5546875, + "learning_rate": 8.287247641614785e-05, + "loss": 0.9052, + "step": 41780 + }, + { + "epoch": 0.6, + "grad_norm": 0.5625, + "learning_rate": 8.28478107059957e-05, + "loss": 0.9907, + "step": 41785 + }, + { + "epoch": 0.6, + "grad_norm": 0.58984375, + "learning_rate": 8.282314607096228e-05, + "loss": 1.0787, + "step": 41790 + }, + { + "epoch": 0.6, + "grad_norm": 0.515625, + "learning_rate": 8.279848251259364e-05, + "loss": 0.9882, + "step": 41795 + }, + { + "epoch": 0.6, + "grad_norm": 0.5390625, + "learning_rate": 8.277382003243567e-05, + "loss": 0.8283, + "step": 41800 + }, + { + "epoch": 0.6, + "grad_norm": 0.68359375, + "learning_rate": 8.274915863203432e-05, + "loss": 0.8574, + "step": 41805 + }, + { + "epoch": 0.6, + "grad_norm": 0.55078125, + "learning_rate": 8.272449831293534e-05, + "loss": 0.902, + "step": 41810 + }, + { + "epoch": 0.6, + "grad_norm": 0.609375, + "learning_rate": 8.269983907668446e-05, + "loss": 0.965, + "step": 41815 + }, + { + "epoch": 0.6, + "grad_norm": 0.59765625, + "learning_rate": 8.26751809248274e-05, + "loss": 1.014, + "step": 41820 + }, + { + "epoch": 0.6, + "grad_norm": 0.83203125, + "learning_rate": 8.265052385890972e-05, + "loss": 0.947, + "step": 41825 + }, + { + "epoch": 0.6, + "grad_norm": 0.6640625, + "learning_rate": 8.262586788047696e-05, + "loss": 1.0511, + "step": 41830 + }, + { + "epoch": 0.6, + "grad_norm": 0.546875, + "learning_rate": 8.26012129910746e-05, + "loss": 0.8874, + "step": 41835 + }, + { + "epoch": 0.6, + "grad_norm": 0.51953125, + "learning_rate": 8.257655919224804e-05, + "loss": 0.8345, + "step": 41840 + }, + { + "epoch": 0.6, + "grad_norm": 0.578125, + "learning_rate": 8.255190648554257e-05, + "loss": 0.8518, + "step": 41845 + }, + { + "epoch": 0.6, + "grad_norm": 0.55859375, + "learning_rate": 8.252725487250349e-05, + "loss": 1.0714, + "step": 41850 + }, + { + "epoch": 0.6, + "grad_norm": 0.57421875, + "learning_rate": 8.250260435467594e-05, + "loss": 0.9246, + "step": 41855 + }, + { + "epoch": 0.6, + "grad_norm": 0.55078125, + "learning_rate": 8.247795493360511e-05, + "loss": 0.978, + "step": 41860 + }, + { + "epoch": 0.6, + "grad_norm": 0.59375, + "learning_rate": 8.2453306610836e-05, + "loss": 0.8857, + "step": 41865 + }, + { + "epoch": 0.6, + "grad_norm": 0.50390625, + "learning_rate": 8.24286593879136e-05, + "loss": 0.9305, + "step": 41870 + }, + { + "epoch": 0.6, + "grad_norm": 0.515625, + "learning_rate": 8.240401326638287e-05, + "loss": 0.8613, + "step": 41875 + }, + { + "epoch": 0.6, + "grad_norm": 0.498046875, + "learning_rate": 8.237936824778863e-05, + "loss": 1.0857, + "step": 41880 + }, + { + "epoch": 0.6, + "grad_norm": 0.51171875, + "learning_rate": 8.235472433367563e-05, + "loss": 0.9642, + "step": 41885 + }, + { + "epoch": 0.6, + "grad_norm": 0.59765625, + "learning_rate": 8.233008152558868e-05, + "loss": 0.8736, + "step": 41890 + }, + { + "epoch": 0.6, + "grad_norm": 0.51953125, + "learning_rate": 8.230543982507227e-05, + "loss": 0.9844, + "step": 41895 + }, + { + "epoch": 0.6, + "grad_norm": 0.55078125, + "learning_rate": 8.228079923367107e-05, + "loss": 0.9585, + "step": 41900 + }, + { + "epoch": 0.6, + "grad_norm": 0.5390625, + "learning_rate": 8.225615975292955e-05, + "loss": 0.9325, + "step": 41905 + }, + { + "epoch": 0.6, + "grad_norm": 0.5625, + "learning_rate": 8.223152138439213e-05, + "loss": 0.8639, + "step": 41910 + }, + { + "epoch": 0.6, + "grad_norm": 0.61328125, + "learning_rate": 8.220688412960321e-05, + "loss": 0.987, + "step": 41915 + }, + { + "epoch": 0.6, + "grad_norm": 0.498046875, + "learning_rate": 8.218224799010704e-05, + "loss": 0.8088, + "step": 41920 + }, + { + "epoch": 0.6, + "grad_norm": 0.546875, + "learning_rate": 8.215761296744786e-05, + "loss": 1.0435, + "step": 41925 + }, + { + "epoch": 0.6, + "grad_norm": 0.64453125, + "learning_rate": 8.213297906316983e-05, + "loss": 0.8773, + "step": 41930 + }, + { + "epoch": 0.6, + "grad_norm": 0.55078125, + "learning_rate": 8.210834627881702e-05, + "loss": 0.9031, + "step": 41935 + }, + { + "epoch": 0.6, + "grad_norm": 0.5859375, + "learning_rate": 8.208371461593341e-05, + "loss": 0.8389, + "step": 41940 + }, + { + "epoch": 0.6, + "grad_norm": 0.54296875, + "learning_rate": 8.205908407606306e-05, + "loss": 0.9602, + "step": 41945 + }, + { + "epoch": 0.6, + "grad_norm": 0.59765625, + "learning_rate": 8.203445466074968e-05, + "loss": 0.9552, + "step": 41950 + }, + { + "epoch": 0.6, + "grad_norm": 0.546875, + "learning_rate": 8.200982637153715e-05, + "loss": 0.9129, + "step": 41955 + }, + { + "epoch": 0.6, + "grad_norm": 0.5703125, + "learning_rate": 8.198519920996921e-05, + "loss": 0.8813, + "step": 41960 + }, + { + "epoch": 0.6, + "grad_norm": 0.625, + "learning_rate": 8.196057317758948e-05, + "loss": 1.0221, + "step": 41965 + }, + { + "epoch": 0.6, + "grad_norm": 0.53125, + "learning_rate": 8.193594827594158e-05, + "loss": 0.9442, + "step": 41970 + }, + { + "epoch": 0.6, + "grad_norm": 0.5, + "learning_rate": 8.191132450656902e-05, + "loss": 0.9139, + "step": 41975 + }, + { + "epoch": 0.6, + "grad_norm": 0.51953125, + "learning_rate": 8.188670187101521e-05, + "loss": 1.1236, + "step": 41980 + }, + { + "epoch": 0.6, + "grad_norm": 0.5078125, + "learning_rate": 8.186208037082359e-05, + "loss": 0.8923, + "step": 41985 + }, + { + "epoch": 0.6, + "grad_norm": 0.578125, + "learning_rate": 8.183746000753743e-05, + "loss": 1.0116, + "step": 41990 + }, + { + "epoch": 0.6, + "grad_norm": 0.56640625, + "learning_rate": 8.181284078269997e-05, + "loss": 0.8802, + "step": 41995 + }, + { + "epoch": 0.6, + "grad_norm": 0.52734375, + "learning_rate": 8.178822269785435e-05, + "loss": 1.0867, + "step": 42000 + }, + { + "epoch": 0.6, + "grad_norm": 0.484375, + "learning_rate": 8.176360575454366e-05, + "loss": 0.9187, + "step": 42005 + }, + { + "epoch": 0.6, + "grad_norm": 0.55078125, + "learning_rate": 8.173898995431095e-05, + "loss": 1.1618, + "step": 42010 + }, + { + "epoch": 0.6, + "grad_norm": 0.578125, + "learning_rate": 8.171437529869912e-05, + "loss": 0.993, + "step": 42015 + }, + { + "epoch": 0.6, + "grad_norm": 0.6796875, + "learning_rate": 8.168976178925109e-05, + "loss": 1.0539, + "step": 42020 + }, + { + "epoch": 0.6, + "grad_norm": 0.55078125, + "learning_rate": 8.166514942750964e-05, + "loss": 0.9547, + "step": 42025 + }, + { + "epoch": 0.6, + "grad_norm": 0.45703125, + "learning_rate": 8.164053821501751e-05, + "loss": 0.7925, + "step": 42030 + }, + { + "epoch": 0.6, + "grad_norm": 0.53515625, + "learning_rate": 8.161592815331734e-05, + "loss": 0.8804, + "step": 42035 + }, + { + "epoch": 0.6, + "grad_norm": 0.65625, + "learning_rate": 8.159131924395176e-05, + "loss": 1.0156, + "step": 42040 + }, + { + "epoch": 0.6, + "grad_norm": 0.5703125, + "learning_rate": 8.156671148846325e-05, + "loss": 0.914, + "step": 42045 + }, + { + "epoch": 0.6, + "grad_norm": 0.5078125, + "learning_rate": 8.154210488839426e-05, + "loss": 0.9318, + "step": 42050 + }, + { + "epoch": 0.6, + "grad_norm": 0.55078125, + "learning_rate": 8.151749944528714e-05, + "loss": 1.0271, + "step": 42055 + }, + { + "epoch": 0.6, + "grad_norm": 0.59765625, + "learning_rate": 8.14928951606842e-05, + "loss": 1.0947, + "step": 42060 + }, + { + "epoch": 0.6, + "grad_norm": 0.55078125, + "learning_rate": 8.146829203612769e-05, + "loss": 0.7624, + "step": 42065 + }, + { + "epoch": 0.6, + "grad_norm": 0.59765625, + "learning_rate": 8.144369007315973e-05, + "loss": 0.7011, + "step": 42070 + }, + { + "epoch": 0.6, + "grad_norm": 0.546875, + "learning_rate": 8.141908927332239e-05, + "loss": 1.0747, + "step": 42075 + }, + { + "epoch": 0.6, + "grad_norm": 0.55078125, + "learning_rate": 8.139448963815772e-05, + "loss": 0.968, + "step": 42080 + }, + { + "epoch": 0.6, + "grad_norm": 0.78515625, + "learning_rate": 8.136989116920763e-05, + "loss": 1.0052, + "step": 42085 + }, + { + "epoch": 0.6, + "grad_norm": 0.5546875, + "learning_rate": 8.134529386801396e-05, + "loss": 1.0768, + "step": 42090 + }, + { + "epoch": 0.6, + "grad_norm": 0.65625, + "learning_rate": 8.132069773611854e-05, + "loss": 1.1377, + "step": 42095 + }, + { + "epoch": 0.6, + "grad_norm": 0.484375, + "learning_rate": 8.129610277506308e-05, + "loss": 0.9914, + "step": 42100 + }, + { + "epoch": 0.6, + "grad_norm": 0.66015625, + "learning_rate": 8.127150898638918e-05, + "loss": 1.1432, + "step": 42105 + }, + { + "epoch": 0.6, + "grad_norm": 0.6015625, + "learning_rate": 8.124691637163842e-05, + "loss": 0.873, + "step": 42110 + }, + { + "epoch": 0.6, + "grad_norm": 0.52734375, + "learning_rate": 8.122232493235228e-05, + "loss": 1.0254, + "step": 42115 + }, + { + "epoch": 0.6, + "grad_norm": 0.5234375, + "learning_rate": 8.119773467007222e-05, + "loss": 1.0591, + "step": 42120 + }, + { + "epoch": 0.6, + "grad_norm": 0.515625, + "learning_rate": 8.117314558633956e-05, + "loss": 0.8329, + "step": 42125 + }, + { + "epoch": 0.6, + "grad_norm": 0.5390625, + "learning_rate": 8.114855768269556e-05, + "loss": 0.981, + "step": 42130 + }, + { + "epoch": 0.6, + "grad_norm": 0.60546875, + "learning_rate": 8.112397096068146e-05, + "loss": 0.9654, + "step": 42135 + }, + { + "epoch": 0.6, + "grad_norm": 0.578125, + "learning_rate": 8.109938542183836e-05, + "loss": 0.9152, + "step": 42140 + }, + { + "epoch": 0.6, + "grad_norm": 0.609375, + "learning_rate": 8.107480106770729e-05, + "loss": 1.0351, + "step": 42145 + }, + { + "epoch": 0.6, + "grad_norm": 0.609375, + "learning_rate": 8.105021789982925e-05, + "loss": 1.0923, + "step": 42150 + }, + { + "epoch": 0.6, + "grad_norm": 0.52734375, + "learning_rate": 8.102563591974516e-05, + "loss": 0.9168, + "step": 42155 + }, + { + "epoch": 0.6, + "grad_norm": 0.5703125, + "learning_rate": 8.100105512899579e-05, + "loss": 0.9979, + "step": 42160 + }, + { + "epoch": 0.6, + "grad_norm": 0.578125, + "learning_rate": 8.097647552912192e-05, + "loss": 0.9022, + "step": 42165 + }, + { + "epoch": 0.6, + "grad_norm": 0.60546875, + "learning_rate": 8.095189712166425e-05, + "loss": 1.0565, + "step": 42170 + }, + { + "epoch": 0.6, + "grad_norm": 0.51171875, + "learning_rate": 8.092731990816335e-05, + "loss": 0.8553, + "step": 42175 + }, + { + "epoch": 0.61, + "grad_norm": 0.484375, + "learning_rate": 8.090274389015978e-05, + "loss": 0.9753, + "step": 42180 + }, + { + "epoch": 0.61, + "grad_norm": 0.55078125, + "learning_rate": 8.087816906919391e-05, + "loss": 0.9659, + "step": 42185 + }, + { + "epoch": 0.61, + "grad_norm": 0.5234375, + "learning_rate": 8.085359544680623e-05, + "loss": 0.9577, + "step": 42190 + }, + { + "epoch": 0.61, + "grad_norm": 0.490234375, + "learning_rate": 8.082902302453701e-05, + "loss": 0.9535, + "step": 42195 + }, + { + "epoch": 0.61, + "grad_norm": 0.51171875, + "learning_rate": 8.080445180392642e-05, + "loss": 1.0207, + "step": 42200 + }, + { + "epoch": 0.61, + "grad_norm": 0.59765625, + "learning_rate": 8.07798817865147e-05, + "loss": 0.9898, + "step": 42205 + }, + { + "epoch": 0.61, + "grad_norm": 0.640625, + "learning_rate": 8.075531297384185e-05, + "loss": 0.9406, + "step": 42210 + }, + { + "epoch": 0.61, + "grad_norm": 0.54296875, + "learning_rate": 8.07307453674479e-05, + "loss": 0.9575, + "step": 42215 + }, + { + "epoch": 0.61, + "grad_norm": 0.546875, + "learning_rate": 8.070617896887277e-05, + "loss": 0.9358, + "step": 42220 + }, + { + "epoch": 0.61, + "grad_norm": 0.5859375, + "learning_rate": 8.068161377965633e-05, + "loss": 0.9085, + "step": 42225 + }, + { + "epoch": 0.61, + "grad_norm": 0.5703125, + "learning_rate": 8.065704980133833e-05, + "loss": 0.8574, + "step": 42230 + }, + { + "epoch": 0.61, + "grad_norm": 0.578125, + "learning_rate": 8.063248703545847e-05, + "loss": 1.0217, + "step": 42235 + }, + { + "epoch": 0.61, + "grad_norm": 0.515625, + "learning_rate": 8.06079254835564e-05, + "loss": 0.8214, + "step": 42240 + }, + { + "epoch": 0.61, + "grad_norm": 0.5390625, + "learning_rate": 8.058336514717164e-05, + "loss": 0.9639, + "step": 42245 + }, + { + "epoch": 0.61, + "grad_norm": 0.52734375, + "learning_rate": 8.055880602784367e-05, + "loss": 0.9374, + "step": 42250 + }, + { + "epoch": 0.61, + "grad_norm": 0.56640625, + "learning_rate": 8.053424812711186e-05, + "loss": 1.2145, + "step": 42255 + }, + { + "epoch": 0.61, + "grad_norm": 0.61328125, + "learning_rate": 8.050969144651559e-05, + "loss": 0.9234, + "step": 42260 + }, + { + "epoch": 0.61, + "grad_norm": 0.640625, + "learning_rate": 8.048513598759403e-05, + "loss": 0.9074, + "step": 42265 + }, + { + "epoch": 0.61, + "grad_norm": 0.6015625, + "learning_rate": 8.046058175188638e-05, + "loss": 1.0246, + "step": 42270 + }, + { + "epoch": 0.61, + "grad_norm": 0.57421875, + "learning_rate": 8.043602874093169e-05, + "loss": 0.8926, + "step": 42275 + }, + { + "epoch": 0.61, + "grad_norm": 0.57421875, + "learning_rate": 8.041147695626902e-05, + "loss": 0.9339, + "step": 42280 + }, + { + "epoch": 0.61, + "grad_norm": 0.49609375, + "learning_rate": 8.03869263994373e-05, + "loss": 0.9105, + "step": 42285 + }, + { + "epoch": 0.61, + "grad_norm": 0.53515625, + "learning_rate": 8.036237707197534e-05, + "loss": 0.8557, + "step": 42290 + }, + { + "epoch": 0.61, + "grad_norm": 0.578125, + "learning_rate": 8.033782897542197e-05, + "loss": 0.9911, + "step": 42295 + }, + { + "epoch": 0.61, + "grad_norm": 0.5, + "learning_rate": 8.031328211131586e-05, + "loss": 0.8827, + "step": 42300 + }, + { + "epoch": 0.61, + "grad_norm": 0.53515625, + "learning_rate": 8.028873648119565e-05, + "loss": 0.8477, + "step": 42305 + }, + { + "epoch": 0.61, + "grad_norm": 0.58984375, + "learning_rate": 8.026419208659994e-05, + "loss": 0.8995, + "step": 42310 + }, + { + "epoch": 0.61, + "grad_norm": 0.6171875, + "learning_rate": 8.023964892906709e-05, + "loss": 0.9163, + "step": 42315 + }, + { + "epoch": 0.61, + "grad_norm": 0.546875, + "learning_rate": 8.021510701013557e-05, + "loss": 1.0234, + "step": 42320 + }, + { + "epoch": 0.61, + "grad_norm": 0.58984375, + "learning_rate": 8.019056633134367e-05, + "loss": 0.9321, + "step": 42325 + }, + { + "epoch": 0.61, + "grad_norm": 0.55859375, + "learning_rate": 8.01660268942296e-05, + "loss": 0.9225, + "step": 42330 + }, + { + "epoch": 0.61, + "grad_norm": 0.71484375, + "learning_rate": 8.01414887003316e-05, + "loss": 0.9481, + "step": 42335 + }, + { + "epoch": 0.61, + "grad_norm": 0.56640625, + "learning_rate": 8.01169517511877e-05, + "loss": 0.9872, + "step": 42340 + }, + { + "epoch": 0.61, + "grad_norm": 0.578125, + "learning_rate": 8.009241604833588e-05, + "loss": 0.9599, + "step": 42345 + }, + { + "epoch": 0.61, + "grad_norm": 0.53515625, + "learning_rate": 8.006788159331412e-05, + "loss": 0.9175, + "step": 42350 + }, + { + "epoch": 0.61, + "grad_norm": 0.5078125, + "learning_rate": 8.004334838766024e-05, + "loss": 0.9903, + "step": 42355 + }, + { + "epoch": 0.61, + "grad_norm": 0.5234375, + "learning_rate": 8.001881643291198e-05, + "loss": 0.9758, + "step": 42360 + }, + { + "epoch": 0.61, + "grad_norm": 0.486328125, + "learning_rate": 7.999428573060715e-05, + "loss": 1.0384, + "step": 42365 + }, + { + "epoch": 0.61, + "grad_norm": 0.5859375, + "learning_rate": 7.99697562822832e-05, + "loss": 0.9855, + "step": 42370 + }, + { + "epoch": 0.61, + "grad_norm": 0.58203125, + "learning_rate": 7.994522808947775e-05, + "loss": 0.9536, + "step": 42375 + }, + { + "epoch": 0.61, + "grad_norm": 0.55078125, + "learning_rate": 7.992070115372825e-05, + "loss": 1.2132, + "step": 42380 + }, + { + "epoch": 0.61, + "grad_norm": 0.59765625, + "learning_rate": 7.989617547657206e-05, + "loss": 0.9809, + "step": 42385 + }, + { + "epoch": 0.61, + "grad_norm": 0.56640625, + "learning_rate": 7.98716510595465e-05, + "loss": 0.9123, + "step": 42390 + }, + { + "epoch": 0.61, + "grad_norm": 0.5234375, + "learning_rate": 7.984712790418878e-05, + "loss": 0.9471, + "step": 42395 + }, + { + "epoch": 0.61, + "grad_norm": 0.412109375, + "learning_rate": 7.9822606012036e-05, + "loss": 0.8363, + "step": 42400 + }, + { + "epoch": 0.61, + "grad_norm": 0.65234375, + "learning_rate": 7.979808538462528e-05, + "loss": 0.961, + "step": 42405 + }, + { + "epoch": 0.61, + "grad_norm": 0.515625, + "learning_rate": 7.977356602349359e-05, + "loss": 0.8457, + "step": 42410 + }, + { + "epoch": 0.61, + "grad_norm": 0.59765625, + "learning_rate": 7.974904793017784e-05, + "loss": 1.0536, + "step": 42415 + }, + { + "epoch": 0.61, + "grad_norm": 0.58984375, + "learning_rate": 7.972453110621479e-05, + "loss": 0.9591, + "step": 42420 + }, + { + "epoch": 0.61, + "grad_norm": 0.50390625, + "learning_rate": 7.970001555314121e-05, + "loss": 0.8937, + "step": 42425 + }, + { + "epoch": 0.61, + "grad_norm": 0.54296875, + "learning_rate": 7.967550127249381e-05, + "loss": 0.8458, + "step": 42430 + }, + { + "epoch": 0.61, + "grad_norm": 0.5703125, + "learning_rate": 7.965098826580912e-05, + "loss": 0.906, + "step": 42435 + }, + { + "epoch": 0.61, + "grad_norm": 0.6328125, + "learning_rate": 7.962647653462364e-05, + "loss": 1.0945, + "step": 42440 + }, + { + "epoch": 0.61, + "grad_norm": 0.455078125, + "learning_rate": 7.960196608047385e-05, + "loss": 0.9566, + "step": 42445 + }, + { + "epoch": 0.61, + "grad_norm": 0.498046875, + "learning_rate": 7.957745690489604e-05, + "loss": 0.898, + "step": 42450 + }, + { + "epoch": 0.61, + "grad_norm": 0.53515625, + "learning_rate": 7.955294900942646e-05, + "loss": 0.8699, + "step": 42455 + }, + { + "epoch": 0.61, + "grad_norm": 0.5390625, + "learning_rate": 7.952844239560138e-05, + "loss": 0.9584, + "step": 42460 + }, + { + "epoch": 0.61, + "grad_norm": 0.58203125, + "learning_rate": 7.950393706495683e-05, + "loss": 0.9459, + "step": 42465 + }, + { + "epoch": 0.61, + "grad_norm": 0.58203125, + "learning_rate": 7.947943301902885e-05, + "loss": 0.9533, + "step": 42470 + }, + { + "epoch": 0.61, + "grad_norm": 0.53125, + "learning_rate": 7.945493025935338e-05, + "loss": 0.93, + "step": 42475 + }, + { + "epoch": 0.61, + "grad_norm": 0.515625, + "learning_rate": 7.943042878746626e-05, + "loss": 0.8268, + "step": 42480 + }, + { + "epoch": 0.61, + "grad_norm": 0.5390625, + "learning_rate": 7.94059286049033e-05, + "loss": 0.8896, + "step": 42485 + }, + { + "epoch": 0.61, + "grad_norm": 0.54296875, + "learning_rate": 7.93814297132002e-05, + "loss": 1.0771, + "step": 42490 + }, + { + "epoch": 0.61, + "grad_norm": 0.53125, + "learning_rate": 7.935693211389254e-05, + "loss": 0.8995, + "step": 42495 + }, + { + "epoch": 0.61, + "grad_norm": 0.62890625, + "learning_rate": 7.933243580851591e-05, + "loss": 0.9314, + "step": 42500 + }, + { + "epoch": 0.61, + "grad_norm": 0.55078125, + "learning_rate": 7.930794079860575e-05, + "loss": 0.8779, + "step": 42505 + }, + { + "epoch": 0.61, + "grad_norm": 0.625, + "learning_rate": 7.92834470856974e-05, + "loss": 1.006, + "step": 42510 + }, + { + "epoch": 0.61, + "grad_norm": 0.55859375, + "learning_rate": 7.925895467132618e-05, + "loss": 0.8862, + "step": 42515 + }, + { + "epoch": 0.61, + "grad_norm": 0.5859375, + "learning_rate": 7.923446355702735e-05, + "loss": 0.9694, + "step": 42520 + }, + { + "epoch": 0.61, + "grad_norm": 0.55859375, + "learning_rate": 7.920997374433596e-05, + "loss": 0.8995, + "step": 42525 + }, + { + "epoch": 0.61, + "grad_norm": 0.494140625, + "learning_rate": 7.91854852347871e-05, + "loss": 0.8734, + "step": 42530 + }, + { + "epoch": 0.61, + "grad_norm": 0.58984375, + "learning_rate": 7.916099802991572e-05, + "loss": 0.9556, + "step": 42535 + }, + { + "epoch": 0.61, + "grad_norm": 0.6015625, + "learning_rate": 7.913651213125672e-05, + "loss": 0.9399, + "step": 42540 + }, + { + "epoch": 0.61, + "grad_norm": 0.90625, + "learning_rate": 7.911202754034491e-05, + "loss": 0.9976, + "step": 42545 + }, + { + "epoch": 0.61, + "grad_norm": 0.53125, + "learning_rate": 7.908754425871498e-05, + "loss": 0.9188, + "step": 42550 + }, + { + "epoch": 0.61, + "grad_norm": 0.5390625, + "learning_rate": 7.906306228790161e-05, + "loss": 0.8477, + "step": 42555 + }, + { + "epoch": 0.61, + "grad_norm": 0.60546875, + "learning_rate": 7.903858162943935e-05, + "loss": 1.0648, + "step": 42560 + }, + { + "epoch": 0.61, + "grad_norm": 0.5390625, + "learning_rate": 7.901410228486264e-05, + "loss": 0.9438, + "step": 42565 + }, + { + "epoch": 0.61, + "grad_norm": 0.59375, + "learning_rate": 7.898962425570592e-05, + "loss": 0.9425, + "step": 42570 + }, + { + "epoch": 0.61, + "grad_norm": 0.53515625, + "learning_rate": 7.89651475435035e-05, + "loss": 0.8806, + "step": 42575 + }, + { + "epoch": 0.61, + "grad_norm": 0.498046875, + "learning_rate": 7.894067214978959e-05, + "loss": 0.9518, + "step": 42580 + }, + { + "epoch": 0.61, + "grad_norm": 0.60546875, + "learning_rate": 7.891619807609832e-05, + "loss": 0.9417, + "step": 42585 + }, + { + "epoch": 0.61, + "grad_norm": 0.61328125, + "learning_rate": 7.889172532396373e-05, + "loss": 0.8483, + "step": 42590 + }, + { + "epoch": 0.61, + "grad_norm": 0.5390625, + "learning_rate": 7.886725389491987e-05, + "loss": 0.8405, + "step": 42595 + }, + { + "epoch": 0.61, + "grad_norm": 0.55859375, + "learning_rate": 7.88427837905006e-05, + "loss": 0.9486, + "step": 42600 + }, + { + "epoch": 0.61, + "grad_norm": 0.58203125, + "learning_rate": 7.881831501223973e-05, + "loss": 1.03, + "step": 42605 + }, + { + "epoch": 0.61, + "grad_norm": 0.5078125, + "learning_rate": 7.879384756167101e-05, + "loss": 0.9571, + "step": 42610 + }, + { + "epoch": 0.61, + "grad_norm": 0.515625, + "learning_rate": 7.876938144032807e-05, + "loss": 1.0589, + "step": 42615 + }, + { + "epoch": 0.61, + "grad_norm": 0.51953125, + "learning_rate": 7.874491664974448e-05, + "loss": 0.9998, + "step": 42620 + }, + { + "epoch": 0.61, + "grad_norm": 0.6015625, + "learning_rate": 7.872045319145376e-05, + "loss": 1.0316, + "step": 42625 + }, + { + "epoch": 0.61, + "grad_norm": 0.5625, + "learning_rate": 7.869599106698922e-05, + "loss": 0.9804, + "step": 42630 + }, + { + "epoch": 0.61, + "grad_norm": 0.55078125, + "learning_rate": 7.867153027788424e-05, + "loss": 0.8879, + "step": 42635 + }, + { + "epoch": 0.61, + "grad_norm": 0.6640625, + "learning_rate": 7.864707082567204e-05, + "loss": 1.0132, + "step": 42640 + }, + { + "epoch": 0.61, + "grad_norm": 0.5859375, + "learning_rate": 7.862261271188574e-05, + "loss": 1.0564, + "step": 42645 + }, + { + "epoch": 0.61, + "grad_norm": 0.5703125, + "learning_rate": 7.859815593805844e-05, + "loss": 1.0339, + "step": 42650 + }, + { + "epoch": 0.61, + "grad_norm": 0.58203125, + "learning_rate": 7.85737005057231e-05, + "loss": 1.0806, + "step": 42655 + }, + { + "epoch": 0.61, + "grad_norm": 0.53515625, + "learning_rate": 7.854924641641258e-05, + "loss": 0.9391, + "step": 42660 + }, + { + "epoch": 0.61, + "grad_norm": 0.498046875, + "learning_rate": 7.852479367165976e-05, + "loss": 0.8207, + "step": 42665 + }, + { + "epoch": 0.61, + "grad_norm": 0.58984375, + "learning_rate": 7.850034227299734e-05, + "loss": 0.9667, + "step": 42670 + }, + { + "epoch": 0.61, + "grad_norm": 0.57421875, + "learning_rate": 7.84758922219579e-05, + "loss": 0.9414, + "step": 42675 + }, + { + "epoch": 0.61, + "grad_norm": 0.4921875, + "learning_rate": 7.845144352007414e-05, + "loss": 0.9857, + "step": 42680 + }, + { + "epoch": 0.61, + "grad_norm": 0.5625, + "learning_rate": 7.842699616887837e-05, + "loss": 0.956, + "step": 42685 + }, + { + "epoch": 0.61, + "grad_norm": 0.55859375, + "learning_rate": 7.840255016990308e-05, + "loss": 0.8266, + "step": 42690 + }, + { + "epoch": 0.61, + "grad_norm": 0.56640625, + "learning_rate": 7.837810552468053e-05, + "loss": 0.866, + "step": 42695 + }, + { + "epoch": 0.61, + "grad_norm": 0.5390625, + "learning_rate": 7.835366223474293e-05, + "loss": 0.9463, + "step": 42700 + }, + { + "epoch": 0.61, + "grad_norm": 0.5390625, + "learning_rate": 7.832922030162246e-05, + "loss": 0.8329, + "step": 42705 + }, + { + "epoch": 0.61, + "grad_norm": 0.60546875, + "learning_rate": 7.830477972685115e-05, + "loss": 0.9353, + "step": 42710 + }, + { + "epoch": 0.61, + "grad_norm": 0.62109375, + "learning_rate": 7.828034051196093e-05, + "loss": 0.9475, + "step": 42715 + }, + { + "epoch": 0.61, + "grad_norm": 0.58203125, + "learning_rate": 7.825590265848371e-05, + "loss": 0.9154, + "step": 42720 + }, + { + "epoch": 0.61, + "grad_norm": 0.56640625, + "learning_rate": 7.823146616795129e-05, + "loss": 1.0003, + "step": 42725 + }, + { + "epoch": 0.61, + "grad_norm": 0.5703125, + "learning_rate": 7.820703104189538e-05, + "loss": 0.8323, + "step": 42730 + }, + { + "epoch": 0.61, + "grad_norm": 0.515625, + "learning_rate": 7.818259728184757e-05, + "loss": 0.8252, + "step": 42735 + }, + { + "epoch": 0.61, + "grad_norm": 0.7109375, + "learning_rate": 7.815816488933938e-05, + "loss": 1.0588, + "step": 42740 + }, + { + "epoch": 0.61, + "grad_norm": 0.65234375, + "learning_rate": 7.813373386590232e-05, + "loss": 0.917, + "step": 42745 + }, + { + "epoch": 0.61, + "grad_norm": 0.57421875, + "learning_rate": 7.810930421306772e-05, + "loss": 0.9622, + "step": 42750 + }, + { + "epoch": 0.61, + "grad_norm": 0.486328125, + "learning_rate": 7.808487593236683e-05, + "loss": 0.8535, + "step": 42755 + }, + { + "epoch": 0.61, + "grad_norm": 0.671875, + "learning_rate": 7.806044902533092e-05, + "loss": 1.0698, + "step": 42760 + }, + { + "epoch": 0.61, + "grad_norm": 0.62109375, + "learning_rate": 7.803602349349104e-05, + "loss": 0.9576, + "step": 42765 + }, + { + "epoch": 0.61, + "grad_norm": 0.578125, + "learning_rate": 7.801159933837821e-05, + "loss": 1.0837, + "step": 42770 + }, + { + "epoch": 0.61, + "grad_norm": 0.63671875, + "learning_rate": 7.798717656152339e-05, + "loss": 0.8904, + "step": 42775 + }, + { + "epoch": 0.61, + "grad_norm": 0.52734375, + "learning_rate": 7.796275516445741e-05, + "loss": 0.9422, + "step": 42780 + }, + { + "epoch": 0.61, + "grad_norm": 0.458984375, + "learning_rate": 7.793833514871106e-05, + "loss": 0.9295, + "step": 42785 + }, + { + "epoch": 0.61, + "grad_norm": 0.51171875, + "learning_rate": 7.791391651581497e-05, + "loss": 0.9939, + "step": 42790 + }, + { + "epoch": 0.61, + "grad_norm": 0.53515625, + "learning_rate": 7.788949926729972e-05, + "loss": 0.9126, + "step": 42795 + }, + { + "epoch": 0.61, + "grad_norm": 0.56640625, + "learning_rate": 7.786508340469586e-05, + "loss": 0.8445, + "step": 42800 + }, + { + "epoch": 0.61, + "grad_norm": 0.546875, + "learning_rate": 7.78406689295338e-05, + "loss": 0.9759, + "step": 42805 + }, + { + "epoch": 0.61, + "grad_norm": 0.462890625, + "learning_rate": 7.78162558433438e-05, + "loss": 1.0567, + "step": 42810 + }, + { + "epoch": 0.61, + "grad_norm": 0.5390625, + "learning_rate": 7.779184414765618e-05, + "loss": 0.862, + "step": 42815 + }, + { + "epoch": 0.61, + "grad_norm": 0.53125, + "learning_rate": 7.776743384400106e-05, + "loss": 1.0773, + "step": 42820 + }, + { + "epoch": 0.61, + "grad_norm": 0.51953125, + "learning_rate": 7.77430249339085e-05, + "loss": 0.9642, + "step": 42825 + }, + { + "epoch": 0.61, + "grad_norm": 0.578125, + "learning_rate": 7.771861741890848e-05, + "loss": 0.8702, + "step": 42830 + }, + { + "epoch": 0.61, + "grad_norm": 0.52734375, + "learning_rate": 7.769421130053094e-05, + "loss": 0.8947, + "step": 42835 + }, + { + "epoch": 0.61, + "grad_norm": 0.51953125, + "learning_rate": 7.766980658030562e-05, + "loss": 0.9315, + "step": 42840 + }, + { + "epoch": 0.61, + "grad_norm": 0.53125, + "learning_rate": 7.764540325976225e-05, + "loss": 0.9786, + "step": 42845 + }, + { + "epoch": 0.61, + "grad_norm": 0.56640625, + "learning_rate": 7.762100134043043e-05, + "loss": 0.9207, + "step": 42850 + }, + { + "epoch": 0.61, + "grad_norm": 0.515625, + "learning_rate": 7.759660082383977e-05, + "loss": 1.0167, + "step": 42855 + }, + { + "epoch": 0.61, + "grad_norm": 0.546875, + "learning_rate": 7.757220171151967e-05, + "loss": 0.9736, + "step": 42860 + }, + { + "epoch": 0.61, + "grad_norm": 0.47265625, + "learning_rate": 7.754780400499951e-05, + "loss": 1.0015, + "step": 42865 + }, + { + "epoch": 0.61, + "grad_norm": 0.5, + "learning_rate": 7.752340770580858e-05, + "loss": 0.9226, + "step": 42870 + }, + { + "epoch": 0.62, + "grad_norm": 0.52734375, + "learning_rate": 7.749901281547604e-05, + "loss": 0.9444, + "step": 42875 + }, + { + "epoch": 0.62, + "grad_norm": 0.65234375, + "learning_rate": 7.747461933553099e-05, + "loss": 1.0346, + "step": 42880 + }, + { + "epoch": 0.62, + "grad_norm": 0.462890625, + "learning_rate": 7.745022726750248e-05, + "loss": 0.9532, + "step": 42885 + }, + { + "epoch": 0.62, + "grad_norm": 0.51171875, + "learning_rate": 7.742583661291943e-05, + "loss": 0.8445, + "step": 42890 + }, + { + "epoch": 0.62, + "grad_norm": 0.5546875, + "learning_rate": 7.740144737331062e-05, + "loss": 0.9205, + "step": 42895 + }, + { + "epoch": 0.62, + "grad_norm": 0.48046875, + "learning_rate": 7.737705955020482e-05, + "loss": 0.9847, + "step": 42900 + }, + { + "epoch": 0.62, + "grad_norm": 0.54296875, + "learning_rate": 7.735267314513069e-05, + "loss": 0.9144, + "step": 42905 + }, + { + "epoch": 0.62, + "grad_norm": 0.59375, + "learning_rate": 7.732828815961683e-05, + "loss": 1.0651, + "step": 42910 + }, + { + "epoch": 0.62, + "grad_norm": 0.55078125, + "learning_rate": 7.730390459519167e-05, + "loss": 0.9732, + "step": 42915 + }, + { + "epoch": 0.62, + "grad_norm": 0.5390625, + "learning_rate": 7.727952245338359e-05, + "loss": 0.9412, + "step": 42920 + }, + { + "epoch": 0.62, + "grad_norm": 0.56640625, + "learning_rate": 7.725514173572096e-05, + "loss": 1.1264, + "step": 42925 + }, + { + "epoch": 0.62, + "grad_norm": 0.7109375, + "learning_rate": 7.723076244373195e-05, + "loss": 0.9375, + "step": 42930 + }, + { + "epoch": 0.62, + "grad_norm": 0.55859375, + "learning_rate": 7.720638457894465e-05, + "loss": 0.9551, + "step": 42935 + }, + { + "epoch": 0.62, + "grad_norm": 0.48046875, + "learning_rate": 7.718200814288717e-05, + "loss": 1.0087, + "step": 42940 + }, + { + "epoch": 0.62, + "grad_norm": 0.53125, + "learning_rate": 7.715763313708739e-05, + "loss": 1.0652, + "step": 42945 + }, + { + "epoch": 0.62, + "grad_norm": 0.53125, + "learning_rate": 7.713325956307316e-05, + "loss": 0.963, + "step": 42950 + }, + { + "epoch": 0.62, + "grad_norm": 0.58984375, + "learning_rate": 7.710888742237226e-05, + "loss": 0.8619, + "step": 42955 + }, + { + "epoch": 0.62, + "grad_norm": 0.59765625, + "learning_rate": 7.708451671651239e-05, + "loss": 0.9818, + "step": 42960 + }, + { + "epoch": 0.62, + "grad_norm": 0.56640625, + "learning_rate": 7.70601474470211e-05, + "loss": 0.9524, + "step": 42965 + }, + { + "epoch": 0.62, + "grad_norm": 0.73828125, + "learning_rate": 7.703577961542588e-05, + "loss": 1.0222, + "step": 42970 + }, + { + "epoch": 0.62, + "grad_norm": 0.56640625, + "learning_rate": 7.701141322325418e-05, + "loss": 1.0135, + "step": 42975 + }, + { + "epoch": 0.62, + "grad_norm": 0.51171875, + "learning_rate": 7.698704827203326e-05, + "loss": 0.8863, + "step": 42980 + }, + { + "epoch": 0.62, + "grad_norm": 0.5703125, + "learning_rate": 7.696268476329038e-05, + "loss": 0.9803, + "step": 42985 + }, + { + "epoch": 0.62, + "grad_norm": 0.61328125, + "learning_rate": 7.693832269855264e-05, + "loss": 1.1359, + "step": 42990 + }, + { + "epoch": 0.62, + "grad_norm": 0.53125, + "learning_rate": 7.691396207934711e-05, + "loss": 0.9429, + "step": 42995 + }, + { + "epoch": 0.62, + "grad_norm": 0.55078125, + "learning_rate": 7.688960290720073e-05, + "loss": 0.996, + "step": 43000 + }, + { + "epoch": 0.62, + "grad_norm": 0.62890625, + "learning_rate": 7.686524518364036e-05, + "loss": 0.9694, + "step": 43005 + }, + { + "epoch": 0.62, + "grad_norm": 0.828125, + "learning_rate": 7.684088891019276e-05, + "loss": 1.0616, + "step": 43010 + }, + { + "epoch": 0.62, + "grad_norm": 0.53515625, + "learning_rate": 7.681653408838462e-05, + "loss": 0.8888, + "step": 43015 + }, + { + "epoch": 0.62, + "grad_norm": 0.55078125, + "learning_rate": 7.679218071974254e-05, + "loss": 1.0106, + "step": 43020 + }, + { + "epoch": 0.62, + "grad_norm": 0.609375, + "learning_rate": 7.676782880579298e-05, + "loss": 0.9764, + "step": 43025 + }, + { + "epoch": 0.62, + "grad_norm": 0.5703125, + "learning_rate": 7.674347834806238e-05, + "loss": 0.9293, + "step": 43030 + }, + { + "epoch": 0.62, + "grad_norm": 0.53515625, + "learning_rate": 7.671912934807706e-05, + "loss": 1.0437, + "step": 43035 + }, + { + "epoch": 0.62, + "grad_norm": 0.56640625, + "learning_rate": 7.669478180736319e-05, + "loss": 0.8764, + "step": 43040 + }, + { + "epoch": 0.62, + "grad_norm": 0.76953125, + "learning_rate": 7.667043572744701e-05, + "loss": 1.0054, + "step": 43045 + }, + { + "epoch": 0.62, + "grad_norm": 0.5703125, + "learning_rate": 7.664609110985444e-05, + "loss": 0.9659, + "step": 43050 + }, + { + "epoch": 0.62, + "grad_norm": 0.56640625, + "learning_rate": 7.662174795611147e-05, + "loss": 0.8469, + "step": 43055 + }, + { + "epoch": 0.62, + "grad_norm": 0.64453125, + "learning_rate": 7.659740626774399e-05, + "loss": 1.0004, + "step": 43060 + }, + { + "epoch": 0.62, + "grad_norm": 0.482421875, + "learning_rate": 7.65730660462777e-05, + "loss": 0.8755, + "step": 43065 + }, + { + "epoch": 0.62, + "grad_norm": 0.5546875, + "learning_rate": 7.654872729323834e-05, + "loss": 0.9929, + "step": 43070 + }, + { + "epoch": 0.62, + "grad_norm": 0.57421875, + "learning_rate": 7.652439001015145e-05, + "loss": 0.9484, + "step": 43075 + }, + { + "epoch": 0.62, + "grad_norm": 0.5703125, + "learning_rate": 7.650005419854252e-05, + "loss": 1.0401, + "step": 43080 + }, + { + "epoch": 0.62, + "grad_norm": 0.609375, + "learning_rate": 7.647571985993697e-05, + "loss": 1.1313, + "step": 43085 + }, + { + "epoch": 0.62, + "grad_norm": 0.53515625, + "learning_rate": 7.645138699586013e-05, + "loss": 0.8608, + "step": 43090 + }, + { + "epoch": 0.62, + "grad_norm": 0.67578125, + "learning_rate": 7.64270556078371e-05, + "loss": 1.0649, + "step": 43095 + }, + { + "epoch": 0.62, + "grad_norm": 0.53515625, + "learning_rate": 7.640272569739316e-05, + "loss": 1.0124, + "step": 43100 + }, + { + "epoch": 0.62, + "grad_norm": 0.478515625, + "learning_rate": 7.637839726605318e-05, + "loss": 0.8626, + "step": 43105 + }, + { + "epoch": 0.62, + "grad_norm": 0.5546875, + "learning_rate": 7.635407031534218e-05, + "loss": 0.8512, + "step": 43110 + }, + { + "epoch": 0.62, + "grad_norm": 0.63671875, + "learning_rate": 7.632974484678499e-05, + "loss": 1.1224, + "step": 43115 + }, + { + "epoch": 0.62, + "grad_norm": 0.65234375, + "learning_rate": 7.630542086190633e-05, + "loss": 0.9001, + "step": 43120 + }, + { + "epoch": 0.62, + "grad_norm": 0.419921875, + "learning_rate": 7.62810983622309e-05, + "loss": 0.9177, + "step": 43125 + }, + { + "epoch": 0.62, + "grad_norm": 0.51171875, + "learning_rate": 7.625677734928322e-05, + "loss": 0.932, + "step": 43130 + }, + { + "epoch": 0.62, + "grad_norm": 0.703125, + "learning_rate": 7.623245782458777e-05, + "loss": 1.0339, + "step": 43135 + }, + { + "epoch": 0.62, + "grad_norm": 0.51953125, + "learning_rate": 7.620813978966895e-05, + "loss": 0.9261, + "step": 43140 + }, + { + "epoch": 0.62, + "grad_norm": 0.5546875, + "learning_rate": 7.618382324605104e-05, + "loss": 0.9913, + "step": 43145 + }, + { + "epoch": 0.62, + "grad_norm": 0.6171875, + "learning_rate": 7.615950819525821e-05, + "loss": 0.9563, + "step": 43150 + }, + { + "epoch": 0.62, + "grad_norm": 0.55859375, + "learning_rate": 7.613519463881456e-05, + "loss": 0.8773, + "step": 43155 + }, + { + "epoch": 0.62, + "grad_norm": 0.50390625, + "learning_rate": 7.611088257824405e-05, + "loss": 0.8426, + "step": 43160 + }, + { + "epoch": 0.62, + "grad_norm": 0.53515625, + "learning_rate": 7.608657201507066e-05, + "loss": 1.0284, + "step": 43165 + }, + { + "epoch": 0.62, + "grad_norm": 0.5859375, + "learning_rate": 7.606226295081815e-05, + "loss": 0.9378, + "step": 43170 + }, + { + "epoch": 0.62, + "grad_norm": 0.53515625, + "learning_rate": 7.603795538701026e-05, + "loss": 0.983, + "step": 43175 + }, + { + "epoch": 0.62, + "grad_norm": 0.61328125, + "learning_rate": 7.601364932517065e-05, + "loss": 0.9846, + "step": 43180 + }, + { + "epoch": 0.62, + "grad_norm": 0.55859375, + "learning_rate": 7.598934476682279e-05, + "loss": 0.8697, + "step": 43185 + }, + { + "epoch": 0.62, + "grad_norm": 0.58984375, + "learning_rate": 7.596504171349013e-05, + "loss": 1.0234, + "step": 43190 + }, + { + "epoch": 0.62, + "grad_norm": 0.625, + "learning_rate": 7.594074016669606e-05, + "loss": 0.9989, + "step": 43195 + }, + { + "epoch": 0.62, + "grad_norm": 0.61328125, + "learning_rate": 7.59164401279638e-05, + "loss": 0.9878, + "step": 43200 + }, + { + "epoch": 0.62, + "grad_norm": 0.56640625, + "learning_rate": 7.589214159881652e-05, + "loss": 0.8622, + "step": 43205 + }, + { + "epoch": 0.62, + "grad_norm": 0.6796875, + "learning_rate": 7.586784458077723e-05, + "loss": 1.1644, + "step": 43210 + }, + { + "epoch": 0.62, + "grad_norm": 0.6328125, + "learning_rate": 7.584354907536892e-05, + "loss": 1.0065, + "step": 43215 + }, + { + "epoch": 0.62, + "grad_norm": 0.61328125, + "learning_rate": 7.581925508411448e-05, + "loss": 1.0772, + "step": 43220 + }, + { + "epoch": 0.62, + "grad_norm": 0.5546875, + "learning_rate": 7.579496260853666e-05, + "loss": 0.8406, + "step": 43225 + }, + { + "epoch": 0.62, + "grad_norm": 0.5390625, + "learning_rate": 7.577067165015815e-05, + "loss": 0.8243, + "step": 43230 + }, + { + "epoch": 0.62, + "grad_norm": 0.5703125, + "learning_rate": 7.574638221050156e-05, + "loss": 0.8884, + "step": 43235 + }, + { + "epoch": 0.62, + "grad_norm": 0.5390625, + "learning_rate": 7.572209429108935e-05, + "loss": 0.9756, + "step": 43240 + }, + { + "epoch": 0.62, + "grad_norm": 0.5390625, + "learning_rate": 7.569780789344388e-05, + "loss": 0.9726, + "step": 43245 + }, + { + "epoch": 0.62, + "grad_norm": 0.57421875, + "learning_rate": 7.567352301908752e-05, + "loss": 0.9453, + "step": 43250 + }, + { + "epoch": 0.62, + "grad_norm": 0.52734375, + "learning_rate": 7.564923966954247e-05, + "loss": 0.8655, + "step": 43255 + }, + { + "epoch": 0.62, + "grad_norm": 0.51953125, + "learning_rate": 7.562495784633078e-05, + "loss": 0.8646, + "step": 43260 + }, + { + "epoch": 0.62, + "grad_norm": 0.57421875, + "learning_rate": 7.56006775509745e-05, + "loss": 1.0515, + "step": 43265 + }, + { + "epoch": 0.62, + "grad_norm": 0.546875, + "learning_rate": 7.557639878499551e-05, + "loss": 0.932, + "step": 43270 + }, + { + "epoch": 0.62, + "grad_norm": 0.59765625, + "learning_rate": 7.555212154991569e-05, + "loss": 0.9745, + "step": 43275 + }, + { + "epoch": 0.62, + "grad_norm": 0.546875, + "learning_rate": 7.552784584725674e-05, + "loss": 1.104, + "step": 43280 + }, + { + "epoch": 0.62, + "grad_norm": 0.5546875, + "learning_rate": 7.550357167854025e-05, + "loss": 1.0603, + "step": 43285 + }, + { + "epoch": 0.62, + "grad_norm": 0.58984375, + "learning_rate": 7.547929904528783e-05, + "loss": 0.8669, + "step": 43290 + }, + { + "epoch": 0.62, + "grad_norm": 0.55859375, + "learning_rate": 7.545502794902084e-05, + "loss": 0.8064, + "step": 43295 + }, + { + "epoch": 0.62, + "grad_norm": 0.6875, + "learning_rate": 7.543075839126065e-05, + "loss": 1.0797, + "step": 43300 + }, + { + "epoch": 0.62, + "grad_norm": 0.578125, + "learning_rate": 7.540649037352853e-05, + "loss": 0.9345, + "step": 43305 + }, + { + "epoch": 0.62, + "grad_norm": 0.58203125, + "learning_rate": 7.538222389734561e-05, + "loss": 0.9949, + "step": 43310 + }, + { + "epoch": 0.62, + "grad_norm": 0.52734375, + "learning_rate": 7.535795896423292e-05, + "loss": 0.9763, + "step": 43315 + }, + { + "epoch": 0.62, + "grad_norm": 0.515625, + "learning_rate": 7.533369557571144e-05, + "loss": 0.9526, + "step": 43320 + }, + { + "epoch": 0.62, + "grad_norm": 0.486328125, + "learning_rate": 7.530943373330197e-05, + "loss": 0.9384, + "step": 43325 + }, + { + "epoch": 0.62, + "grad_norm": 0.66015625, + "learning_rate": 7.528517343852535e-05, + "loss": 1.0181, + "step": 43330 + }, + { + "epoch": 0.62, + "grad_norm": 0.55078125, + "learning_rate": 7.526091469290221e-05, + "loss": 0.8762, + "step": 43335 + }, + { + "epoch": 0.62, + "grad_norm": 0.470703125, + "learning_rate": 7.523665749795308e-05, + "loss": 0.8089, + "step": 43340 + }, + { + "epoch": 0.62, + "grad_norm": 0.51953125, + "learning_rate": 7.521240185519849e-05, + "loss": 0.9557, + "step": 43345 + }, + { + "epoch": 0.62, + "grad_norm": 0.57421875, + "learning_rate": 7.518814776615878e-05, + "loss": 0.8498, + "step": 43350 + }, + { + "epoch": 0.62, + "grad_norm": 0.498046875, + "learning_rate": 7.51638952323542e-05, + "loss": 1.0413, + "step": 43355 + }, + { + "epoch": 0.62, + "grad_norm": 0.53125, + "learning_rate": 7.513964425530502e-05, + "loss": 0.8996, + "step": 43360 + }, + { + "epoch": 0.62, + "grad_norm": 0.53515625, + "learning_rate": 7.511539483653119e-05, + "loss": 1.0167, + "step": 43365 + }, + { + "epoch": 0.62, + "grad_norm": 0.478515625, + "learning_rate": 7.509114697755277e-05, + "loss": 0.9066, + "step": 43370 + }, + { + "epoch": 0.62, + "grad_norm": 0.6015625, + "learning_rate": 7.506690067988963e-05, + "loss": 0.9654, + "step": 43375 + }, + { + "epoch": 0.62, + "grad_norm": 0.55078125, + "learning_rate": 7.504265594506152e-05, + "loss": 1.0211, + "step": 43380 + }, + { + "epoch": 0.62, + "grad_norm": 0.5234375, + "learning_rate": 7.50184127745882e-05, + "loss": 0.9962, + "step": 43385 + }, + { + "epoch": 0.62, + "grad_norm": 0.5859375, + "learning_rate": 7.499417116998921e-05, + "loss": 1.0111, + "step": 43390 + }, + { + "epoch": 0.62, + "grad_norm": 0.56640625, + "learning_rate": 7.496993113278403e-05, + "loss": 0.9227, + "step": 43395 + }, + { + "epoch": 0.62, + "grad_norm": 0.609375, + "learning_rate": 7.49456926644921e-05, + "loss": 0.8432, + "step": 43400 + }, + { + "epoch": 0.62, + "grad_norm": 0.5546875, + "learning_rate": 7.492145576663267e-05, + "loss": 0.9982, + "step": 43405 + }, + { + "epoch": 0.62, + "grad_norm": 0.5625, + "learning_rate": 7.489722044072493e-05, + "loss": 0.9431, + "step": 43410 + }, + { + "epoch": 0.62, + "grad_norm": 0.578125, + "learning_rate": 7.487298668828809e-05, + "loss": 1.0562, + "step": 43415 + }, + { + "epoch": 0.62, + "grad_norm": 0.50390625, + "learning_rate": 7.484875451084098e-05, + "loss": 1.0369, + "step": 43420 + }, + { + "epoch": 0.62, + "grad_norm": 0.515625, + "learning_rate": 7.482452390990262e-05, + "loss": 1.0191, + "step": 43425 + }, + { + "epoch": 0.62, + "grad_norm": 0.64453125, + "learning_rate": 7.480029488699177e-05, + "loss": 1.0395, + "step": 43430 + }, + { + "epoch": 0.62, + "grad_norm": 0.62890625, + "learning_rate": 7.47760674436271e-05, + "loss": 0.9695, + "step": 43435 + }, + { + "epoch": 0.62, + "grad_norm": 0.53515625, + "learning_rate": 7.475184158132728e-05, + "loss": 0.9514, + "step": 43440 + }, + { + "epoch": 0.62, + "grad_norm": 0.59765625, + "learning_rate": 7.472761730161079e-05, + "loss": 1.0904, + "step": 43445 + }, + { + "epoch": 0.62, + "grad_norm": 0.5078125, + "learning_rate": 7.470339460599601e-05, + "loss": 0.8637, + "step": 43450 + }, + { + "epoch": 0.62, + "grad_norm": 0.58984375, + "learning_rate": 7.467917349600129e-05, + "loss": 0.7954, + "step": 43455 + }, + { + "epoch": 0.62, + "grad_norm": 0.57421875, + "learning_rate": 7.46549539731448e-05, + "loss": 0.9869, + "step": 43460 + }, + { + "epoch": 0.62, + "grad_norm": 0.63671875, + "learning_rate": 7.463073603894469e-05, + "loss": 0.8432, + "step": 43465 + }, + { + "epoch": 0.62, + "grad_norm": 0.55859375, + "learning_rate": 7.460651969491892e-05, + "loss": 0.8657, + "step": 43470 + }, + { + "epoch": 0.62, + "grad_norm": 0.515625, + "learning_rate": 7.45823049425854e-05, + "loss": 0.9686, + "step": 43475 + }, + { + "epoch": 0.62, + "grad_norm": 0.51171875, + "learning_rate": 7.455809178346196e-05, + "loss": 1.0432, + "step": 43480 + }, + { + "epoch": 0.62, + "grad_norm": 0.55859375, + "learning_rate": 7.453388021906631e-05, + "loss": 0.7601, + "step": 43485 + }, + { + "epoch": 0.62, + "grad_norm": 0.53515625, + "learning_rate": 7.450967025091604e-05, + "loss": 0.9924, + "step": 43490 + }, + { + "epoch": 0.62, + "grad_norm": 0.5234375, + "learning_rate": 7.448546188052867e-05, + "loss": 0.9843, + "step": 43495 + }, + { + "epoch": 0.62, + "grad_norm": 0.458984375, + "learning_rate": 7.446125510942162e-05, + "loss": 0.9573, + "step": 43500 + }, + { + "epoch": 0.62, + "grad_norm": 0.53515625, + "learning_rate": 7.443704993911216e-05, + "loss": 0.9863, + "step": 43505 + }, + { + "epoch": 0.62, + "grad_norm": 0.5546875, + "learning_rate": 7.441284637111755e-05, + "loss": 0.9481, + "step": 43510 + }, + { + "epoch": 0.62, + "grad_norm": 0.5703125, + "learning_rate": 7.438864440695487e-05, + "loss": 0.9117, + "step": 43515 + }, + { + "epoch": 0.62, + "grad_norm": 0.55859375, + "learning_rate": 7.436444404814115e-05, + "loss": 0.9144, + "step": 43520 + }, + { + "epoch": 0.62, + "grad_norm": 0.52734375, + "learning_rate": 7.434024529619325e-05, + "loss": 0.9627, + "step": 43525 + }, + { + "epoch": 0.62, + "grad_norm": 0.55078125, + "learning_rate": 7.431604815262799e-05, + "loss": 1.0474, + "step": 43530 + }, + { + "epoch": 0.62, + "grad_norm": 0.470703125, + "learning_rate": 7.429185261896207e-05, + "loss": 1.0196, + "step": 43535 + }, + { + "epoch": 0.62, + "grad_norm": 0.515625, + "learning_rate": 7.426765869671214e-05, + "loss": 0.8849, + "step": 43540 + }, + { + "epoch": 0.62, + "grad_norm": 0.5078125, + "learning_rate": 7.424346638739463e-05, + "loss": 0.9278, + "step": 43545 + }, + { + "epoch": 0.62, + "grad_norm": 0.5546875, + "learning_rate": 7.421927569252601e-05, + "loss": 0.9472, + "step": 43550 + }, + { + "epoch": 0.62, + "grad_norm": 0.494140625, + "learning_rate": 7.419508661362255e-05, + "loss": 0.9385, + "step": 43555 + }, + { + "epoch": 0.62, + "grad_norm": 0.59375, + "learning_rate": 7.417089915220044e-05, + "loss": 0.8907, + "step": 43560 + }, + { + "epoch": 0.62, + "grad_norm": 0.6171875, + "learning_rate": 7.41467133097758e-05, + "loss": 1.055, + "step": 43565 + }, + { + "epoch": 0.62, + "grad_norm": 0.53125, + "learning_rate": 7.412252908786463e-05, + "loss": 0.9292, + "step": 43570 + }, + { + "epoch": 0.63, + "grad_norm": 0.61328125, + "learning_rate": 7.409834648798279e-05, + "loss": 1.0102, + "step": 43575 + }, + { + "epoch": 0.63, + "grad_norm": 0.55859375, + "learning_rate": 7.407416551164608e-05, + "loss": 0.9795, + "step": 43580 + }, + { + "epoch": 0.63, + "grad_norm": 0.478515625, + "learning_rate": 7.404998616037022e-05, + "loss": 0.8821, + "step": 43585 + }, + { + "epoch": 0.63, + "grad_norm": 0.5703125, + "learning_rate": 7.402580843567078e-05, + "loss": 0.8465, + "step": 43590 + }, + { + "epoch": 0.63, + "grad_norm": 0.5703125, + "learning_rate": 7.400163233906324e-05, + "loss": 1.0351, + "step": 43595 + }, + { + "epoch": 0.63, + "grad_norm": 0.578125, + "learning_rate": 7.397745787206298e-05, + "loss": 0.9298, + "step": 43600 + }, + { + "epoch": 0.63, + "grad_norm": 0.55078125, + "learning_rate": 7.395328503618533e-05, + "loss": 0.982, + "step": 43605 + }, + { + "epoch": 0.63, + "grad_norm": 0.55078125, + "learning_rate": 7.392911383294543e-05, + "loss": 0.9462, + "step": 43610 + }, + { + "epoch": 0.63, + "grad_norm": 0.53125, + "learning_rate": 7.390494426385835e-05, + "loss": 1.0016, + "step": 43615 + }, + { + "epoch": 0.63, + "grad_norm": 0.5234375, + "learning_rate": 7.388077633043908e-05, + "loss": 0.8682, + "step": 43620 + }, + { + "epoch": 0.63, + "grad_norm": 0.578125, + "learning_rate": 7.385661003420255e-05, + "loss": 0.9146, + "step": 43625 + }, + { + "epoch": 0.63, + "grad_norm": 0.5546875, + "learning_rate": 7.383244537666345e-05, + "loss": 0.896, + "step": 43630 + }, + { + "epoch": 0.63, + "grad_norm": 0.57421875, + "learning_rate": 7.380828235933644e-05, + "loss": 1.0661, + "step": 43635 + }, + { + "epoch": 0.63, + "grad_norm": 0.640625, + "learning_rate": 7.378412098373616e-05, + "loss": 0.909, + "step": 43640 + }, + { + "epoch": 0.63, + "grad_norm": 0.58984375, + "learning_rate": 7.375996125137704e-05, + "loss": 1.0145, + "step": 43645 + }, + { + "epoch": 0.63, + "grad_norm": 0.59765625, + "learning_rate": 7.373580316377337e-05, + "loss": 0.8261, + "step": 43650 + }, + { + "epoch": 0.63, + "grad_norm": 0.57421875, + "learning_rate": 7.371164672243953e-05, + "loss": 0.9722, + "step": 43655 + }, + { + "epoch": 0.63, + "grad_norm": 0.62890625, + "learning_rate": 7.36874919288896e-05, + "loss": 0.856, + "step": 43660 + }, + { + "epoch": 0.63, + "grad_norm": 0.578125, + "learning_rate": 7.366333878463765e-05, + "loss": 1.0439, + "step": 43665 + }, + { + "epoch": 0.63, + "grad_norm": 0.52734375, + "learning_rate": 7.36391872911976e-05, + "loss": 1.074, + "step": 43670 + }, + { + "epoch": 0.63, + "grad_norm": 0.53125, + "learning_rate": 7.361503745008335e-05, + "loss": 0.8275, + "step": 43675 + }, + { + "epoch": 0.63, + "grad_norm": 0.61328125, + "learning_rate": 7.359088926280858e-05, + "loss": 0.9063, + "step": 43680 + }, + { + "epoch": 0.63, + "grad_norm": 0.54296875, + "learning_rate": 7.356674273088695e-05, + "loss": 0.9664, + "step": 43685 + }, + { + "epoch": 0.63, + "grad_norm": 0.56640625, + "learning_rate": 7.354259785583197e-05, + "loss": 0.9558, + "step": 43690 + }, + { + "epoch": 0.63, + "grad_norm": 0.54296875, + "learning_rate": 7.351845463915711e-05, + "loss": 0.9554, + "step": 43695 + }, + { + "epoch": 0.63, + "grad_norm": 0.5703125, + "learning_rate": 7.349431308237568e-05, + "loss": 1.1306, + "step": 43700 + }, + { + "epoch": 0.63, + "grad_norm": 0.55859375, + "learning_rate": 7.347017318700087e-05, + "loss": 0.8899, + "step": 43705 + }, + { + "epoch": 0.63, + "grad_norm": 0.6015625, + "learning_rate": 7.344603495454582e-05, + "loss": 0.9215, + "step": 43710 + }, + { + "epoch": 0.63, + "grad_norm": 0.5625, + "learning_rate": 7.342189838652357e-05, + "loss": 0.9706, + "step": 43715 + }, + { + "epoch": 0.63, + "grad_norm": 0.5859375, + "learning_rate": 7.339776348444696e-05, + "loss": 0.8483, + "step": 43720 + }, + { + "epoch": 0.63, + "grad_norm": 0.51953125, + "learning_rate": 7.337363024982886e-05, + "loss": 0.8448, + "step": 43725 + }, + { + "epoch": 0.63, + "grad_norm": 0.578125, + "learning_rate": 7.334949868418197e-05, + "loss": 0.8894, + "step": 43730 + }, + { + "epoch": 0.63, + "grad_norm": 0.5703125, + "learning_rate": 7.332536878901882e-05, + "loss": 0.9876, + "step": 43735 + }, + { + "epoch": 0.63, + "grad_norm": 0.62109375, + "learning_rate": 7.330124056585194e-05, + "loss": 0.983, + "step": 43740 + }, + { + "epoch": 0.63, + "grad_norm": 0.62890625, + "learning_rate": 7.327711401619369e-05, + "loss": 1.0151, + "step": 43745 + }, + { + "epoch": 0.63, + "grad_norm": 0.515625, + "learning_rate": 7.325298914155638e-05, + "loss": 1.0221, + "step": 43750 + }, + { + "epoch": 0.63, + "grad_norm": 0.55078125, + "learning_rate": 7.322886594345218e-05, + "loss": 0.854, + "step": 43755 + }, + { + "epoch": 0.63, + "grad_norm": 0.53125, + "learning_rate": 7.320474442339314e-05, + "loss": 0.8749, + "step": 43760 + }, + { + "epoch": 0.63, + "grad_norm": 0.490234375, + "learning_rate": 7.318062458289124e-05, + "loss": 0.8302, + "step": 43765 + }, + { + "epoch": 0.63, + "grad_norm": 0.66796875, + "learning_rate": 7.315650642345835e-05, + "loss": 1.0153, + "step": 43770 + }, + { + "epoch": 0.63, + "grad_norm": 0.5390625, + "learning_rate": 7.313238994660618e-05, + "loss": 1.0044, + "step": 43775 + }, + { + "epoch": 0.63, + "grad_norm": 0.50390625, + "learning_rate": 7.310827515384648e-05, + "loss": 0.9339, + "step": 43780 + }, + { + "epoch": 0.63, + "grad_norm": 0.5625, + "learning_rate": 7.308416204669063e-05, + "loss": 0.8673, + "step": 43785 + }, + { + "epoch": 0.63, + "grad_norm": 0.546875, + "learning_rate": 7.30600506266502e-05, + "loss": 0.9973, + "step": 43790 + }, + { + "epoch": 0.63, + "grad_norm": 0.50390625, + "learning_rate": 7.303594089523649e-05, + "loss": 0.818, + "step": 43795 + }, + { + "epoch": 0.63, + "grad_norm": 0.50390625, + "learning_rate": 7.301183285396068e-05, + "loss": 0.8573, + "step": 43800 + }, + { + "epoch": 0.63, + "grad_norm": 0.51171875, + "learning_rate": 7.298772650433394e-05, + "loss": 0.9827, + "step": 43805 + }, + { + "epoch": 0.63, + "grad_norm": 0.50390625, + "learning_rate": 7.296362184786728e-05, + "loss": 0.9497, + "step": 43810 + }, + { + "epoch": 0.63, + "grad_norm": 0.5234375, + "learning_rate": 7.293951888607156e-05, + "loss": 1.0426, + "step": 43815 + }, + { + "epoch": 0.63, + "grad_norm": 0.49609375, + "learning_rate": 7.291541762045765e-05, + "loss": 0.9381, + "step": 43820 + }, + { + "epoch": 0.63, + "grad_norm": 0.52734375, + "learning_rate": 7.28913180525362e-05, + "loss": 0.8794, + "step": 43825 + }, + { + "epoch": 0.63, + "grad_norm": 0.5390625, + "learning_rate": 7.28672201838178e-05, + "loss": 0.9819, + "step": 43830 + }, + { + "epoch": 0.63, + "grad_norm": 0.51953125, + "learning_rate": 7.2843124015813e-05, + "loss": 1.1377, + "step": 43835 + }, + { + "epoch": 0.63, + "grad_norm": 0.52734375, + "learning_rate": 7.281902955003204e-05, + "loss": 1.0289, + "step": 43840 + }, + { + "epoch": 0.63, + "grad_norm": 0.50390625, + "learning_rate": 7.279493678798529e-05, + "loss": 0.9459, + "step": 43845 + }, + { + "epoch": 0.63, + "grad_norm": 0.6640625, + "learning_rate": 7.277084573118289e-05, + "loss": 0.9653, + "step": 43850 + }, + { + "epoch": 0.63, + "grad_norm": 0.53515625, + "learning_rate": 7.274675638113486e-05, + "loss": 0.8437, + "step": 43855 + }, + { + "epoch": 0.63, + "grad_norm": 0.51171875, + "learning_rate": 7.272266873935122e-05, + "loss": 0.974, + "step": 43860 + }, + { + "epoch": 0.63, + "grad_norm": 0.55859375, + "learning_rate": 7.269858280734176e-05, + "loss": 1.1957, + "step": 43865 + }, + { + "epoch": 0.63, + "grad_norm": 0.59375, + "learning_rate": 7.26744985866162e-05, + "loss": 0.9526, + "step": 43870 + }, + { + "epoch": 0.63, + "grad_norm": 0.5078125, + "learning_rate": 7.265041607868422e-05, + "loss": 0.8111, + "step": 43875 + }, + { + "epoch": 0.63, + "grad_norm": 0.59765625, + "learning_rate": 7.262633528505529e-05, + "loss": 0.8836, + "step": 43880 + }, + { + "epoch": 0.63, + "grad_norm": 0.578125, + "learning_rate": 7.260225620723888e-05, + "loss": 0.8641, + "step": 43885 + }, + { + "epoch": 0.63, + "grad_norm": 0.5546875, + "learning_rate": 7.257817884674421e-05, + "loss": 0.82, + "step": 43890 + }, + { + "epoch": 0.63, + "grad_norm": 0.5546875, + "learning_rate": 7.255410320508052e-05, + "loss": 1.0119, + "step": 43895 + }, + { + "epoch": 0.63, + "grad_norm": 0.51953125, + "learning_rate": 7.253002928375692e-05, + "loss": 0.8987, + "step": 43900 + }, + { + "epoch": 0.63, + "grad_norm": 0.53515625, + "learning_rate": 7.250595708428236e-05, + "loss": 1.0118, + "step": 43905 + }, + { + "epoch": 0.63, + "grad_norm": 0.59375, + "learning_rate": 7.248188660816571e-05, + "loss": 1.0069, + "step": 43910 + }, + { + "epoch": 0.63, + "grad_norm": 0.5234375, + "learning_rate": 7.245781785691576e-05, + "loss": 0.9783, + "step": 43915 + }, + { + "epoch": 0.63, + "grad_norm": 0.62109375, + "learning_rate": 7.243375083204116e-05, + "loss": 0.9433, + "step": 43920 + }, + { + "epoch": 0.63, + "grad_norm": 0.5078125, + "learning_rate": 7.240968553505043e-05, + "loss": 0.9561, + "step": 43925 + }, + { + "epoch": 0.63, + "grad_norm": 0.55859375, + "learning_rate": 7.238562196745206e-05, + "loss": 0.9336, + "step": 43930 + }, + { + "epoch": 0.63, + "grad_norm": 0.57421875, + "learning_rate": 7.236156013075435e-05, + "loss": 1.0196, + "step": 43935 + }, + { + "epoch": 0.63, + "grad_norm": 0.53515625, + "learning_rate": 7.233750002646555e-05, + "loss": 0.9104, + "step": 43940 + }, + { + "epoch": 0.63, + "grad_norm": 0.5390625, + "learning_rate": 7.231344165609375e-05, + "loss": 0.9293, + "step": 43945 + }, + { + "epoch": 0.63, + "grad_norm": 0.53515625, + "learning_rate": 7.228938502114692e-05, + "loss": 0.9552, + "step": 43950 + }, + { + "epoch": 0.63, + "grad_norm": 0.5859375, + "learning_rate": 7.226533012313301e-05, + "loss": 0.8964, + "step": 43955 + }, + { + "epoch": 0.63, + "grad_norm": 0.57421875, + "learning_rate": 7.224127696355981e-05, + "loss": 0.889, + "step": 43960 + }, + { + "epoch": 0.63, + "grad_norm": 0.546875, + "learning_rate": 7.221722554393496e-05, + "loss": 1.1623, + "step": 43965 + }, + { + "epoch": 0.63, + "grad_norm": 0.55859375, + "learning_rate": 7.219317586576609e-05, + "loss": 0.8779, + "step": 43970 + }, + { + "epoch": 0.63, + "grad_norm": 0.578125, + "learning_rate": 7.21691279305606e-05, + "loss": 0.8894, + "step": 43975 + }, + { + "epoch": 0.63, + "grad_norm": 0.52734375, + "learning_rate": 7.214508173982585e-05, + "loss": 1.0329, + "step": 43980 + }, + { + "epoch": 0.63, + "grad_norm": 0.609375, + "learning_rate": 7.212103729506914e-05, + "loss": 0.8058, + "step": 43985 + }, + { + "epoch": 0.63, + "grad_norm": 0.5234375, + "learning_rate": 7.209699459779758e-05, + "loss": 0.8913, + "step": 43990 + }, + { + "epoch": 0.63, + "grad_norm": 0.625, + "learning_rate": 7.207295364951814e-05, + "loss": 0.9224, + "step": 43995 + }, + { + "epoch": 0.63, + "grad_norm": 0.5546875, + "learning_rate": 7.204891445173779e-05, + "loss": 1.0408, + "step": 44000 + }, + { + "epoch": 0.63, + "grad_norm": 0.640625, + "learning_rate": 7.202487700596328e-05, + "loss": 1.0718, + "step": 44005 + }, + { + "epoch": 0.63, + "grad_norm": 0.64453125, + "learning_rate": 7.200084131370138e-05, + "loss": 0.91, + "step": 44010 + }, + { + "epoch": 0.63, + "grad_norm": 0.65234375, + "learning_rate": 7.197680737645861e-05, + "loss": 0.8467, + "step": 44015 + }, + { + "epoch": 0.63, + "grad_norm": 0.51171875, + "learning_rate": 7.195277519574147e-05, + "loss": 0.9021, + "step": 44020 + }, + { + "epoch": 0.63, + "grad_norm": 0.52734375, + "learning_rate": 7.192874477305633e-05, + "loss": 0.8741, + "step": 44025 + }, + { + "epoch": 0.63, + "grad_norm": 0.6640625, + "learning_rate": 7.190471610990944e-05, + "loss": 1.0427, + "step": 44030 + }, + { + "epoch": 0.63, + "grad_norm": 0.6875, + "learning_rate": 7.188068920780692e-05, + "loss": 0.9584, + "step": 44035 + }, + { + "epoch": 0.63, + "grad_norm": 0.5859375, + "learning_rate": 7.185666406825486e-05, + "loss": 0.9775, + "step": 44040 + }, + { + "epoch": 0.63, + "grad_norm": 0.65234375, + "learning_rate": 7.183264069275915e-05, + "loss": 0.9982, + "step": 44045 + }, + { + "epoch": 0.63, + "grad_norm": 0.56640625, + "learning_rate": 7.18086190828256e-05, + "loss": 0.9059, + "step": 44050 + }, + { + "epoch": 0.63, + "grad_norm": 0.55859375, + "learning_rate": 7.17845992399599e-05, + "loss": 0.9274, + "step": 44055 + }, + { + "epoch": 0.63, + "grad_norm": 0.5390625, + "learning_rate": 7.176058116566764e-05, + "loss": 1.0291, + "step": 44060 + }, + { + "epoch": 0.63, + "grad_norm": 0.671875, + "learning_rate": 7.173656486145434e-05, + "loss": 0.8917, + "step": 44065 + }, + { + "epoch": 0.63, + "grad_norm": 0.5390625, + "learning_rate": 7.171255032882534e-05, + "loss": 0.9251, + "step": 44070 + }, + { + "epoch": 0.63, + "grad_norm": 0.51953125, + "learning_rate": 7.168853756928587e-05, + "loss": 1.0542, + "step": 44075 + }, + { + "epoch": 0.63, + "grad_norm": 0.796875, + "learning_rate": 7.166452658434115e-05, + "loss": 1.0037, + "step": 44080 + }, + { + "epoch": 0.63, + "grad_norm": 0.7890625, + "learning_rate": 7.164051737549615e-05, + "loss": 0.9898, + "step": 44085 + }, + { + "epoch": 0.63, + "grad_norm": 0.51171875, + "learning_rate": 7.161650994425582e-05, + "loss": 0.8652, + "step": 44090 + }, + { + "epoch": 0.63, + "grad_norm": 0.5625, + "learning_rate": 7.159250429212503e-05, + "loss": 1.0474, + "step": 44095 + }, + { + "epoch": 0.63, + "grad_norm": 0.51953125, + "learning_rate": 7.156850042060837e-05, + "loss": 1.0262, + "step": 44100 + }, + { + "epoch": 0.63, + "grad_norm": 0.63671875, + "learning_rate": 7.154449833121049e-05, + "loss": 0.8672, + "step": 44105 + }, + { + "epoch": 0.63, + "grad_norm": 0.57421875, + "learning_rate": 7.152049802543587e-05, + "loss": 0.8591, + "step": 44110 + }, + { + "epoch": 0.63, + "grad_norm": 0.498046875, + "learning_rate": 7.149649950478884e-05, + "loss": 0.9944, + "step": 44115 + }, + { + "epoch": 0.63, + "grad_norm": 0.62890625, + "learning_rate": 7.147250277077371e-05, + "loss": 0.9759, + "step": 44120 + }, + { + "epoch": 0.63, + "grad_norm": 0.5390625, + "learning_rate": 7.14485078248946e-05, + "loss": 0.9551, + "step": 44125 + }, + { + "epoch": 0.63, + "grad_norm": 0.55078125, + "learning_rate": 7.142451466865551e-05, + "loss": 0.9274, + "step": 44130 + }, + { + "epoch": 0.63, + "grad_norm": 0.5625, + "learning_rate": 7.140052330356042e-05, + "loss": 0.9259, + "step": 44135 + }, + { + "epoch": 0.63, + "grad_norm": 0.63671875, + "learning_rate": 7.137653373111309e-05, + "loss": 1.0829, + "step": 44140 + }, + { + "epoch": 0.63, + "grad_norm": 0.578125, + "learning_rate": 7.135254595281719e-05, + "loss": 0.982, + "step": 44145 + }, + { + "epoch": 0.63, + "grad_norm": 0.62109375, + "learning_rate": 7.132855997017642e-05, + "loss": 0.8594, + "step": 44150 + }, + { + "epoch": 0.63, + "grad_norm": 0.52734375, + "learning_rate": 7.13045757846941e-05, + "loss": 1.0339, + "step": 44155 + }, + { + "epoch": 0.63, + "grad_norm": 0.58984375, + "learning_rate": 7.128059339787368e-05, + "loss": 0.974, + "step": 44160 + }, + { + "epoch": 0.63, + "grad_norm": 0.55859375, + "learning_rate": 7.125661281121837e-05, + "loss": 0.8284, + "step": 44165 + }, + { + "epoch": 0.63, + "grad_norm": 0.53125, + "learning_rate": 7.123263402623125e-05, + "loss": 0.9314, + "step": 44170 + }, + { + "epoch": 0.63, + "grad_norm": 0.5234375, + "learning_rate": 7.120865704441546e-05, + "loss": 0.9241, + "step": 44175 + }, + { + "epoch": 0.63, + "grad_norm": 0.53515625, + "learning_rate": 7.11846818672738e-05, + "loss": 0.9662, + "step": 44180 + }, + { + "epoch": 0.63, + "grad_norm": 0.578125, + "learning_rate": 7.116070849630911e-05, + "loss": 1.0086, + "step": 44185 + }, + { + "epoch": 0.63, + "grad_norm": 0.59765625, + "learning_rate": 7.113673693302406e-05, + "loss": 0.9713, + "step": 44190 + }, + { + "epoch": 0.63, + "grad_norm": 0.51171875, + "learning_rate": 7.111276717892121e-05, + "loss": 0.9079, + "step": 44195 + }, + { + "epoch": 0.63, + "grad_norm": 0.56640625, + "learning_rate": 7.108879923550305e-05, + "loss": 1.0538, + "step": 44200 + }, + { + "epoch": 0.63, + "grad_norm": 0.59375, + "learning_rate": 7.106483310427184e-05, + "loss": 0.9012, + "step": 44205 + }, + { + "epoch": 0.63, + "grad_norm": 0.48828125, + "learning_rate": 7.104086878672984e-05, + "loss": 0.8179, + "step": 44210 + }, + { + "epoch": 0.63, + "grad_norm": 0.50390625, + "learning_rate": 7.101690628437918e-05, + "loss": 0.8541, + "step": 44215 + }, + { + "epoch": 0.63, + "grad_norm": 0.609375, + "learning_rate": 7.099294559872184e-05, + "loss": 1.0969, + "step": 44220 + }, + { + "epoch": 0.63, + "grad_norm": 0.54296875, + "learning_rate": 7.096898673125969e-05, + "loss": 1.0144, + "step": 44225 + }, + { + "epoch": 0.63, + "grad_norm": 0.5546875, + "learning_rate": 7.094502968349453e-05, + "loss": 0.9345, + "step": 44230 + }, + { + "epoch": 0.63, + "grad_norm": 0.6015625, + "learning_rate": 7.092107445692802e-05, + "loss": 1.0356, + "step": 44235 + }, + { + "epoch": 0.63, + "grad_norm": 0.5546875, + "learning_rate": 7.089712105306163e-05, + "loss": 1.0366, + "step": 44240 + }, + { + "epoch": 0.63, + "grad_norm": 0.6171875, + "learning_rate": 7.087316947339689e-05, + "loss": 0.9069, + "step": 44245 + }, + { + "epoch": 0.63, + "grad_norm": 0.5390625, + "learning_rate": 7.084921971943503e-05, + "loss": 0.9973, + "step": 44250 + }, + { + "epoch": 0.63, + "grad_norm": 0.53515625, + "learning_rate": 7.082527179267731e-05, + "loss": 0.9333, + "step": 44255 + }, + { + "epoch": 0.63, + "grad_norm": 0.53515625, + "learning_rate": 7.080132569462474e-05, + "loss": 1.0377, + "step": 44260 + }, + { + "epoch": 0.63, + "grad_norm": 0.65625, + "learning_rate": 7.077738142677836e-05, + "loss": 0.8374, + "step": 44265 + }, + { + "epoch": 0.64, + "grad_norm": 0.58984375, + "learning_rate": 7.0753438990639e-05, + "loss": 0.9651, + "step": 44270 + }, + { + "epoch": 0.64, + "grad_norm": 0.5625, + "learning_rate": 7.072949838770737e-05, + "loss": 1.0695, + "step": 44275 + }, + { + "epoch": 0.64, + "grad_norm": 0.56640625, + "learning_rate": 7.07055596194841e-05, + "loss": 0.9352, + "step": 44280 + }, + { + "epoch": 0.64, + "grad_norm": 0.60546875, + "learning_rate": 7.068162268746975e-05, + "loss": 0.8394, + "step": 44285 + }, + { + "epoch": 0.64, + "grad_norm": 0.58203125, + "learning_rate": 7.065768759316468e-05, + "loss": 1.0907, + "step": 44290 + }, + { + "epoch": 0.64, + "grad_norm": 0.5, + "learning_rate": 7.063375433806914e-05, + "loss": 0.9587, + "step": 44295 + }, + { + "epoch": 0.64, + "grad_norm": 0.6015625, + "learning_rate": 7.060982292368334e-05, + "loss": 1.0745, + "step": 44300 + }, + { + "epoch": 0.64, + "grad_norm": 0.578125, + "learning_rate": 7.058589335150734e-05, + "loss": 0.9421, + "step": 44305 + }, + { + "epoch": 0.64, + "grad_norm": 0.5390625, + "learning_rate": 7.056196562304103e-05, + "loss": 0.9171, + "step": 44310 + }, + { + "epoch": 0.64, + "grad_norm": 0.56640625, + "learning_rate": 7.053803973978423e-05, + "loss": 0.9543, + "step": 44315 + }, + { + "epoch": 0.64, + "grad_norm": 0.56640625, + "learning_rate": 7.051411570323665e-05, + "loss": 0.8534, + "step": 44320 + }, + { + "epoch": 0.64, + "grad_norm": 0.53515625, + "learning_rate": 7.04901935148979e-05, + "loss": 0.7847, + "step": 44325 + }, + { + "epoch": 0.64, + "grad_norm": 0.625, + "learning_rate": 7.04662731762674e-05, + "loss": 0.9292, + "step": 44330 + }, + { + "epoch": 0.64, + "grad_norm": 0.59375, + "learning_rate": 7.044235468884455e-05, + "loss": 0.9307, + "step": 44335 + }, + { + "epoch": 0.64, + "grad_norm": 0.515625, + "learning_rate": 7.041843805412855e-05, + "loss": 0.9345, + "step": 44340 + }, + { + "epoch": 0.64, + "grad_norm": 0.5703125, + "learning_rate": 7.039452327361857e-05, + "loss": 0.8205, + "step": 44345 + }, + { + "epoch": 0.64, + "grad_norm": 0.59375, + "learning_rate": 7.037061034881358e-05, + "loss": 0.9699, + "step": 44350 + }, + { + "epoch": 0.64, + "grad_norm": 0.6171875, + "learning_rate": 7.034669928121248e-05, + "loss": 0.9605, + "step": 44355 + }, + { + "epoch": 0.64, + "grad_norm": 0.625, + "learning_rate": 7.032279007231406e-05, + "loss": 1.0062, + "step": 44360 + }, + { + "epoch": 0.64, + "grad_norm": 0.6171875, + "learning_rate": 7.029888272361695e-05, + "loss": 0.8797, + "step": 44365 + }, + { + "epoch": 0.64, + "grad_norm": 0.546875, + "learning_rate": 7.027497723661967e-05, + "loss": 0.9759, + "step": 44370 + }, + { + "epoch": 0.64, + "grad_norm": 0.63671875, + "learning_rate": 7.025107361282069e-05, + "loss": 0.9113, + "step": 44375 + }, + { + "epoch": 0.64, + "grad_norm": 0.55859375, + "learning_rate": 7.02271718537183e-05, + "loss": 1.0109, + "step": 44380 + }, + { + "epoch": 0.64, + "grad_norm": 0.53125, + "learning_rate": 7.020327196081067e-05, + "loss": 0.8384, + "step": 44385 + }, + { + "epoch": 0.64, + "grad_norm": 0.56640625, + "learning_rate": 7.01793739355959e-05, + "loss": 0.9772, + "step": 44390 + }, + { + "epoch": 0.64, + "grad_norm": 0.54296875, + "learning_rate": 7.015547777957194e-05, + "loss": 1.0573, + "step": 44395 + }, + { + "epoch": 0.64, + "grad_norm": 0.58984375, + "learning_rate": 7.01315834942366e-05, + "loss": 1.0351, + "step": 44400 + }, + { + "epoch": 0.64, + "grad_norm": 0.5234375, + "learning_rate": 7.010769108108764e-05, + "loss": 0.8791, + "step": 44405 + }, + { + "epoch": 0.64, + "grad_norm": 0.58984375, + "learning_rate": 7.008380054162268e-05, + "loss": 0.9083, + "step": 44410 + }, + { + "epoch": 0.64, + "grad_norm": 0.54296875, + "learning_rate": 7.005991187733914e-05, + "loss": 0.9679, + "step": 44415 + }, + { + "epoch": 0.64, + "grad_norm": 0.55078125, + "learning_rate": 7.003602508973444e-05, + "loss": 0.8742, + "step": 44420 + }, + { + "epoch": 0.64, + "grad_norm": 0.53125, + "learning_rate": 7.001214018030578e-05, + "loss": 0.8526, + "step": 44425 + }, + { + "epoch": 0.64, + "grad_norm": 0.5703125, + "learning_rate": 6.998825715055035e-05, + "loss": 0.8964, + "step": 44430 + }, + { + "epoch": 0.64, + "grad_norm": 0.55078125, + "learning_rate": 6.996437600196514e-05, + "loss": 1.1129, + "step": 44435 + }, + { + "epoch": 0.64, + "grad_norm": 0.53125, + "learning_rate": 6.994049673604703e-05, + "loss": 0.8891, + "step": 44440 + }, + { + "epoch": 0.64, + "grad_norm": 0.515625, + "learning_rate": 6.991661935429284e-05, + "loss": 0.8465, + "step": 44445 + }, + { + "epoch": 0.64, + "grad_norm": 0.54296875, + "learning_rate": 6.989274385819921e-05, + "loss": 1.0433, + "step": 44450 + }, + { + "epoch": 0.64, + "grad_norm": 0.65625, + "learning_rate": 6.986887024926267e-05, + "loss": 0.8986, + "step": 44455 + }, + { + "epoch": 0.64, + "grad_norm": 0.53515625, + "learning_rate": 6.984499852897968e-05, + "loss": 1.0319, + "step": 44460 + }, + { + "epoch": 0.64, + "grad_norm": 0.59765625, + "learning_rate": 6.982112869884654e-05, + "loss": 0.9768, + "step": 44465 + }, + { + "epoch": 0.64, + "grad_norm": 0.734375, + "learning_rate": 6.97972607603594e-05, + "loss": 0.8718, + "step": 44470 + }, + { + "epoch": 0.64, + "grad_norm": 0.56640625, + "learning_rate": 6.977339471501436e-05, + "loss": 0.969, + "step": 44475 + }, + { + "epoch": 0.64, + "grad_norm": 0.515625, + "learning_rate": 6.974953056430736e-05, + "loss": 0.9752, + "step": 44480 + }, + { + "epoch": 0.64, + "grad_norm": 0.5078125, + "learning_rate": 6.972566830973423e-05, + "loss": 0.8427, + "step": 44485 + }, + { + "epoch": 0.64, + "grad_norm": 0.5234375, + "learning_rate": 6.970180795279069e-05, + "loss": 0.909, + "step": 44490 + }, + { + "epoch": 0.64, + "grad_norm": 0.578125, + "learning_rate": 6.967794949497233e-05, + "loss": 0.9467, + "step": 44495 + }, + { + "epoch": 0.64, + "grad_norm": 0.71484375, + "learning_rate": 6.965409293777464e-05, + "loss": 1.0362, + "step": 44500 + }, + { + "epoch": 0.64, + "grad_norm": 0.515625, + "learning_rate": 6.963023828269297e-05, + "loss": 0.8744, + "step": 44505 + }, + { + "epoch": 0.64, + "grad_norm": 0.578125, + "learning_rate": 6.960638553122254e-05, + "loss": 0.9793, + "step": 44510 + }, + { + "epoch": 0.64, + "grad_norm": 0.4765625, + "learning_rate": 6.958253468485853e-05, + "loss": 0.9228, + "step": 44515 + }, + { + "epoch": 0.64, + "grad_norm": 0.515625, + "learning_rate": 6.955868574509583e-05, + "loss": 0.9583, + "step": 44520 + }, + { + "epoch": 0.64, + "grad_norm": 0.59765625, + "learning_rate": 6.953483871342941e-05, + "loss": 1.0902, + "step": 44525 + }, + { + "epoch": 0.64, + "grad_norm": 0.61328125, + "learning_rate": 6.951099359135399e-05, + "loss": 1.0439, + "step": 44530 + }, + { + "epoch": 0.64, + "grad_norm": 0.66796875, + "learning_rate": 6.948715038036417e-05, + "loss": 1.0237, + "step": 44535 + }, + { + "epoch": 0.64, + "grad_norm": 0.5234375, + "learning_rate": 6.946330908195457e-05, + "loss": 1.0154, + "step": 44540 + }, + { + "epoch": 0.64, + "grad_norm": 0.57421875, + "learning_rate": 6.943946969761951e-05, + "loss": 0.9348, + "step": 44545 + }, + { + "epoch": 0.64, + "grad_norm": 0.546875, + "learning_rate": 6.94156322288533e-05, + "loss": 0.9672, + "step": 44550 + }, + { + "epoch": 0.64, + "grad_norm": 0.53125, + "learning_rate": 6.93917966771501e-05, + "loss": 0.9573, + "step": 44555 + }, + { + "epoch": 0.64, + "grad_norm": 0.57421875, + "learning_rate": 6.936796304400395e-05, + "loss": 1.1258, + "step": 44560 + }, + { + "epoch": 0.64, + "grad_norm": 0.46875, + "learning_rate": 6.934413133090872e-05, + "loss": 0.9212, + "step": 44565 + }, + { + "epoch": 0.64, + "grad_norm": 0.62890625, + "learning_rate": 6.932030153935834e-05, + "loss": 0.9884, + "step": 44570 + }, + { + "epoch": 0.64, + "grad_norm": 0.57421875, + "learning_rate": 6.929647367084633e-05, + "loss": 0.9147, + "step": 44575 + }, + { + "epoch": 0.64, + "grad_norm": 0.6328125, + "learning_rate": 6.927264772686635e-05, + "loss": 1.0419, + "step": 44580 + }, + { + "epoch": 0.64, + "grad_norm": 0.640625, + "learning_rate": 6.924882370891179e-05, + "loss": 0.9673, + "step": 44585 + }, + { + "epoch": 0.64, + "grad_norm": 0.609375, + "learning_rate": 6.922500161847596e-05, + "loss": 0.9509, + "step": 44590 + }, + { + "epoch": 0.64, + "grad_norm": 0.55859375, + "learning_rate": 6.92011814570521e-05, + "loss": 1.022, + "step": 44595 + }, + { + "epoch": 0.64, + "grad_norm": 0.59765625, + "learning_rate": 6.917736322613329e-05, + "loss": 1.0838, + "step": 44600 + }, + { + "epoch": 0.64, + "grad_norm": 0.56640625, + "learning_rate": 6.915354692721242e-05, + "loss": 1.0785, + "step": 44605 + }, + { + "epoch": 0.64, + "grad_norm": 0.5703125, + "learning_rate": 6.912973256178236e-05, + "loss": 1.054, + "step": 44610 + }, + { + "epoch": 0.64, + "grad_norm": 0.515625, + "learning_rate": 6.910592013133584e-05, + "loss": 0.811, + "step": 44615 + }, + { + "epoch": 0.64, + "grad_norm": 0.53515625, + "learning_rate": 6.908210963736546e-05, + "loss": 0.9427, + "step": 44620 + }, + { + "epoch": 0.64, + "grad_norm": 0.51953125, + "learning_rate": 6.905830108136362e-05, + "loss": 0.8712, + "step": 44625 + }, + { + "epoch": 0.64, + "grad_norm": 0.5546875, + "learning_rate": 6.903449446482271e-05, + "loss": 0.8157, + "step": 44630 + }, + { + "epoch": 0.64, + "grad_norm": 0.6171875, + "learning_rate": 6.901068978923495e-05, + "loss": 1.0176, + "step": 44635 + }, + { + "epoch": 0.64, + "grad_norm": 0.6171875, + "learning_rate": 6.898688705609246e-05, + "loss": 0.875, + "step": 44640 + }, + { + "epoch": 0.64, + "grad_norm": 0.62109375, + "learning_rate": 6.896308626688719e-05, + "loss": 1.0577, + "step": 44645 + }, + { + "epoch": 0.64, + "grad_norm": 0.52734375, + "learning_rate": 6.893928742311104e-05, + "loss": 0.9952, + "step": 44650 + }, + { + "epoch": 0.64, + "grad_norm": 0.494140625, + "learning_rate": 6.891549052625574e-05, + "loss": 0.9785, + "step": 44655 + }, + { + "epoch": 0.64, + "grad_norm": 0.55859375, + "learning_rate": 6.889169557781285e-05, + "loss": 0.9298, + "step": 44660 + }, + { + "epoch": 0.64, + "grad_norm": 0.55078125, + "learning_rate": 6.886790257927395e-05, + "loss": 0.9041, + "step": 44665 + }, + { + "epoch": 0.64, + "grad_norm": 0.58203125, + "learning_rate": 6.884411153213037e-05, + "loss": 1.0325, + "step": 44670 + }, + { + "epoch": 0.64, + "grad_norm": 0.5390625, + "learning_rate": 6.88203224378734e-05, + "loss": 0.9072, + "step": 44675 + }, + { + "epoch": 0.64, + "grad_norm": 0.53515625, + "learning_rate": 6.879653529799408e-05, + "loss": 1.0979, + "step": 44680 + }, + { + "epoch": 0.64, + "grad_norm": 0.55078125, + "learning_rate": 6.877275011398346e-05, + "loss": 0.9291, + "step": 44685 + }, + { + "epoch": 0.64, + "grad_norm": 0.546875, + "learning_rate": 6.874896688733246e-05, + "loss": 0.953, + "step": 44690 + }, + { + "epoch": 0.64, + "grad_norm": 0.515625, + "learning_rate": 6.872518561953178e-05, + "loss": 0.9055, + "step": 44695 + }, + { + "epoch": 0.64, + "grad_norm": 0.72265625, + "learning_rate": 6.870140631207207e-05, + "loss": 1.0469, + "step": 44700 + }, + { + "epoch": 0.64, + "grad_norm": 0.5546875, + "learning_rate": 6.86776289664439e-05, + "loss": 0.885, + "step": 44705 + }, + { + "epoch": 0.64, + "grad_norm": 0.59375, + "learning_rate": 6.865385358413761e-05, + "loss": 0.9648, + "step": 44710 + }, + { + "epoch": 0.64, + "grad_norm": 0.66015625, + "learning_rate": 6.863008016664344e-05, + "loss": 0.9232, + "step": 44715 + }, + { + "epoch": 0.64, + "grad_norm": 0.5703125, + "learning_rate": 6.86063087154516e-05, + "loss": 0.974, + "step": 44720 + }, + { + "epoch": 0.64, + "grad_norm": 0.5625, + "learning_rate": 6.85825392320521e-05, + "loss": 1.0025, + "step": 44725 + }, + { + "epoch": 0.64, + "grad_norm": 0.55078125, + "learning_rate": 6.855877171793484e-05, + "loss": 0.9794, + "step": 44730 + }, + { + "epoch": 0.64, + "grad_norm": 0.546875, + "learning_rate": 6.853500617458955e-05, + "loss": 1.0223, + "step": 44735 + }, + { + "epoch": 0.64, + "grad_norm": 0.54296875, + "learning_rate": 6.851124260350588e-05, + "loss": 1.0199, + "step": 44740 + }, + { + "epoch": 0.64, + "grad_norm": 0.62109375, + "learning_rate": 6.848748100617342e-05, + "loss": 0.9964, + "step": 44745 + }, + { + "epoch": 0.64, + "grad_norm": 0.494140625, + "learning_rate": 6.846372138408152e-05, + "loss": 0.8822, + "step": 44750 + }, + { + "epoch": 0.64, + "grad_norm": 0.51953125, + "learning_rate": 6.843996373871948e-05, + "loss": 0.8908, + "step": 44755 + }, + { + "epoch": 0.64, + "grad_norm": 0.57421875, + "learning_rate": 6.841620807157647e-05, + "loss": 0.9091, + "step": 44760 + }, + { + "epoch": 0.64, + "grad_norm": 0.53515625, + "learning_rate": 6.839245438414152e-05, + "loss": 0.9188, + "step": 44765 + }, + { + "epoch": 0.64, + "grad_norm": 0.609375, + "learning_rate": 6.83687026779035e-05, + "loss": 1.1365, + "step": 44770 + }, + { + "epoch": 0.64, + "grad_norm": 0.58984375, + "learning_rate": 6.834495295435123e-05, + "loss": 1.0363, + "step": 44775 + }, + { + "epoch": 0.64, + "grad_norm": 0.515625, + "learning_rate": 6.832120521497339e-05, + "loss": 0.9475, + "step": 44780 + }, + { + "epoch": 0.64, + "grad_norm": 0.515625, + "learning_rate": 6.829745946125847e-05, + "loss": 0.9322, + "step": 44785 + }, + { + "epoch": 0.64, + "grad_norm": 0.63671875, + "learning_rate": 6.827371569469489e-05, + "loss": 1.1476, + "step": 44790 + }, + { + "epoch": 0.64, + "grad_norm": 0.53125, + "learning_rate": 6.824997391677092e-05, + "loss": 0.9128, + "step": 44795 + }, + { + "epoch": 0.64, + "grad_norm": 0.640625, + "learning_rate": 6.822623412897479e-05, + "loss": 0.9565, + "step": 44800 + }, + { + "epoch": 0.64, + "grad_norm": 0.5390625, + "learning_rate": 6.820249633279448e-05, + "loss": 0.9199, + "step": 44805 + }, + { + "epoch": 0.64, + "grad_norm": 0.5546875, + "learning_rate": 6.817876052971788e-05, + "loss": 1.0906, + "step": 44810 + }, + { + "epoch": 0.64, + "grad_norm": 0.48046875, + "learning_rate": 6.815502672123284e-05, + "loss": 0.8355, + "step": 44815 + }, + { + "epoch": 0.64, + "grad_norm": 0.578125, + "learning_rate": 6.813129490882699e-05, + "loss": 0.9748, + "step": 44820 + }, + { + "epoch": 0.64, + "grad_norm": 0.52734375, + "learning_rate": 6.810756509398786e-05, + "loss": 0.9507, + "step": 44825 + }, + { + "epoch": 0.64, + "grad_norm": 0.52734375, + "learning_rate": 6.808383727820292e-05, + "loss": 0.9011, + "step": 44830 + }, + { + "epoch": 0.64, + "grad_norm": 0.57421875, + "learning_rate": 6.806011146295937e-05, + "loss": 1.0287, + "step": 44835 + }, + { + "epoch": 0.64, + "grad_norm": 0.5390625, + "learning_rate": 6.80363876497444e-05, + "loss": 1.0217, + "step": 44840 + }, + { + "epoch": 0.64, + "grad_norm": 0.55078125, + "learning_rate": 6.801266584004507e-05, + "loss": 1.0007, + "step": 44845 + }, + { + "epoch": 0.64, + "grad_norm": 0.6953125, + "learning_rate": 6.798894603534827e-05, + "loss": 0.9481, + "step": 44850 + }, + { + "epoch": 0.64, + "grad_norm": 0.546875, + "learning_rate": 6.796522823714079e-05, + "loss": 1.0604, + "step": 44855 + }, + { + "epoch": 0.64, + "grad_norm": 0.59765625, + "learning_rate": 6.79415124469093e-05, + "loss": 0.9995, + "step": 44860 + }, + { + "epoch": 0.64, + "grad_norm": 0.53515625, + "learning_rate": 6.791779866614028e-05, + "loss": 0.951, + "step": 44865 + }, + { + "epoch": 0.64, + "grad_norm": 0.6015625, + "learning_rate": 6.789408689632021e-05, + "loss": 0.898, + "step": 44870 + }, + { + "epoch": 0.64, + "grad_norm": 0.51171875, + "learning_rate": 6.787037713893536e-05, + "loss": 1.0025, + "step": 44875 + }, + { + "epoch": 0.64, + "grad_norm": 0.56640625, + "learning_rate": 6.784666939547182e-05, + "loss": 0.974, + "step": 44880 + }, + { + "epoch": 0.64, + "grad_norm": 0.55859375, + "learning_rate": 6.782296366741574e-05, + "loss": 0.9054, + "step": 44885 + }, + { + "epoch": 0.64, + "grad_norm": 0.5703125, + "learning_rate": 6.779925995625287e-05, + "loss": 1.0215, + "step": 44890 + }, + { + "epoch": 0.64, + "grad_norm": 0.490234375, + "learning_rate": 6.777555826346907e-05, + "loss": 0.9463, + "step": 44895 + }, + { + "epoch": 0.64, + "grad_norm": 0.6328125, + "learning_rate": 6.775185859055e-05, + "loss": 1.0455, + "step": 44900 + }, + { + "epoch": 0.64, + "grad_norm": 0.51171875, + "learning_rate": 6.772816093898114e-05, + "loss": 0.8271, + "step": 44905 + }, + { + "epoch": 0.64, + "grad_norm": 0.490234375, + "learning_rate": 6.77044653102479e-05, + "loss": 0.9981, + "step": 44910 + }, + { + "epoch": 0.64, + "grad_norm": 0.53125, + "learning_rate": 6.768077170583558e-05, + "loss": 0.9118, + "step": 44915 + }, + { + "epoch": 0.64, + "grad_norm": 0.58984375, + "learning_rate": 6.765708012722927e-05, + "loss": 1.0437, + "step": 44920 + }, + { + "epoch": 0.64, + "grad_norm": 0.5390625, + "learning_rate": 6.763339057591404e-05, + "loss": 0.8474, + "step": 44925 + }, + { + "epoch": 0.64, + "grad_norm": 0.46875, + "learning_rate": 6.760970305337475e-05, + "loss": 0.8097, + "step": 44930 + }, + { + "epoch": 0.64, + "grad_norm": 0.52734375, + "learning_rate": 6.758601756109617e-05, + "loss": 0.8699, + "step": 44935 + }, + { + "epoch": 0.64, + "grad_norm": 0.546875, + "learning_rate": 6.756233410056292e-05, + "loss": 0.9699, + "step": 44940 + }, + { + "epoch": 0.64, + "grad_norm": 0.515625, + "learning_rate": 6.753865267325949e-05, + "loss": 0.9083, + "step": 44945 + }, + { + "epoch": 0.64, + "grad_norm": 0.57421875, + "learning_rate": 6.75149732806703e-05, + "loss": 0.8572, + "step": 44950 + }, + { + "epoch": 0.64, + "grad_norm": 0.5546875, + "learning_rate": 6.749129592427958e-05, + "loss": 1.0053, + "step": 44955 + }, + { + "epoch": 0.64, + "grad_norm": 0.453125, + "learning_rate": 6.746762060557143e-05, + "loss": 0.7956, + "step": 44960 + }, + { + "epoch": 0.65, + "grad_norm": 0.6171875, + "learning_rate": 6.74439473260299e-05, + "loss": 1.0633, + "step": 44965 + }, + { + "epoch": 0.65, + "grad_norm": 0.57421875, + "learning_rate": 6.742027608713883e-05, + "loss": 0.8319, + "step": 44970 + }, + { + "epoch": 0.65, + "grad_norm": 0.53125, + "learning_rate": 6.739660689038193e-05, + "loss": 0.9019, + "step": 44975 + }, + { + "epoch": 0.65, + "grad_norm": 0.6484375, + "learning_rate": 6.737293973724287e-05, + "loss": 0.9892, + "step": 44980 + }, + { + "epoch": 0.65, + "grad_norm": 0.55078125, + "learning_rate": 6.73492746292051e-05, + "loss": 0.9203, + "step": 44985 + }, + { + "epoch": 0.65, + "grad_norm": 0.5390625, + "learning_rate": 6.732561156775202e-05, + "loss": 1.0347, + "step": 44990 + }, + { + "epoch": 0.65, + "grad_norm": 0.6171875, + "learning_rate": 6.730195055436677e-05, + "loss": 0.9512, + "step": 44995 + }, + { + "epoch": 0.65, + "grad_norm": 0.5859375, + "learning_rate": 6.727829159053251e-05, + "loss": 1.0785, + "step": 45000 + }, + { + "epoch": 0.65, + "grad_norm": 0.54296875, + "learning_rate": 6.725463467773221e-05, + "loss": 0.9478, + "step": 45005 + }, + { + "epoch": 0.65, + "grad_norm": 0.5859375, + "learning_rate": 6.72309798174487e-05, + "loss": 0.943, + "step": 45010 + }, + { + "epoch": 0.65, + "grad_norm": 0.546875, + "learning_rate": 6.720732701116468e-05, + "loss": 0.9247, + "step": 45015 + }, + { + "epoch": 0.65, + "grad_norm": 0.58203125, + "learning_rate": 6.718367626036276e-05, + "loss": 0.8705, + "step": 45020 + }, + { + "epoch": 0.65, + "grad_norm": 0.478515625, + "learning_rate": 6.71600275665254e-05, + "loss": 0.9431, + "step": 45025 + }, + { + "epoch": 0.65, + "grad_norm": 0.5078125, + "learning_rate": 6.713638093113488e-05, + "loss": 0.932, + "step": 45030 + }, + { + "epoch": 0.65, + "grad_norm": 0.51953125, + "learning_rate": 6.711273635567346e-05, + "loss": 1.0349, + "step": 45035 + }, + { + "epoch": 0.65, + "grad_norm": 0.578125, + "learning_rate": 6.70890938416232e-05, + "loss": 0.988, + "step": 45040 + }, + { + "epoch": 0.65, + "grad_norm": 0.53515625, + "learning_rate": 6.7065453390466e-05, + "loss": 0.9581, + "step": 45045 + }, + { + "epoch": 0.65, + "grad_norm": 0.51953125, + "learning_rate": 6.704181500368368e-05, + "loss": 0.93, + "step": 45050 + }, + { + "epoch": 0.65, + "grad_norm": 0.51171875, + "learning_rate": 6.701817868275792e-05, + "loss": 1.0163, + "step": 45055 + }, + { + "epoch": 0.65, + "grad_norm": 0.61328125, + "learning_rate": 6.699454442917031e-05, + "loss": 0.9731, + "step": 45060 + }, + { + "epoch": 0.65, + "grad_norm": 0.5625, + "learning_rate": 6.697091224440221e-05, + "loss": 0.8179, + "step": 45065 + }, + { + "epoch": 0.65, + "grad_norm": 0.5703125, + "learning_rate": 6.6947282129935e-05, + "loss": 0.9169, + "step": 45070 + }, + { + "epoch": 0.65, + "grad_norm": 0.55078125, + "learning_rate": 6.692365408724976e-05, + "loss": 0.888, + "step": 45075 + }, + { + "epoch": 0.65, + "grad_norm": 0.515625, + "learning_rate": 6.690002811782754e-05, + "loss": 0.9491, + "step": 45080 + }, + { + "epoch": 0.65, + "grad_norm": 0.55859375, + "learning_rate": 6.687640422314927e-05, + "loss": 0.8988, + "step": 45085 + }, + { + "epoch": 0.65, + "grad_norm": 0.5703125, + "learning_rate": 6.685278240469572e-05, + "loss": 1.0214, + "step": 45090 + }, + { + "epoch": 0.65, + "grad_norm": 0.5546875, + "learning_rate": 6.682916266394753e-05, + "loss": 1.192, + "step": 45095 + }, + { + "epoch": 0.65, + "grad_norm": 0.5390625, + "learning_rate": 6.680554500238519e-05, + "loss": 0.9118, + "step": 45100 + }, + { + "epoch": 0.65, + "grad_norm": 0.62890625, + "learning_rate": 6.678192942148907e-05, + "loss": 1.0664, + "step": 45105 + }, + { + "epoch": 0.65, + "grad_norm": 0.640625, + "learning_rate": 6.675831592273947e-05, + "loss": 0.8504, + "step": 45110 + }, + { + "epoch": 0.65, + "grad_norm": 0.546875, + "learning_rate": 6.673470450761647e-05, + "loss": 0.974, + "step": 45115 + }, + { + "epoch": 0.65, + "grad_norm": 0.5390625, + "learning_rate": 6.671109517760009e-05, + "loss": 0.9236, + "step": 45120 + }, + { + "epoch": 0.65, + "grad_norm": 0.6953125, + "learning_rate": 6.668748793417017e-05, + "loss": 0.9077, + "step": 45125 + }, + { + "epoch": 0.65, + "grad_norm": 0.59375, + "learning_rate": 6.666388277880646e-05, + "loss": 0.9977, + "step": 45130 + }, + { + "epoch": 0.65, + "grad_norm": 0.56640625, + "learning_rate": 6.664027971298852e-05, + "loss": 1.0262, + "step": 45135 + }, + { + "epoch": 0.65, + "grad_norm": 0.6640625, + "learning_rate": 6.661667873819586e-05, + "loss": 1.0116, + "step": 45140 + }, + { + "epoch": 0.65, + "grad_norm": 0.66015625, + "learning_rate": 6.659307985590779e-05, + "loss": 0.8108, + "step": 45145 + }, + { + "epoch": 0.65, + "grad_norm": 0.53125, + "learning_rate": 6.656948306760356e-05, + "loss": 0.9178, + "step": 45150 + }, + { + "epoch": 0.65, + "grad_norm": 0.5859375, + "learning_rate": 6.654588837476216e-05, + "loss": 0.8277, + "step": 45155 + }, + { + "epoch": 0.65, + "grad_norm": 0.50390625, + "learning_rate": 6.652229577886258e-05, + "loss": 1.0075, + "step": 45160 + }, + { + "epoch": 0.65, + "grad_norm": 0.5546875, + "learning_rate": 6.649870528138364e-05, + "loss": 1.0151, + "step": 45165 + }, + { + "epoch": 0.65, + "grad_norm": 0.52734375, + "learning_rate": 6.647511688380402e-05, + "loss": 0.8129, + "step": 45170 + }, + { + "epoch": 0.65, + "grad_norm": 0.5546875, + "learning_rate": 6.64515305876022e-05, + "loss": 1.0257, + "step": 45175 + }, + { + "epoch": 0.65, + "grad_norm": 0.51171875, + "learning_rate": 6.642794639425671e-05, + "loss": 0.8383, + "step": 45180 + }, + { + "epoch": 0.65, + "grad_norm": 0.5546875, + "learning_rate": 6.640436430524576e-05, + "loss": 0.9116, + "step": 45185 + }, + { + "epoch": 0.65, + "grad_norm": 0.68359375, + "learning_rate": 6.638078432204749e-05, + "loss": 0.8931, + "step": 45190 + }, + { + "epoch": 0.65, + "grad_norm": 0.53125, + "learning_rate": 6.635720644613998e-05, + "loss": 0.8874, + "step": 45195 + }, + { + "epoch": 0.65, + "grad_norm": 0.546875, + "learning_rate": 6.63336306790011e-05, + "loss": 0.8541, + "step": 45200 + }, + { + "epoch": 0.65, + "grad_norm": 0.50390625, + "learning_rate": 6.631005702210857e-05, + "loss": 0.9455, + "step": 45205 + }, + { + "epoch": 0.65, + "grad_norm": 0.55859375, + "learning_rate": 6.628648547694006e-05, + "loss": 0.8394, + "step": 45210 + }, + { + "epoch": 0.65, + "grad_norm": 0.5859375, + "learning_rate": 6.626291604497299e-05, + "loss": 1.0215, + "step": 45215 + }, + { + "epoch": 0.65, + "grad_norm": 0.53515625, + "learning_rate": 6.623934872768478e-05, + "loss": 0.9224, + "step": 45220 + }, + { + "epoch": 0.65, + "grad_norm": 0.609375, + "learning_rate": 6.621578352655267e-05, + "loss": 0.9382, + "step": 45225 + }, + { + "epoch": 0.65, + "grad_norm": 0.6328125, + "learning_rate": 6.619222044305368e-05, + "loss": 1.0595, + "step": 45230 + }, + { + "epoch": 0.65, + "grad_norm": 0.55078125, + "learning_rate": 6.616865947866484e-05, + "loss": 0.8566, + "step": 45235 + }, + { + "epoch": 0.65, + "grad_norm": 0.53515625, + "learning_rate": 6.614510063486296e-05, + "loss": 0.8293, + "step": 45240 + }, + { + "epoch": 0.65, + "grad_norm": 0.54296875, + "learning_rate": 6.61215439131247e-05, + "loss": 0.8726, + "step": 45245 + }, + { + "epoch": 0.65, + "grad_norm": 0.578125, + "learning_rate": 6.609798931492671e-05, + "loss": 1.0158, + "step": 45250 + }, + { + "epoch": 0.65, + "grad_norm": 0.53515625, + "learning_rate": 6.607443684174533e-05, + "loss": 0.9727, + "step": 45255 + }, + { + "epoch": 0.65, + "grad_norm": 0.54296875, + "learning_rate": 6.605088649505689e-05, + "loss": 0.9833, + "step": 45260 + }, + { + "epoch": 0.65, + "grad_norm": 0.58203125, + "learning_rate": 6.602733827633756e-05, + "loss": 0.9667, + "step": 45265 + }, + { + "epoch": 0.65, + "grad_norm": 0.61328125, + "learning_rate": 6.600379218706331e-05, + "loss": 1.0605, + "step": 45270 + }, + { + "epoch": 0.65, + "grad_norm": 0.625, + "learning_rate": 6.598024822871014e-05, + "loss": 0.8976, + "step": 45275 + }, + { + "epoch": 0.65, + "grad_norm": 0.578125, + "learning_rate": 6.595670640275373e-05, + "loss": 0.9378, + "step": 45280 + }, + { + "epoch": 0.65, + "grad_norm": 0.64453125, + "learning_rate": 6.593316671066972e-05, + "loss": 0.9807, + "step": 45285 + }, + { + "epoch": 0.65, + "grad_norm": 0.470703125, + "learning_rate": 6.590962915393364e-05, + "loss": 0.9207, + "step": 45290 + }, + { + "epoch": 0.65, + "grad_norm": 0.625, + "learning_rate": 6.588609373402084e-05, + "loss": 1.0662, + "step": 45295 + }, + { + "epoch": 0.65, + "grad_norm": 0.55078125, + "learning_rate": 6.58625604524065e-05, + "loss": 0.9258, + "step": 45300 + }, + { + "epoch": 0.65, + "grad_norm": 0.5234375, + "learning_rate": 6.583902931056582e-05, + "loss": 0.9294, + "step": 45305 + }, + { + "epoch": 0.65, + "grad_norm": 0.5546875, + "learning_rate": 6.581550030997363e-05, + "loss": 0.9434, + "step": 45310 + }, + { + "epoch": 0.65, + "grad_norm": 0.56640625, + "learning_rate": 6.579197345210483e-05, + "loss": 1.028, + "step": 45315 + }, + { + "epoch": 0.65, + "grad_norm": 0.5390625, + "learning_rate": 6.576844873843409e-05, + "loss": 0.8776, + "step": 45320 + }, + { + "epoch": 0.65, + "grad_norm": 0.58984375, + "learning_rate": 6.574492617043596e-05, + "loss": 1.091, + "step": 45325 + }, + { + "epoch": 0.65, + "grad_norm": 0.53515625, + "learning_rate": 6.572140574958488e-05, + "loss": 0.8153, + "step": 45330 + }, + { + "epoch": 0.65, + "grad_norm": 0.71484375, + "learning_rate": 6.569788747735515e-05, + "loss": 1.0016, + "step": 45335 + }, + { + "epoch": 0.65, + "grad_norm": 0.546875, + "learning_rate": 6.567437135522085e-05, + "loss": 0.9558, + "step": 45340 + }, + { + "epoch": 0.65, + "grad_norm": 0.60546875, + "learning_rate": 6.565085738465608e-05, + "loss": 1.0025, + "step": 45345 + }, + { + "epoch": 0.65, + "grad_norm": 0.51171875, + "learning_rate": 6.56273455671347e-05, + "loss": 0.8308, + "step": 45350 + }, + { + "epoch": 0.65, + "grad_norm": 0.578125, + "learning_rate": 6.560383590413042e-05, + "loss": 1.0665, + "step": 45355 + }, + { + "epoch": 0.65, + "grad_norm": 0.6171875, + "learning_rate": 6.558032839711693e-05, + "loss": 0.9764, + "step": 45360 + }, + { + "epoch": 0.65, + "grad_norm": 0.55078125, + "learning_rate": 6.555682304756761e-05, + "loss": 0.8749, + "step": 45365 + }, + { + "epoch": 0.65, + "grad_norm": 0.54296875, + "learning_rate": 6.553331985695586e-05, + "loss": 1.0905, + "step": 45370 + }, + { + "epoch": 0.65, + "grad_norm": 0.578125, + "learning_rate": 6.550981882675487e-05, + "loss": 1.0649, + "step": 45375 + }, + { + "epoch": 0.65, + "grad_norm": 0.54296875, + "learning_rate": 6.54863199584377e-05, + "loss": 0.9374, + "step": 45380 + }, + { + "epoch": 0.65, + "grad_norm": 0.56640625, + "learning_rate": 6.546282325347733e-05, + "loss": 1.1363, + "step": 45385 + }, + { + "epoch": 0.65, + "grad_norm": 0.60546875, + "learning_rate": 6.543932871334652e-05, + "loss": 0.8631, + "step": 45390 + }, + { + "epoch": 0.65, + "grad_norm": 0.54296875, + "learning_rate": 6.541583633951795e-05, + "loss": 0.7846, + "step": 45395 + }, + { + "epoch": 0.65, + "grad_norm": 0.55078125, + "learning_rate": 6.539234613346415e-05, + "loss": 0.9328, + "step": 45400 + }, + { + "epoch": 0.65, + "grad_norm": 0.58203125, + "learning_rate": 6.536885809665752e-05, + "loss": 1.0687, + "step": 45405 + }, + { + "epoch": 0.65, + "grad_norm": 0.51953125, + "learning_rate": 6.53453722305703e-05, + "loss": 0.7834, + "step": 45410 + }, + { + "epoch": 0.65, + "grad_norm": 0.546875, + "learning_rate": 6.532188853667462e-05, + "loss": 0.9781, + "step": 45415 + }, + { + "epoch": 0.65, + "grad_norm": 0.4921875, + "learning_rate": 6.529840701644245e-05, + "loss": 0.9171, + "step": 45420 + }, + { + "epoch": 0.65, + "grad_norm": 0.5546875, + "learning_rate": 6.527492767134566e-05, + "loss": 0.9773, + "step": 45425 + }, + { + "epoch": 0.65, + "grad_norm": 0.48046875, + "learning_rate": 6.525145050285594e-05, + "loss": 0.8882, + "step": 45430 + }, + { + "epoch": 0.65, + "grad_norm": 0.51171875, + "learning_rate": 6.522797551244487e-05, + "loss": 0.944, + "step": 45435 + }, + { + "epoch": 0.65, + "grad_norm": 0.53125, + "learning_rate": 6.520450270158391e-05, + "loss": 0.9338, + "step": 45440 + }, + { + "epoch": 0.65, + "grad_norm": 0.60546875, + "learning_rate": 6.518103207174436e-05, + "loss": 0.9582, + "step": 45445 + }, + { + "epoch": 0.65, + "grad_norm": 0.55078125, + "learning_rate": 6.515756362439736e-05, + "loss": 0.9155, + "step": 45450 + }, + { + "epoch": 0.65, + "grad_norm": 0.53125, + "learning_rate": 6.513409736101396e-05, + "loss": 0.9894, + "step": 45455 + }, + { + "epoch": 0.65, + "grad_norm": 0.6171875, + "learning_rate": 6.511063328306504e-05, + "loss": 1.0464, + "step": 45460 + }, + { + "epoch": 0.65, + "grad_norm": 0.56640625, + "learning_rate": 6.508717139202139e-05, + "loss": 0.9744, + "step": 45465 + }, + { + "epoch": 0.65, + "grad_norm": 0.6015625, + "learning_rate": 6.506371168935359e-05, + "loss": 0.996, + "step": 45470 + }, + { + "epoch": 0.65, + "grad_norm": 0.5234375, + "learning_rate": 6.50402541765321e-05, + "loss": 0.8536, + "step": 45475 + }, + { + "epoch": 0.65, + "grad_norm": 0.63671875, + "learning_rate": 6.501679885502731e-05, + "loss": 1.0797, + "step": 45480 + }, + { + "epoch": 0.65, + "grad_norm": 0.62890625, + "learning_rate": 6.499334572630942e-05, + "loss": 0.9744, + "step": 45485 + }, + { + "epoch": 0.65, + "grad_norm": 0.5390625, + "learning_rate": 6.496989479184847e-05, + "loss": 0.921, + "step": 45490 + }, + { + "epoch": 0.65, + "grad_norm": 0.6171875, + "learning_rate": 6.494644605311442e-05, + "loss": 1.087, + "step": 45495 + }, + { + "epoch": 0.65, + "grad_norm": 0.5625, + "learning_rate": 6.492299951157706e-05, + "loss": 1.0464, + "step": 45500 + }, + { + "epoch": 0.65, + "grad_norm": 0.61328125, + "learning_rate": 6.489955516870601e-05, + "loss": 0.8365, + "step": 45505 + }, + { + "epoch": 0.65, + "grad_norm": 0.609375, + "learning_rate": 6.487611302597085e-05, + "loss": 0.9386, + "step": 45510 + }, + { + "epoch": 0.65, + "grad_norm": 0.55859375, + "learning_rate": 6.485267308484095e-05, + "loss": 0.9561, + "step": 45515 + }, + { + "epoch": 0.65, + "grad_norm": 0.5703125, + "learning_rate": 6.482923534678552e-05, + "loss": 0.8684, + "step": 45520 + }, + { + "epoch": 0.65, + "grad_norm": 0.6171875, + "learning_rate": 6.480579981327365e-05, + "loss": 0.9156, + "step": 45525 + }, + { + "epoch": 0.65, + "grad_norm": 0.5859375, + "learning_rate": 6.478236648577431e-05, + "loss": 0.9276, + "step": 45530 + }, + { + "epoch": 0.65, + "grad_norm": 0.625, + "learning_rate": 6.475893536575639e-05, + "loss": 0.8871, + "step": 45535 + }, + { + "epoch": 0.65, + "grad_norm": 0.546875, + "learning_rate": 6.473550645468853e-05, + "loss": 0.9564, + "step": 45540 + }, + { + "epoch": 0.65, + "grad_norm": 0.5078125, + "learning_rate": 6.471207975403926e-05, + "loss": 0.7958, + "step": 45545 + }, + { + "epoch": 0.65, + "grad_norm": 0.486328125, + "learning_rate": 6.468865526527704e-05, + "loss": 0.8408, + "step": 45550 + }, + { + "epoch": 0.65, + "grad_norm": 0.5078125, + "learning_rate": 6.466523298987013e-05, + "loss": 0.8427, + "step": 45555 + }, + { + "epoch": 0.65, + "grad_norm": 0.53515625, + "learning_rate": 6.464181292928664e-05, + "loss": 1.0888, + "step": 45560 + }, + { + "epoch": 0.65, + "grad_norm": 0.5546875, + "learning_rate": 6.461839508499461e-05, + "loss": 0.9834, + "step": 45565 + }, + { + "epoch": 0.65, + "grad_norm": 0.6953125, + "learning_rate": 6.459497945846189e-05, + "loss": 1.12, + "step": 45570 + }, + { + "epoch": 0.65, + "grad_norm": 0.640625, + "learning_rate": 6.457156605115615e-05, + "loss": 1.0514, + "step": 45575 + }, + { + "epoch": 0.65, + "grad_norm": 0.57421875, + "learning_rate": 6.454815486454501e-05, + "loss": 0.982, + "step": 45580 + }, + { + "epoch": 0.65, + "grad_norm": 0.51171875, + "learning_rate": 6.452474590009587e-05, + "loss": 0.9026, + "step": 45585 + }, + { + "epoch": 0.65, + "grad_norm": 0.578125, + "learning_rate": 6.450133915927609e-05, + "loss": 0.9451, + "step": 45590 + }, + { + "epoch": 0.65, + "grad_norm": 0.490234375, + "learning_rate": 6.447793464355279e-05, + "loss": 0.986, + "step": 45595 + }, + { + "epoch": 0.65, + "grad_norm": 0.58203125, + "learning_rate": 6.445453235439299e-05, + "loss": 0.9617, + "step": 45600 + }, + { + "epoch": 0.65, + "grad_norm": 0.6015625, + "learning_rate": 6.44311322932636e-05, + "loss": 0.7838, + "step": 45605 + }, + { + "epoch": 0.65, + "grad_norm": 0.6953125, + "learning_rate": 6.440773446163135e-05, + "loss": 1.0951, + "step": 45610 + }, + { + "epoch": 0.65, + "grad_norm": 0.56640625, + "learning_rate": 6.438433886096283e-05, + "loss": 0.9337, + "step": 45615 + }, + { + "epoch": 0.65, + "grad_norm": 0.61328125, + "learning_rate": 6.436094549272457e-05, + "loss": 1.0079, + "step": 45620 + }, + { + "epoch": 0.65, + "grad_norm": 0.51171875, + "learning_rate": 6.433755435838277e-05, + "loss": 0.937, + "step": 45625 + }, + { + "epoch": 0.65, + "grad_norm": 0.65234375, + "learning_rate": 6.43141654594037e-05, + "loss": 0.85, + "step": 45630 + }, + { + "epoch": 0.65, + "grad_norm": 0.5859375, + "learning_rate": 6.429077879725338e-05, + "loss": 1.2354, + "step": 45635 + }, + { + "epoch": 0.65, + "grad_norm": 0.5625, + "learning_rate": 6.42673943733977e-05, + "loss": 1.0328, + "step": 45640 + }, + { + "epoch": 0.65, + "grad_norm": 0.4765625, + "learning_rate": 6.424401218930245e-05, + "loss": 0.9628, + "step": 45645 + }, + { + "epoch": 0.65, + "grad_norm": 0.625, + "learning_rate": 6.422063224643325e-05, + "loss": 1.0466, + "step": 45650 + }, + { + "epoch": 0.65, + "grad_norm": 0.58984375, + "learning_rate": 6.419725454625554e-05, + "loss": 1.0123, + "step": 45655 + }, + { + "epoch": 0.65, + "grad_norm": 0.515625, + "learning_rate": 6.417387909023471e-05, + "loss": 0.8303, + "step": 45660 + }, + { + "epoch": 0.66, + "grad_norm": 0.4765625, + "learning_rate": 6.415050587983593e-05, + "loss": 0.9342, + "step": 45665 + }, + { + "epoch": 0.66, + "grad_norm": 0.5078125, + "learning_rate": 6.412713491652427e-05, + "loss": 0.8515, + "step": 45670 + }, + { + "epoch": 0.66, + "grad_norm": 0.5703125, + "learning_rate": 6.410376620176468e-05, + "loss": 0.9153, + "step": 45675 + }, + { + "epoch": 0.66, + "grad_norm": 0.62890625, + "learning_rate": 6.40803997370219e-05, + "loss": 0.895, + "step": 45680 + }, + { + "epoch": 0.66, + "grad_norm": 0.55078125, + "learning_rate": 6.405703552376057e-05, + "loss": 0.9686, + "step": 45685 + }, + { + "epoch": 0.66, + "grad_norm": 0.486328125, + "learning_rate": 6.403367356344517e-05, + "loss": 0.9334, + "step": 45690 + }, + { + "epoch": 0.66, + "grad_norm": 0.5703125, + "learning_rate": 6.401031385754006e-05, + "loss": 0.9136, + "step": 45695 + }, + { + "epoch": 0.66, + "grad_norm": 0.5546875, + "learning_rate": 6.39869564075095e-05, + "loss": 1.0172, + "step": 45700 + }, + { + "epoch": 0.66, + "grad_norm": 0.61328125, + "learning_rate": 6.396360121481752e-05, + "loss": 1.1214, + "step": 45705 + }, + { + "epoch": 0.66, + "grad_norm": 0.53515625, + "learning_rate": 6.394024828092804e-05, + "loss": 0.891, + "step": 45710 + }, + { + "epoch": 0.66, + "grad_norm": 0.486328125, + "learning_rate": 6.391689760730488e-05, + "loss": 0.8239, + "step": 45715 + }, + { + "epoch": 0.66, + "grad_norm": 0.578125, + "learning_rate": 6.389354919541169e-05, + "loss": 0.9533, + "step": 45720 + }, + { + "epoch": 0.66, + "grad_norm": 0.55078125, + "learning_rate": 6.387020304671197e-05, + "loss": 0.9281, + "step": 45725 + }, + { + "epoch": 0.66, + "grad_norm": 0.671875, + "learning_rate": 6.384685916266901e-05, + "loss": 0.9008, + "step": 45730 + }, + { + "epoch": 0.66, + "grad_norm": 0.6484375, + "learning_rate": 6.382351754474614e-05, + "loss": 0.8932, + "step": 45735 + }, + { + "epoch": 0.66, + "grad_norm": 0.51171875, + "learning_rate": 6.380017819440638e-05, + "loss": 0.8201, + "step": 45740 + }, + { + "epoch": 0.66, + "grad_norm": 0.5546875, + "learning_rate": 6.377684111311267e-05, + "loss": 0.9648, + "step": 45745 + }, + { + "epoch": 0.66, + "grad_norm": 0.5234375, + "learning_rate": 6.375350630232782e-05, + "loss": 0.916, + "step": 45750 + }, + { + "epoch": 0.66, + "grad_norm": 0.482421875, + "learning_rate": 6.373017376351447e-05, + "loss": 0.9437, + "step": 45755 + }, + { + "epoch": 0.66, + "grad_norm": 0.52734375, + "learning_rate": 6.370684349813515e-05, + "loss": 0.894, + "step": 45760 + }, + { + "epoch": 0.66, + "grad_norm": 0.5390625, + "learning_rate": 6.36835155076522e-05, + "loss": 1.1337, + "step": 45765 + }, + { + "epoch": 0.66, + "grad_norm": 0.5703125, + "learning_rate": 6.366018979352786e-05, + "loss": 1.0193, + "step": 45770 + }, + { + "epoch": 0.66, + "grad_norm": 0.6328125, + "learning_rate": 6.363686635722421e-05, + "loss": 1.1689, + "step": 45775 + }, + { + "epoch": 0.66, + "grad_norm": 0.52734375, + "learning_rate": 6.361354520020324e-05, + "loss": 1.0801, + "step": 45780 + }, + { + "epoch": 0.66, + "grad_norm": 0.53515625, + "learning_rate": 6.359022632392663e-05, + "loss": 0.8107, + "step": 45785 + }, + { + "epoch": 0.66, + "grad_norm": 0.51953125, + "learning_rate": 6.356690972985612e-05, + "loss": 0.9673, + "step": 45790 + }, + { + "epoch": 0.66, + "grad_norm": 0.52734375, + "learning_rate": 6.354359541945323e-05, + "loss": 0.963, + "step": 45795 + }, + { + "epoch": 0.66, + "grad_norm": 0.671875, + "learning_rate": 6.352028339417926e-05, + "loss": 0.7841, + "step": 45800 + }, + { + "epoch": 0.66, + "grad_norm": 0.64453125, + "learning_rate": 6.349697365549549e-05, + "loss": 1.0819, + "step": 45805 + }, + { + "epoch": 0.66, + "grad_norm": 0.59765625, + "learning_rate": 6.3473666204863e-05, + "loss": 1.0213, + "step": 45810 + }, + { + "epoch": 0.66, + "grad_norm": 0.60546875, + "learning_rate": 6.345036104374267e-05, + "loss": 0.9277, + "step": 45815 + }, + { + "epoch": 0.66, + "grad_norm": 0.52734375, + "learning_rate": 6.342705817359538e-05, + "loss": 1.005, + "step": 45820 + }, + { + "epoch": 0.66, + "grad_norm": 0.578125, + "learning_rate": 6.340375759588173e-05, + "loss": 1.0071, + "step": 45825 + }, + { + "epoch": 0.66, + "grad_norm": 0.52734375, + "learning_rate": 6.338045931206224e-05, + "loss": 0.9861, + "step": 45830 + }, + { + "epoch": 0.66, + "grad_norm": 0.5390625, + "learning_rate": 6.335716332359725e-05, + "loss": 0.9195, + "step": 45835 + }, + { + "epoch": 0.66, + "grad_norm": 0.58203125, + "learning_rate": 6.333386963194699e-05, + "loss": 1.1381, + "step": 45840 + }, + { + "epoch": 0.66, + "grad_norm": 0.58203125, + "learning_rate": 6.331057823857156e-05, + "loss": 0.8934, + "step": 45845 + }, + { + "epoch": 0.66, + "grad_norm": 0.5390625, + "learning_rate": 6.328728914493085e-05, + "loss": 0.8748, + "step": 45850 + }, + { + "epoch": 0.66, + "grad_norm": 0.51953125, + "learning_rate": 6.326400235248466e-05, + "loss": 0.8566, + "step": 45855 + }, + { + "epoch": 0.66, + "grad_norm": 0.50390625, + "learning_rate": 6.324071786269268e-05, + "loss": 0.8186, + "step": 45860 + }, + { + "epoch": 0.66, + "grad_norm": 0.57421875, + "learning_rate": 6.321743567701435e-05, + "loss": 1.1196, + "step": 45865 + }, + { + "epoch": 0.66, + "grad_norm": 0.71875, + "learning_rate": 6.319415579690902e-05, + "loss": 0.9681, + "step": 45870 + }, + { + "epoch": 0.66, + "grad_norm": 0.51953125, + "learning_rate": 6.317087822383596e-05, + "loss": 0.8658, + "step": 45875 + }, + { + "epoch": 0.66, + "grad_norm": 0.5234375, + "learning_rate": 6.314760295925418e-05, + "loss": 1.0102, + "step": 45880 + }, + { + "epoch": 0.66, + "grad_norm": 0.5390625, + "learning_rate": 6.312433000462266e-05, + "loss": 1.0275, + "step": 45885 + }, + { + "epoch": 0.66, + "grad_norm": 0.56640625, + "learning_rate": 6.310105936140009e-05, + "loss": 1.1024, + "step": 45890 + }, + { + "epoch": 0.66, + "grad_norm": 0.546875, + "learning_rate": 6.307779103104513e-05, + "loss": 0.876, + "step": 45895 + }, + { + "epoch": 0.66, + "grad_norm": 0.498046875, + "learning_rate": 6.30545250150163e-05, + "loss": 0.8698, + "step": 45900 + }, + { + "epoch": 0.66, + "grad_norm": 0.53515625, + "learning_rate": 6.303126131477193e-05, + "loss": 0.8891, + "step": 45905 + }, + { + "epoch": 0.66, + "grad_norm": 0.65234375, + "learning_rate": 6.300799993177017e-05, + "loss": 0.8649, + "step": 45910 + }, + { + "epoch": 0.66, + "grad_norm": 0.65234375, + "learning_rate": 6.298474086746913e-05, + "loss": 0.9766, + "step": 45915 + }, + { + "epoch": 0.66, + "grad_norm": 0.53125, + "learning_rate": 6.29614841233267e-05, + "loss": 0.9994, + "step": 45920 + }, + { + "epoch": 0.66, + "grad_norm": 0.58203125, + "learning_rate": 6.293822970080059e-05, + "loss": 0.9491, + "step": 45925 + }, + { + "epoch": 0.66, + "grad_norm": 0.59375, + "learning_rate": 6.291497760134848e-05, + "loss": 0.8919, + "step": 45930 + }, + { + "epoch": 0.66, + "grad_norm": 0.49609375, + "learning_rate": 6.289172782642782e-05, + "loss": 1.018, + "step": 45935 + }, + { + "epoch": 0.66, + "grad_norm": 0.5078125, + "learning_rate": 6.286848037749593e-05, + "loss": 0.9458, + "step": 45940 + }, + { + "epoch": 0.66, + "grad_norm": 0.51953125, + "learning_rate": 6.284523525600996e-05, + "loss": 0.8374, + "step": 45945 + }, + { + "epoch": 0.66, + "grad_norm": 0.58203125, + "learning_rate": 6.282199246342694e-05, + "loss": 0.9878, + "step": 45950 + }, + { + "epoch": 0.66, + "grad_norm": 0.578125, + "learning_rate": 6.27987520012038e-05, + "loss": 0.874, + "step": 45955 + }, + { + "epoch": 0.66, + "grad_norm": 0.5546875, + "learning_rate": 6.277551387079725e-05, + "loss": 0.9338, + "step": 45960 + }, + { + "epoch": 0.66, + "grad_norm": 0.55078125, + "learning_rate": 6.275227807366387e-05, + "loss": 0.95, + "step": 45965 + }, + { + "epoch": 0.66, + "grad_norm": 0.625, + "learning_rate": 6.272904461126012e-05, + "loss": 0.9569, + "step": 45970 + }, + { + "epoch": 0.66, + "grad_norm": 0.55859375, + "learning_rate": 6.270581348504233e-05, + "loss": 0.9139, + "step": 45975 + }, + { + "epoch": 0.66, + "grad_norm": 0.494140625, + "learning_rate": 6.26825846964666e-05, + "loss": 1.0085, + "step": 45980 + }, + { + "epoch": 0.66, + "grad_norm": 0.5625, + "learning_rate": 6.265935824698897e-05, + "loss": 1.03, + "step": 45985 + }, + { + "epoch": 0.66, + "grad_norm": 0.59375, + "learning_rate": 6.263613413806532e-05, + "loss": 0.9165, + "step": 45990 + }, + { + "epoch": 0.66, + "grad_norm": 0.609375, + "learning_rate": 6.261291237115132e-05, + "loss": 1.1229, + "step": 45995 + }, + { + "epoch": 0.66, + "grad_norm": 0.54296875, + "learning_rate": 6.258969294770255e-05, + "loss": 0.9052, + "step": 46000 + }, + { + "epoch": 0.66, + "grad_norm": 0.61328125, + "learning_rate": 6.256647586917441e-05, + "loss": 0.9974, + "step": 46005 + }, + { + "epoch": 0.66, + "grad_norm": 0.50390625, + "learning_rate": 6.254326113702222e-05, + "loss": 0.8159, + "step": 46010 + }, + { + "epoch": 0.66, + "grad_norm": 0.5625, + "learning_rate": 6.252004875270107e-05, + "loss": 0.9784, + "step": 46015 + }, + { + "epoch": 0.66, + "grad_norm": 0.50390625, + "learning_rate": 6.249683871766594e-05, + "loss": 0.8097, + "step": 46020 + }, + { + "epoch": 0.66, + "grad_norm": 0.5546875, + "learning_rate": 6.24736310333717e-05, + "loss": 1.0112, + "step": 46025 + }, + { + "epoch": 0.66, + "grad_norm": 0.5859375, + "learning_rate": 6.245042570127299e-05, + "loss": 0.9121, + "step": 46030 + }, + { + "epoch": 0.66, + "grad_norm": 0.51953125, + "learning_rate": 6.242722272282436e-05, + "loss": 0.9148, + "step": 46035 + }, + { + "epoch": 0.66, + "grad_norm": 0.60546875, + "learning_rate": 6.240402209948024e-05, + "loss": 1.0444, + "step": 46040 + }, + { + "epoch": 0.66, + "grad_norm": 0.5078125, + "learning_rate": 6.23808238326948e-05, + "loss": 0.8895, + "step": 46045 + }, + { + "epoch": 0.66, + "grad_norm": 0.5390625, + "learning_rate": 6.235762792392215e-05, + "loss": 0.839, + "step": 46050 + }, + { + "epoch": 0.66, + "grad_norm": 0.5625, + "learning_rate": 6.233443437461628e-05, + "loss": 1.0243, + "step": 46055 + }, + { + "epoch": 0.66, + "grad_norm": 0.52734375, + "learning_rate": 6.231124318623094e-05, + "loss": 0.81, + "step": 46060 + }, + { + "epoch": 0.66, + "grad_norm": 0.6484375, + "learning_rate": 6.22880543602198e-05, + "loss": 1.1031, + "step": 46065 + }, + { + "epoch": 0.66, + "grad_norm": 0.62890625, + "learning_rate": 6.226486789803638e-05, + "loss": 0.9281, + "step": 46070 + }, + { + "epoch": 0.66, + "grad_norm": 0.54296875, + "learning_rate": 6.2241683801134e-05, + "loss": 1.0277, + "step": 46075 + }, + { + "epoch": 0.66, + "grad_norm": 0.51171875, + "learning_rate": 6.221850207096589e-05, + "loss": 0.9444, + "step": 46080 + }, + { + "epoch": 0.66, + "grad_norm": 0.5703125, + "learning_rate": 6.219532270898511e-05, + "loss": 0.9583, + "step": 46085 + }, + { + "epoch": 0.66, + "grad_norm": 0.439453125, + "learning_rate": 6.217214571664453e-05, + "loss": 0.7989, + "step": 46090 + }, + { + "epoch": 0.66, + "grad_norm": 0.65234375, + "learning_rate": 6.214897109539701e-05, + "loss": 1.0164, + "step": 46095 + }, + { + "epoch": 0.66, + "grad_norm": 0.58984375, + "learning_rate": 6.212579884669503e-05, + "loss": 0.9877, + "step": 46100 + }, + { + "epoch": 0.66, + "grad_norm": 0.52734375, + "learning_rate": 6.210262897199113e-05, + "loss": 0.8748, + "step": 46105 + }, + { + "epoch": 0.66, + "grad_norm": 0.5859375, + "learning_rate": 6.20794614727376e-05, + "loss": 1.0434, + "step": 46110 + }, + { + "epoch": 0.66, + "grad_norm": 0.66796875, + "learning_rate": 6.20562963503866e-05, + "loss": 1.0184, + "step": 46115 + }, + { + "epoch": 0.66, + "grad_norm": 0.65234375, + "learning_rate": 6.203313360639019e-05, + "loss": 0.9778, + "step": 46120 + }, + { + "epoch": 0.66, + "grad_norm": 0.62890625, + "learning_rate": 6.200997324220018e-05, + "loss": 1.0332, + "step": 46125 + }, + { + "epoch": 0.66, + "grad_norm": 0.5546875, + "learning_rate": 6.198681525926831e-05, + "loss": 0.9329, + "step": 46130 + }, + { + "epoch": 0.66, + "grad_norm": 0.515625, + "learning_rate": 6.196365965904617e-05, + "loss": 0.8952, + "step": 46135 + }, + { + "epoch": 0.66, + "grad_norm": 0.625, + "learning_rate": 6.194050644298517e-05, + "loss": 0.8317, + "step": 46140 + }, + { + "epoch": 0.66, + "grad_norm": 0.52734375, + "learning_rate": 6.191735561253658e-05, + "loss": 1.0471, + "step": 46145 + }, + { + "epoch": 0.66, + "grad_norm": 0.51171875, + "learning_rate": 6.189420716915149e-05, + "loss": 1.0214, + "step": 46150 + }, + { + "epoch": 0.66, + "grad_norm": 0.56640625, + "learning_rate": 6.187106111428089e-05, + "loss": 1.022, + "step": 46155 + }, + { + "epoch": 0.66, + "grad_norm": 0.5546875, + "learning_rate": 6.184791744937562e-05, + "loss": 0.9737, + "step": 46160 + }, + { + "epoch": 0.66, + "grad_norm": 0.765625, + "learning_rate": 6.182477617588634e-05, + "loss": 0.9934, + "step": 46165 + }, + { + "epoch": 0.66, + "grad_norm": 0.58984375, + "learning_rate": 6.180163729526353e-05, + "loss": 0.9681, + "step": 46170 + }, + { + "epoch": 0.66, + "grad_norm": 0.578125, + "learning_rate": 6.177850080895764e-05, + "loss": 0.9406, + "step": 46175 + }, + { + "epoch": 0.66, + "grad_norm": 0.58984375, + "learning_rate": 6.175536671841882e-05, + "loss": 0.8751, + "step": 46180 + }, + { + "epoch": 0.66, + "grad_norm": 0.498046875, + "learning_rate": 6.173223502509717e-05, + "loss": 0.8101, + "step": 46185 + }, + { + "epoch": 0.66, + "grad_norm": 0.53515625, + "learning_rate": 6.170910573044265e-05, + "loss": 0.8607, + "step": 46190 + }, + { + "epoch": 0.66, + "grad_norm": 0.578125, + "learning_rate": 6.168597883590495e-05, + "loss": 0.954, + "step": 46195 + }, + { + "epoch": 0.66, + "grad_norm": 0.53515625, + "learning_rate": 6.166285434293378e-05, + "loss": 0.9483, + "step": 46200 + }, + { + "epoch": 0.66, + "grad_norm": 0.494140625, + "learning_rate": 6.163973225297855e-05, + "loss": 1.0113, + "step": 46205 + }, + { + "epoch": 0.66, + "grad_norm": 0.486328125, + "learning_rate": 6.161661256748856e-05, + "loss": 0.9647, + "step": 46210 + }, + { + "epoch": 0.66, + "grad_norm": 0.53125, + "learning_rate": 6.159349528791302e-05, + "loss": 1.0275, + "step": 46215 + }, + { + "epoch": 0.66, + "grad_norm": 0.5390625, + "learning_rate": 6.157038041570094e-05, + "loss": 0.9002, + "step": 46220 + }, + { + "epoch": 0.66, + "grad_norm": 0.60546875, + "learning_rate": 6.154726795230117e-05, + "loss": 1.0381, + "step": 46225 + }, + { + "epoch": 0.66, + "grad_norm": 0.57421875, + "learning_rate": 6.152415789916246e-05, + "loss": 0.9387, + "step": 46230 + }, + { + "epoch": 0.66, + "grad_norm": 0.5859375, + "learning_rate": 6.150105025773335e-05, + "loss": 0.9686, + "step": 46235 + }, + { + "epoch": 0.66, + "grad_norm": 0.60546875, + "learning_rate": 6.147794502946223e-05, + "loss": 1.0323, + "step": 46240 + }, + { + "epoch": 0.66, + "grad_norm": 0.60546875, + "learning_rate": 6.145484221579743e-05, + "loss": 0.8602, + "step": 46245 + }, + { + "epoch": 0.66, + "grad_norm": 0.5625, + "learning_rate": 6.143174181818701e-05, + "loss": 0.866, + "step": 46250 + }, + { + "epoch": 0.66, + "grad_norm": 0.61328125, + "learning_rate": 6.140864383807894e-05, + "loss": 1.0404, + "step": 46255 + }, + { + "epoch": 0.66, + "grad_norm": 0.5703125, + "learning_rate": 6.138554827692103e-05, + "loss": 1.0325, + "step": 46260 + }, + { + "epoch": 0.66, + "grad_norm": 0.65234375, + "learning_rate": 6.13624551361609e-05, + "loss": 1.0028, + "step": 46265 + }, + { + "epoch": 0.66, + "grad_norm": 0.52734375, + "learning_rate": 6.13393644172461e-05, + "loss": 0.7928, + "step": 46270 + }, + { + "epoch": 0.66, + "grad_norm": 0.5234375, + "learning_rate": 6.131627612162397e-05, + "loss": 1.0011, + "step": 46275 + }, + { + "epoch": 0.66, + "grad_norm": 0.51171875, + "learning_rate": 6.12931902507417e-05, + "loss": 0.9153, + "step": 46280 + }, + { + "epoch": 0.66, + "grad_norm": 0.56640625, + "learning_rate": 6.127010680604636e-05, + "loss": 0.9783, + "step": 46285 + }, + { + "epoch": 0.66, + "grad_norm": 0.59765625, + "learning_rate": 6.124702578898484e-05, + "loss": 1.0021, + "step": 46290 + }, + { + "epoch": 0.66, + "grad_norm": 0.65234375, + "learning_rate": 6.122394720100386e-05, + "loss": 0.9419, + "step": 46295 + }, + { + "epoch": 0.66, + "grad_norm": 0.6796875, + "learning_rate": 6.120087104355006e-05, + "loss": 0.9133, + "step": 46300 + }, + { + "epoch": 0.66, + "grad_norm": 0.482421875, + "learning_rate": 6.117779731806986e-05, + "loss": 0.9986, + "step": 46305 + }, + { + "epoch": 0.66, + "grad_norm": 0.5859375, + "learning_rate": 6.115472602600951e-05, + "loss": 0.9771, + "step": 46310 + }, + { + "epoch": 0.66, + "grad_norm": 0.67578125, + "learning_rate": 6.11316571688152e-05, + "loss": 1.0309, + "step": 46315 + }, + { + "epoch": 0.66, + "grad_norm": 0.53515625, + "learning_rate": 6.110859074793284e-05, + "loss": 0.9542, + "step": 46320 + }, + { + "epoch": 0.66, + "grad_norm": 0.5078125, + "learning_rate": 6.108552676480834e-05, + "loss": 0.8965, + "step": 46325 + }, + { + "epoch": 0.66, + "grad_norm": 0.578125, + "learning_rate": 6.106246522088735e-05, + "loss": 0.9091, + "step": 46330 + }, + { + "epoch": 0.66, + "grad_norm": 0.5859375, + "learning_rate": 6.103940611761535e-05, + "loss": 0.9457, + "step": 46335 + }, + { + "epoch": 0.66, + "grad_norm": 0.5546875, + "learning_rate": 6.101634945643777e-05, + "loss": 1.0486, + "step": 46340 + }, + { + "epoch": 0.66, + "grad_norm": 0.578125, + "learning_rate": 6.099329523879981e-05, + "loss": 1.0533, + "step": 46345 + }, + { + "epoch": 0.66, + "grad_norm": 0.53125, + "learning_rate": 6.09702434661465e-05, + "loss": 1.1038, + "step": 46350 + }, + { + "epoch": 0.66, + "grad_norm": 0.55859375, + "learning_rate": 6.0947194139922824e-05, + "loss": 0.8647, + "step": 46355 + }, + { + "epoch": 0.67, + "grad_norm": 0.546875, + "learning_rate": 6.092414726157348e-05, + "loss": 0.9562, + "step": 46360 + }, + { + "epoch": 0.67, + "grad_norm": 0.478515625, + "learning_rate": 6.090110283254309e-05, + "loss": 0.9561, + "step": 46365 + }, + { + "epoch": 0.67, + "grad_norm": 0.53515625, + "learning_rate": 6.087806085427611e-05, + "loss": 0.8962, + "step": 46370 + }, + { + "epoch": 0.67, + "grad_norm": 0.486328125, + "learning_rate": 6.08550213282168e-05, + "loss": 0.8846, + "step": 46375 + }, + { + "epoch": 0.67, + "grad_norm": 0.5859375, + "learning_rate": 6.083198425580936e-05, + "loss": 0.8637, + "step": 46380 + }, + { + "epoch": 0.67, + "grad_norm": 0.52734375, + "learning_rate": 6.080894963849776e-05, + "loss": 0.907, + "step": 46385 + }, + { + "epoch": 0.67, + "grad_norm": 0.70703125, + "learning_rate": 6.0785917477725806e-05, + "loss": 1.0066, + "step": 46390 + }, + { + "epoch": 0.67, + "grad_norm": 0.57421875, + "learning_rate": 6.076288777493723e-05, + "loss": 0.925, + "step": 46395 + }, + { + "epoch": 0.67, + "grad_norm": 0.48046875, + "learning_rate": 6.073986053157553e-05, + "loss": 0.9988, + "step": 46400 + }, + { + "epoch": 0.67, + "grad_norm": 0.59375, + "learning_rate": 6.071683574908407e-05, + "loss": 1.0245, + "step": 46405 + }, + { + "epoch": 0.67, + "grad_norm": 0.5390625, + "learning_rate": 6.0693813428906124e-05, + "loss": 1.0269, + "step": 46410 + }, + { + "epoch": 0.67, + "grad_norm": 0.546875, + "learning_rate": 6.0670793572484696e-05, + "loss": 0.8969, + "step": 46415 + }, + { + "epoch": 0.67, + "grad_norm": 0.58984375, + "learning_rate": 6.064777618126272e-05, + "loss": 0.8421, + "step": 46420 + }, + { + "epoch": 0.67, + "grad_norm": 0.53125, + "learning_rate": 6.062476125668293e-05, + "loss": 1.0216, + "step": 46425 + }, + { + "epoch": 0.67, + "grad_norm": 0.58984375, + "learning_rate": 6.060174880018798e-05, + "loss": 1.041, + "step": 46430 + }, + { + "epoch": 0.67, + "grad_norm": 0.478515625, + "learning_rate": 6.0578738813220273e-05, + "loss": 0.9485, + "step": 46435 + }, + { + "epoch": 0.67, + "grad_norm": 0.59375, + "learning_rate": 6.055573129722212e-05, + "loss": 1.0267, + "step": 46440 + }, + { + "epoch": 0.67, + "grad_norm": 0.5625, + "learning_rate": 6.053272625363562e-05, + "loss": 0.98, + "step": 46445 + }, + { + "epoch": 0.67, + "grad_norm": 0.546875, + "learning_rate": 6.050972368390282e-05, + "loss": 0.994, + "step": 46450 + }, + { + "epoch": 0.67, + "grad_norm": 0.62109375, + "learning_rate": 6.048672358946552e-05, + "loss": 1.0051, + "step": 46455 + }, + { + "epoch": 0.67, + "grad_norm": 0.59765625, + "learning_rate": 6.0463725971765396e-05, + "loss": 1.0058, + "step": 46460 + }, + { + "epoch": 0.67, + "grad_norm": 0.5703125, + "learning_rate": 6.044073083224393e-05, + "loss": 0.9911, + "step": 46465 + }, + { + "epoch": 0.67, + "grad_norm": 0.5078125, + "learning_rate": 6.041773817234251e-05, + "loss": 0.908, + "step": 46470 + }, + { + "epoch": 0.67, + "grad_norm": 0.60546875, + "learning_rate": 6.0394747993502354e-05, + "loss": 1.0566, + "step": 46475 + }, + { + "epoch": 0.67, + "grad_norm": 0.515625, + "learning_rate": 6.037176029716448e-05, + "loss": 0.9758, + "step": 46480 + }, + { + "epoch": 0.67, + "grad_norm": 0.62890625, + "learning_rate": 6.034877508476982e-05, + "loss": 1.0319, + "step": 46485 + }, + { + "epoch": 0.67, + "grad_norm": 0.64453125, + "learning_rate": 6.0325792357759106e-05, + "loss": 0.855, + "step": 46490 + }, + { + "epoch": 0.67, + "grad_norm": 0.5859375, + "learning_rate": 6.0302812117572883e-05, + "loss": 0.963, + "step": 46495 + }, + { + "epoch": 0.67, + "grad_norm": 0.5234375, + "learning_rate": 6.027983436565163e-05, + "loss": 0.9689, + "step": 46500 + }, + { + "epoch": 0.67, + "grad_norm": 0.53515625, + "learning_rate": 6.0256859103435595e-05, + "loss": 1.0157, + "step": 46505 + }, + { + "epoch": 0.67, + "grad_norm": 0.59375, + "learning_rate": 6.0233886332364894e-05, + "loss": 1.0156, + "step": 46510 + }, + { + "epoch": 0.67, + "grad_norm": 0.54296875, + "learning_rate": 6.0210916053879515e-05, + "loss": 1.0249, + "step": 46515 + }, + { + "epoch": 0.67, + "grad_norm": 0.5078125, + "learning_rate": 6.018794826941918e-05, + "loss": 0.9484, + "step": 46520 + }, + { + "epoch": 0.67, + "grad_norm": 0.546875, + "learning_rate": 6.0164982980423636e-05, + "loss": 0.9495, + "step": 46525 + }, + { + "epoch": 0.67, + "grad_norm": 0.609375, + "learning_rate": 6.01420201883323e-05, + "loss": 1.0027, + "step": 46530 + }, + { + "epoch": 0.67, + "grad_norm": 0.60546875, + "learning_rate": 6.011905989458453e-05, + "loss": 0.967, + "step": 46535 + }, + { + "epoch": 0.67, + "grad_norm": 0.6640625, + "learning_rate": 6.009610210061951e-05, + "loss": 1.1281, + "step": 46540 + }, + { + "epoch": 0.67, + "grad_norm": 0.5, + "learning_rate": 6.0073146807876266e-05, + "loss": 0.8276, + "step": 46545 + }, + { + "epoch": 0.67, + "grad_norm": 0.546875, + "learning_rate": 6.0050194017793635e-05, + "loss": 1.025, + "step": 46550 + }, + { + "epoch": 0.67, + "grad_norm": 0.5859375, + "learning_rate": 6.0027243731810355e-05, + "loss": 0.9618, + "step": 46555 + }, + { + "epoch": 0.67, + "grad_norm": 0.53125, + "learning_rate": 6.000429595136497e-05, + "loss": 1.0105, + "step": 46560 + }, + { + "epoch": 0.67, + "grad_norm": 0.5, + "learning_rate": 5.998135067789589e-05, + "loss": 0.8233, + "step": 46565 + }, + { + "epoch": 0.67, + "grad_norm": 0.53515625, + "learning_rate": 5.99584079128413e-05, + "loss": 0.9368, + "step": 46570 + }, + { + "epoch": 0.67, + "grad_norm": 0.58203125, + "learning_rate": 5.9935467657639286e-05, + "loss": 0.9314, + "step": 46575 + }, + { + "epoch": 0.67, + "grad_norm": 0.60546875, + "learning_rate": 5.99125299137278e-05, + "loss": 0.921, + "step": 46580 + }, + { + "epoch": 0.67, + "grad_norm": 0.61328125, + "learning_rate": 5.9889594682544604e-05, + "loss": 0.9308, + "step": 46585 + }, + { + "epoch": 0.67, + "grad_norm": 0.640625, + "learning_rate": 5.986666196552728e-05, + "loss": 1.0665, + "step": 46590 + }, + { + "epoch": 0.67, + "grad_norm": 0.5625, + "learning_rate": 5.984373176411331e-05, + "loss": 0.8952, + "step": 46595 + }, + { + "epoch": 0.67, + "grad_norm": 0.609375, + "learning_rate": 5.982080407973996e-05, + "loss": 0.8521, + "step": 46600 + }, + { + "epoch": 0.67, + "grad_norm": 0.55859375, + "learning_rate": 5.9797878913844364e-05, + "loss": 1.0417, + "step": 46605 + }, + { + "epoch": 0.67, + "grad_norm": 0.6171875, + "learning_rate": 5.977495626786351e-05, + "loss": 1.0472, + "step": 46610 + }, + { + "epoch": 0.67, + "grad_norm": 0.423828125, + "learning_rate": 5.975203614323421e-05, + "loss": 0.9462, + "step": 46615 + }, + { + "epoch": 0.67, + "grad_norm": 0.62109375, + "learning_rate": 5.9729118541393156e-05, + "loss": 1.0091, + "step": 46620 + }, + { + "epoch": 0.67, + "grad_norm": 0.53125, + "learning_rate": 5.970620346377678e-05, + "loss": 1.0154, + "step": 46625 + }, + { + "epoch": 0.67, + "grad_norm": 0.546875, + "learning_rate": 5.968329091182145e-05, + "loss": 0.9673, + "step": 46630 + }, + { + "epoch": 0.67, + "grad_norm": 0.6484375, + "learning_rate": 5.966038088696338e-05, + "loss": 1.0245, + "step": 46635 + }, + { + "epoch": 0.67, + "grad_norm": 0.55078125, + "learning_rate": 5.963747339063859e-05, + "loss": 0.925, + "step": 46640 + }, + { + "epoch": 0.67, + "grad_norm": 0.50390625, + "learning_rate": 5.96145684242829e-05, + "loss": 0.8854, + "step": 46645 + }, + { + "epoch": 0.67, + "grad_norm": 0.578125, + "learning_rate": 5.959166598933209e-05, + "loss": 0.9141, + "step": 46650 + }, + { + "epoch": 0.67, + "grad_norm": 0.55859375, + "learning_rate": 5.956876608722167e-05, + "loss": 0.8664, + "step": 46655 + }, + { + "epoch": 0.67, + "grad_norm": 0.59765625, + "learning_rate": 5.954586871938702e-05, + "loss": 0.9962, + "step": 46660 + }, + { + "epoch": 0.67, + "grad_norm": 0.494140625, + "learning_rate": 5.952297388726342e-05, + "loss": 0.9225, + "step": 46665 + }, + { + "epoch": 0.67, + "grad_norm": 0.60546875, + "learning_rate": 5.950008159228593e-05, + "loss": 0.8407, + "step": 46670 + }, + { + "epoch": 0.67, + "grad_norm": 0.5703125, + "learning_rate": 5.9477191835889425e-05, + "loss": 1.0356, + "step": 46675 + }, + { + "epoch": 0.67, + "grad_norm": 0.5625, + "learning_rate": 5.945430461950869e-05, + "loss": 1.0125, + "step": 46680 + }, + { + "epoch": 0.67, + "grad_norm": 0.64453125, + "learning_rate": 5.9431419944578305e-05, + "loss": 1.1157, + "step": 46685 + }, + { + "epoch": 0.67, + "grad_norm": 0.56640625, + "learning_rate": 5.940853781253274e-05, + "loss": 1.0508, + "step": 46690 + }, + { + "epoch": 0.67, + "grad_norm": 0.71484375, + "learning_rate": 5.938565822480625e-05, + "loss": 1.0621, + "step": 46695 + }, + { + "epoch": 0.67, + "grad_norm": 0.61328125, + "learning_rate": 5.936278118283294e-05, + "loss": 1.008, + "step": 46700 + }, + { + "epoch": 0.67, + "grad_norm": 0.53515625, + "learning_rate": 5.933990668804681e-05, + "loss": 0.9433, + "step": 46705 + }, + { + "epoch": 0.67, + "grad_norm": 0.546875, + "learning_rate": 5.931703474188164e-05, + "loss": 0.9275, + "step": 46710 + }, + { + "epoch": 0.67, + "grad_norm": 0.51171875, + "learning_rate": 5.9294165345771036e-05, + "loss": 0.9586, + "step": 46715 + }, + { + "epoch": 0.67, + "grad_norm": 0.515625, + "learning_rate": 5.9271298501148545e-05, + "loss": 0.8778, + "step": 46720 + }, + { + "epoch": 0.67, + "grad_norm": 0.55078125, + "learning_rate": 5.9248434209447456e-05, + "loss": 0.9347, + "step": 46725 + }, + { + "epoch": 0.67, + "grad_norm": 0.546875, + "learning_rate": 5.9225572472100895e-05, + "loss": 1.152, + "step": 46730 + }, + { + "epoch": 0.67, + "grad_norm": 0.63671875, + "learning_rate": 5.92027132905419e-05, + "loss": 0.9424, + "step": 46735 + }, + { + "epoch": 0.67, + "grad_norm": 0.69140625, + "learning_rate": 5.917985666620329e-05, + "loss": 1.0837, + "step": 46740 + }, + { + "epoch": 0.67, + "grad_norm": 0.5625, + "learning_rate": 5.9157002600517766e-05, + "loss": 0.9528, + "step": 46745 + }, + { + "epoch": 0.67, + "grad_norm": 0.62109375, + "learning_rate": 5.9134151094917834e-05, + "loss": 0.9801, + "step": 46750 + }, + { + "epoch": 0.67, + "grad_norm": 0.5390625, + "learning_rate": 5.9111302150835836e-05, + "loss": 0.9547, + "step": 46755 + }, + { + "epoch": 0.67, + "grad_norm": 0.53515625, + "learning_rate": 5.908845576970401e-05, + "loss": 0.96, + "step": 46760 + }, + { + "epoch": 0.67, + "grad_norm": 0.46875, + "learning_rate": 5.906561195295436e-05, + "loss": 0.7969, + "step": 46765 + }, + { + "epoch": 0.67, + "grad_norm": 0.51953125, + "learning_rate": 5.904277070201876e-05, + "loss": 0.9844, + "step": 46770 + }, + { + "epoch": 0.67, + "grad_norm": 0.484375, + "learning_rate": 5.901993201832901e-05, + "loss": 0.9173, + "step": 46775 + }, + { + "epoch": 0.67, + "grad_norm": 0.53125, + "learning_rate": 5.89970959033165e-05, + "loss": 0.8882, + "step": 46780 + }, + { + "epoch": 0.67, + "grad_norm": 0.546875, + "learning_rate": 5.897426235841277e-05, + "loss": 0.9216, + "step": 46785 + }, + { + "epoch": 0.67, + "grad_norm": 0.5625, + "learning_rate": 5.895143138504899e-05, + "loss": 0.963, + "step": 46790 + }, + { + "epoch": 0.67, + "grad_norm": 0.478515625, + "learning_rate": 5.8928602984656213e-05, + "loss": 0.9357, + "step": 46795 + }, + { + "epoch": 0.67, + "grad_norm": 0.53515625, + "learning_rate": 5.890577715866541e-05, + "loss": 0.7911, + "step": 46800 + }, + { + "epoch": 0.67, + "grad_norm": 0.46484375, + "learning_rate": 5.888295390850729e-05, + "loss": 0.9172, + "step": 46805 + }, + { + "epoch": 0.67, + "grad_norm": 0.58203125, + "learning_rate": 5.886013323561244e-05, + "loss": 0.8876, + "step": 46810 + }, + { + "epoch": 0.67, + "grad_norm": 0.55859375, + "learning_rate": 5.8837315141411284e-05, + "loss": 0.9875, + "step": 46815 + }, + { + "epoch": 0.67, + "grad_norm": 0.52734375, + "learning_rate": 5.881449962733412e-05, + "loss": 0.8395, + "step": 46820 + }, + { + "epoch": 0.67, + "grad_norm": 0.5625, + "learning_rate": 5.8791686694811e-05, + "loss": 1.0331, + "step": 46825 + }, + { + "epoch": 0.67, + "grad_norm": 0.5546875, + "learning_rate": 5.876887634527195e-05, + "loss": 1.0062, + "step": 46830 + }, + { + "epoch": 0.67, + "grad_norm": 0.5859375, + "learning_rate": 5.874606858014662e-05, + "loss": 1.0152, + "step": 46835 + }, + { + "epoch": 0.67, + "grad_norm": 0.7109375, + "learning_rate": 5.872326340086474e-05, + "loss": 0.887, + "step": 46840 + }, + { + "epoch": 0.67, + "grad_norm": 0.59375, + "learning_rate": 5.87004608088557e-05, + "loss": 0.7325, + "step": 46845 + }, + { + "epoch": 0.67, + "grad_norm": 0.6015625, + "learning_rate": 5.86776608055488e-05, + "loss": 1.0844, + "step": 46850 + }, + { + "epoch": 0.67, + "grad_norm": 0.54296875, + "learning_rate": 5.8654863392373185e-05, + "loss": 0.9183, + "step": 46855 + }, + { + "epoch": 0.67, + "grad_norm": 0.671875, + "learning_rate": 5.863206857075785e-05, + "loss": 0.9098, + "step": 46860 + }, + { + "epoch": 0.67, + "grad_norm": 0.60546875, + "learning_rate": 5.860927634213154e-05, + "loss": 0.9646, + "step": 46865 + }, + { + "epoch": 0.67, + "grad_norm": 0.609375, + "learning_rate": 5.8586486707922924e-05, + "loss": 0.9475, + "step": 46870 + }, + { + "epoch": 0.67, + "grad_norm": 0.578125, + "learning_rate": 5.8563699669560524e-05, + "loss": 0.94, + "step": 46875 + }, + { + "epoch": 0.67, + "grad_norm": 0.5859375, + "learning_rate": 5.8540915228472624e-05, + "loss": 0.9383, + "step": 46880 + }, + { + "epoch": 0.67, + "grad_norm": 0.52734375, + "learning_rate": 5.8518133386087356e-05, + "loss": 1.0302, + "step": 46885 + }, + { + "epoch": 0.67, + "grad_norm": 0.6796875, + "learning_rate": 5.8495354143832716e-05, + "loss": 0.9661, + "step": 46890 + }, + { + "epoch": 0.67, + "grad_norm": 0.53515625, + "learning_rate": 5.847257750313656e-05, + "loss": 1.0525, + "step": 46895 + }, + { + "epoch": 0.67, + "grad_norm": 0.578125, + "learning_rate": 5.8449803465426545e-05, + "loss": 0.8351, + "step": 46900 + }, + { + "epoch": 0.67, + "grad_norm": 0.5234375, + "learning_rate": 5.842703203213016e-05, + "loss": 0.992, + "step": 46905 + }, + { + "epoch": 0.67, + "grad_norm": 0.5625, + "learning_rate": 5.8404263204674756e-05, + "loss": 1.0304, + "step": 46910 + }, + { + "epoch": 0.67, + "grad_norm": 0.50390625, + "learning_rate": 5.8381496984487495e-05, + "loss": 0.7929, + "step": 46915 + }, + { + "epoch": 0.67, + "grad_norm": 0.671875, + "learning_rate": 5.835873337299544e-05, + "loss": 1.0232, + "step": 46920 + }, + { + "epoch": 0.67, + "grad_norm": 0.62890625, + "learning_rate": 5.833597237162538e-05, + "loss": 0.952, + "step": 46925 + }, + { + "epoch": 0.67, + "grad_norm": 0.58203125, + "learning_rate": 5.8313213981804005e-05, + "loss": 0.9321, + "step": 46930 + }, + { + "epoch": 0.67, + "grad_norm": 0.54296875, + "learning_rate": 5.829045820495791e-05, + "loss": 0.8802, + "step": 46935 + }, + { + "epoch": 0.67, + "grad_norm": 0.55859375, + "learning_rate": 5.826770504251339e-05, + "loss": 1.1071, + "step": 46940 + }, + { + "epoch": 0.67, + "grad_norm": 0.65234375, + "learning_rate": 5.82449544958966e-05, + "loss": 1.0217, + "step": 46945 + }, + { + "epoch": 0.67, + "grad_norm": 0.48046875, + "learning_rate": 5.822220656653363e-05, + "loss": 0.891, + "step": 46950 + }, + { + "epoch": 0.67, + "grad_norm": 0.546875, + "learning_rate": 5.8199461255850365e-05, + "loss": 0.9092, + "step": 46955 + }, + { + "epoch": 0.67, + "grad_norm": 0.53515625, + "learning_rate": 5.8176718565272436e-05, + "loss": 1.0907, + "step": 46960 + }, + { + "epoch": 0.67, + "grad_norm": 0.7109375, + "learning_rate": 5.8153978496225415e-05, + "loss": 0.9233, + "step": 46965 + }, + { + "epoch": 0.67, + "grad_norm": 0.6171875, + "learning_rate": 5.813124105013473e-05, + "loss": 0.962, + "step": 46970 + }, + { + "epoch": 0.67, + "grad_norm": 0.5390625, + "learning_rate": 5.81085062284255e-05, + "loss": 0.9517, + "step": 46975 + }, + { + "epoch": 0.67, + "grad_norm": 0.66796875, + "learning_rate": 5.8085774032522814e-05, + "loss": 0.919, + "step": 46980 + }, + { + "epoch": 0.67, + "grad_norm": 0.462890625, + "learning_rate": 5.8063044463851623e-05, + "loss": 0.8406, + "step": 46985 + }, + { + "epoch": 0.67, + "grad_norm": 0.52734375, + "learning_rate": 5.8040317523836475e-05, + "loss": 0.9927, + "step": 46990 + }, + { + "epoch": 0.67, + "grad_norm": 0.54296875, + "learning_rate": 5.8017593213902036e-05, + "loss": 0.8488, + "step": 46995 + }, + { + "epoch": 0.67, + "grad_norm": 0.52734375, + "learning_rate": 5.7994871535472684e-05, + "loss": 0.9303, + "step": 47000 + }, + { + "epoch": 0.67, + "grad_norm": 0.5703125, + "learning_rate": 5.79721524899726e-05, + "loss": 0.9435, + "step": 47005 + }, + { + "epoch": 0.67, + "grad_norm": 0.5625, + "learning_rate": 5.794943607882586e-05, + "loss": 0.8904, + "step": 47010 + }, + { + "epoch": 0.67, + "grad_norm": 0.5234375, + "learning_rate": 5.792672230345639e-05, + "loss": 1.0229, + "step": 47015 + }, + { + "epoch": 0.67, + "grad_norm": 0.56640625, + "learning_rate": 5.790401116528785e-05, + "loss": 0.7795, + "step": 47020 + }, + { + "epoch": 0.67, + "grad_norm": 0.60546875, + "learning_rate": 5.788130266574383e-05, + "loss": 0.9443, + "step": 47025 + }, + { + "epoch": 0.67, + "grad_norm": 0.5, + "learning_rate": 5.785859680624779e-05, + "loss": 0.987, + "step": 47030 + }, + { + "epoch": 0.67, + "grad_norm": 0.55859375, + "learning_rate": 5.783589358822283e-05, + "loss": 1.0296, + "step": 47035 + }, + { + "epoch": 0.67, + "grad_norm": 0.4921875, + "learning_rate": 5.781319301309214e-05, + "loss": 1.0317, + "step": 47040 + }, + { + "epoch": 0.67, + "grad_norm": 0.51953125, + "learning_rate": 5.77904950822785e-05, + "loss": 0.9373, + "step": 47045 + }, + { + "epoch": 0.67, + "grad_norm": 0.546875, + "learning_rate": 5.776779979720475e-05, + "loss": 0.9986, + "step": 47050 + }, + { + "epoch": 0.67, + "grad_norm": 0.63671875, + "learning_rate": 5.7745107159293365e-05, + "loss": 1.0654, + "step": 47055 + }, + { + "epoch": 0.68, + "grad_norm": 0.5078125, + "learning_rate": 5.772241716996679e-05, + "loss": 0.9629, + "step": 47060 + }, + { + "epoch": 0.68, + "grad_norm": 0.5390625, + "learning_rate": 5.76997298306473e-05, + "loss": 0.8715, + "step": 47065 + }, + { + "epoch": 0.68, + "grad_norm": 0.482421875, + "learning_rate": 5.7677045142756866e-05, + "loss": 0.9028, + "step": 47070 + }, + { + "epoch": 0.68, + "grad_norm": 0.5703125, + "learning_rate": 5.765436310771746e-05, + "loss": 0.9967, + "step": 47075 + }, + { + "epoch": 0.68, + "grad_norm": 0.57421875, + "learning_rate": 5.763168372695085e-05, + "loss": 0.8681, + "step": 47080 + }, + { + "epoch": 0.68, + "grad_norm": 0.64453125, + "learning_rate": 5.76090070018785e-05, + "loss": 1.0474, + "step": 47085 + }, + { + "epoch": 0.68, + "grad_norm": 0.61328125, + "learning_rate": 5.758633293392191e-05, + "loss": 0.927, + "step": 47090 + }, + { + "epoch": 0.68, + "grad_norm": 0.6015625, + "learning_rate": 5.7563661524502275e-05, + "loss": 0.9565, + "step": 47095 + }, + { + "epoch": 0.68, + "grad_norm": 0.5234375, + "learning_rate": 5.754099277504064e-05, + "loss": 0.8303, + "step": 47100 + }, + { + "epoch": 0.68, + "grad_norm": 0.57421875, + "learning_rate": 5.751832668695792e-05, + "loss": 1.017, + "step": 47105 + }, + { + "epoch": 0.68, + "grad_norm": 0.515625, + "learning_rate": 5.749566326167492e-05, + "loss": 0.99, + "step": 47110 + }, + { + "epoch": 0.68, + "grad_norm": 0.59375, + "learning_rate": 5.7473002500612114e-05, + "loss": 1.0008, + "step": 47115 + }, + { + "epoch": 0.68, + "grad_norm": 0.48828125, + "learning_rate": 5.745034440518993e-05, + "loss": 0.9646, + "step": 47120 + }, + { + "epoch": 0.68, + "grad_norm": 0.5625, + "learning_rate": 5.742768897682868e-05, + "loss": 0.9078, + "step": 47125 + }, + { + "epoch": 0.68, + "grad_norm": 0.53515625, + "learning_rate": 5.7405036216948315e-05, + "loss": 0.9729, + "step": 47130 + }, + { + "epoch": 0.68, + "grad_norm": 0.50390625, + "learning_rate": 5.73823861269688e-05, + "loss": 0.9689, + "step": 47135 + }, + { + "epoch": 0.68, + "grad_norm": 0.53515625, + "learning_rate": 5.7359738708309885e-05, + "loss": 0.987, + "step": 47140 + }, + { + "epoch": 0.68, + "grad_norm": 0.51953125, + "learning_rate": 5.733709396239113e-05, + "loss": 0.9306, + "step": 47145 + }, + { + "epoch": 0.68, + "grad_norm": 0.60546875, + "learning_rate": 5.731445189063187e-05, + "loss": 0.8408, + "step": 47150 + }, + { + "epoch": 0.68, + "grad_norm": 0.51171875, + "learning_rate": 5.729181249445137e-05, + "loss": 0.9015, + "step": 47155 + }, + { + "epoch": 0.68, + "grad_norm": 0.5859375, + "learning_rate": 5.726917577526876e-05, + "loss": 0.9337, + "step": 47160 + }, + { + "epoch": 0.68, + "grad_norm": 0.59375, + "learning_rate": 5.724654173450281e-05, + "loss": 0.6944, + "step": 47165 + }, + { + "epoch": 0.68, + "grad_norm": 0.6875, + "learning_rate": 5.722391037357234e-05, + "loss": 1.0884, + "step": 47170 + }, + { + "epoch": 0.68, + "grad_norm": 0.5859375, + "learning_rate": 5.720128169389593e-05, + "loss": 0.8736, + "step": 47175 + }, + { + "epoch": 0.68, + "grad_norm": 0.55859375, + "learning_rate": 5.717865569689187e-05, + "loss": 0.9979, + "step": 47180 + }, + { + "epoch": 0.68, + "grad_norm": 0.55859375, + "learning_rate": 5.715603238397845e-05, + "loss": 0.9299, + "step": 47185 + }, + { + "epoch": 0.68, + "grad_norm": 0.54296875, + "learning_rate": 5.713341175657376e-05, + "loss": 0.9496, + "step": 47190 + }, + { + "epoch": 0.68, + "grad_norm": 0.62109375, + "learning_rate": 5.7110793816095634e-05, + "loss": 0.8789, + "step": 47195 + }, + { + "epoch": 0.68, + "grad_norm": 0.515625, + "learning_rate": 5.708817856396176e-05, + "loss": 0.9788, + "step": 47200 + }, + { + "epoch": 0.68, + "grad_norm": 0.51953125, + "learning_rate": 5.7065566001589764e-05, + "loss": 0.837, + "step": 47205 + }, + { + "epoch": 0.68, + "grad_norm": 0.5546875, + "learning_rate": 5.704295613039694e-05, + "loss": 0.8921, + "step": 47210 + }, + { + "epoch": 0.68, + "grad_norm": 0.56640625, + "learning_rate": 5.7020348951800574e-05, + "loss": 1.0042, + "step": 47215 + }, + { + "epoch": 0.68, + "grad_norm": 0.50390625, + "learning_rate": 5.699774446721771e-05, + "loss": 0.9389, + "step": 47220 + }, + { + "epoch": 0.68, + "grad_norm": 0.515625, + "learning_rate": 5.6975142678065166e-05, + "loss": 0.8968, + "step": 47225 + }, + { + "epoch": 0.68, + "grad_norm": 0.59375, + "learning_rate": 5.695254358575966e-05, + "loss": 0.9572, + "step": 47230 + }, + { + "epoch": 0.68, + "grad_norm": 0.58984375, + "learning_rate": 5.6929947191717804e-05, + "loss": 0.9398, + "step": 47235 + }, + { + "epoch": 0.68, + "grad_norm": 0.5, + "learning_rate": 5.6907353497355874e-05, + "loss": 0.8282, + "step": 47240 + }, + { + "epoch": 0.68, + "grad_norm": 0.54296875, + "learning_rate": 5.688476250409011e-05, + "loss": 0.8719, + "step": 47245 + }, + { + "epoch": 0.68, + "grad_norm": 0.5546875, + "learning_rate": 5.686217421333656e-05, + "loss": 0.9109, + "step": 47250 + }, + { + "epoch": 0.68, + "grad_norm": 0.58203125, + "learning_rate": 5.683958862651106e-05, + "loss": 0.9396, + "step": 47255 + }, + { + "epoch": 0.68, + "grad_norm": 0.48046875, + "learning_rate": 5.6817005745029284e-05, + "loss": 0.9259, + "step": 47260 + }, + { + "epoch": 0.68, + "grad_norm": 0.60546875, + "learning_rate": 5.679442557030674e-05, + "loss": 0.9045, + "step": 47265 + }, + { + "epoch": 0.68, + "grad_norm": 0.58203125, + "learning_rate": 5.6771848103758875e-05, + "loss": 0.9453, + "step": 47270 + }, + { + "epoch": 0.68, + "grad_norm": 0.63671875, + "learning_rate": 5.674927334680075e-05, + "loss": 0.9764, + "step": 47275 + }, + { + "epoch": 0.68, + "grad_norm": 0.515625, + "learning_rate": 5.672670130084743e-05, + "loss": 0.9651, + "step": 47280 + }, + { + "epoch": 0.68, + "grad_norm": 0.5390625, + "learning_rate": 5.670413196731379e-05, + "loss": 0.8923, + "step": 47285 + }, + { + "epoch": 0.68, + "grad_norm": 0.53125, + "learning_rate": 5.668156534761444e-05, + "loss": 0.8737, + "step": 47290 + }, + { + "epoch": 0.68, + "grad_norm": 0.515625, + "learning_rate": 5.665900144316391e-05, + "loss": 0.9184, + "step": 47295 + }, + { + "epoch": 0.68, + "grad_norm": 0.48828125, + "learning_rate": 5.66364402553766e-05, + "loss": 1.0944, + "step": 47300 + }, + { + "epoch": 0.68, + "grad_norm": 0.64453125, + "learning_rate": 5.661388178566653e-05, + "loss": 0.9557, + "step": 47305 + }, + { + "epoch": 0.68, + "grad_norm": 0.59375, + "learning_rate": 5.659132603544776e-05, + "loss": 1.2007, + "step": 47310 + }, + { + "epoch": 0.68, + "grad_norm": 0.5703125, + "learning_rate": 5.6568773006134144e-05, + "loss": 0.7776, + "step": 47315 + }, + { + "epoch": 0.68, + "grad_norm": 0.578125, + "learning_rate": 5.654622269913927e-05, + "loss": 1.1321, + "step": 47320 + }, + { + "epoch": 0.68, + "grad_norm": 0.51953125, + "learning_rate": 5.652367511587665e-05, + "loss": 0.957, + "step": 47325 + }, + { + "epoch": 0.68, + "grad_norm": 0.546875, + "learning_rate": 5.650113025775963e-05, + "loss": 0.9662, + "step": 47330 + }, + { + "epoch": 0.68, + "grad_norm": 0.515625, + "learning_rate": 5.647858812620126e-05, + "loss": 0.8585, + "step": 47335 + }, + { + "epoch": 0.68, + "grad_norm": 0.58984375, + "learning_rate": 5.645604872261455e-05, + "loss": 0.9303, + "step": 47340 + }, + { + "epoch": 0.68, + "grad_norm": 0.66015625, + "learning_rate": 5.643351204841234e-05, + "loss": 0.9571, + "step": 47345 + }, + { + "epoch": 0.68, + "grad_norm": 0.61328125, + "learning_rate": 5.6410978105007175e-05, + "loss": 0.9188, + "step": 47350 + }, + { + "epoch": 0.68, + "grad_norm": 0.55859375, + "learning_rate": 5.6388446893811585e-05, + "loss": 0.9066, + "step": 47355 + }, + { + "epoch": 0.68, + "grad_norm": 0.55859375, + "learning_rate": 5.6365918416237775e-05, + "loss": 0.9174, + "step": 47360 + }, + { + "epoch": 0.68, + "grad_norm": 0.55078125, + "learning_rate": 5.634339267369794e-05, + "loss": 0.9251, + "step": 47365 + }, + { + "epoch": 0.68, + "grad_norm": 0.59375, + "learning_rate": 5.6320869667603925e-05, + "loss": 0.9401, + "step": 47370 + }, + { + "epoch": 0.68, + "grad_norm": 0.62109375, + "learning_rate": 5.629834939936755e-05, + "loss": 1.0269, + "step": 47375 + }, + { + "epoch": 0.68, + "grad_norm": 0.515625, + "learning_rate": 5.6275831870400444e-05, + "loss": 0.9956, + "step": 47380 + }, + { + "epoch": 0.68, + "grad_norm": 0.5703125, + "learning_rate": 5.625331708211394e-05, + "loss": 0.9787, + "step": 47385 + }, + { + "epoch": 0.68, + "grad_norm": 0.53125, + "learning_rate": 5.6230805035919365e-05, + "loss": 0.9678, + "step": 47390 + }, + { + "epoch": 0.68, + "grad_norm": 0.58203125, + "learning_rate": 5.620829573322781e-05, + "loss": 0.8578, + "step": 47395 + }, + { + "epoch": 0.68, + "grad_norm": 0.59765625, + "learning_rate": 5.618578917545012e-05, + "loss": 0.8684, + "step": 47400 + }, + { + "epoch": 0.68, + "grad_norm": 0.61328125, + "learning_rate": 5.61632853639971e-05, + "loss": 1.0012, + "step": 47405 + }, + { + "epoch": 0.68, + "grad_norm": 0.55859375, + "learning_rate": 5.614078430027924e-05, + "loss": 1.0491, + "step": 47410 + }, + { + "epoch": 0.68, + "grad_norm": 0.53125, + "learning_rate": 5.6118285985707006e-05, + "loss": 0.9195, + "step": 47415 + }, + { + "epoch": 0.68, + "grad_norm": 0.53125, + "learning_rate": 5.6095790421690554e-05, + "loss": 0.8142, + "step": 47420 + }, + { + "epoch": 0.68, + "grad_norm": 0.55859375, + "learning_rate": 5.607329760964e-05, + "loss": 0.9404, + "step": 47425 + }, + { + "epoch": 0.68, + "grad_norm": 0.57421875, + "learning_rate": 5.6050807550965125e-05, + "loss": 0.8679, + "step": 47430 + }, + { + "epoch": 0.68, + "grad_norm": 0.70703125, + "learning_rate": 5.60283202470757e-05, + "loss": 0.826, + "step": 47435 + }, + { + "epoch": 0.68, + "grad_norm": 0.59765625, + "learning_rate": 5.600583569938127e-05, + "loss": 0.9905, + "step": 47440 + }, + { + "epoch": 0.68, + "grad_norm": 0.58203125, + "learning_rate": 5.598335390929114e-05, + "loss": 0.9148, + "step": 47445 + }, + { + "epoch": 0.68, + "grad_norm": 0.52734375, + "learning_rate": 5.596087487821451e-05, + "loss": 1.0001, + "step": 47450 + }, + { + "epoch": 0.68, + "grad_norm": 0.5625, + "learning_rate": 5.593839860756044e-05, + "loss": 0.9885, + "step": 47455 + }, + { + "epoch": 0.68, + "grad_norm": 0.640625, + "learning_rate": 5.591592509873772e-05, + "loss": 0.9997, + "step": 47460 + }, + { + "epoch": 0.68, + "grad_norm": 0.52734375, + "learning_rate": 5.589345435315498e-05, + "loss": 0.9769, + "step": 47465 + }, + { + "epoch": 0.68, + "grad_norm": 0.52734375, + "learning_rate": 5.587098637222077e-05, + "loss": 0.892, + "step": 47470 + }, + { + "epoch": 0.68, + "grad_norm": 0.578125, + "learning_rate": 5.584852115734343e-05, + "loss": 0.9215, + "step": 47475 + }, + { + "epoch": 0.68, + "grad_norm": 0.53515625, + "learning_rate": 5.582605870993103e-05, + "loss": 0.8581, + "step": 47480 + }, + { + "epoch": 0.68, + "grad_norm": 0.55859375, + "learning_rate": 5.580359903139156e-05, + "loss": 0.8876, + "step": 47485 + }, + { + "epoch": 0.68, + "grad_norm": 0.55859375, + "learning_rate": 5.5781142123132904e-05, + "loss": 1.1096, + "step": 47490 + }, + { + "epoch": 0.68, + "grad_norm": 0.5546875, + "learning_rate": 5.575868798656256e-05, + "loss": 0.9166, + "step": 47495 + }, + { + "epoch": 0.68, + "grad_norm": 0.6328125, + "learning_rate": 5.573623662308804e-05, + "loss": 0.9829, + "step": 47500 + }, + { + "epoch": 0.68, + "grad_norm": 0.59375, + "learning_rate": 5.5713788034116673e-05, + "loss": 0.9592, + "step": 47505 + }, + { + "epoch": 0.68, + "grad_norm": 0.48046875, + "learning_rate": 5.569134222105551e-05, + "loss": 0.9754, + "step": 47510 + }, + { + "epoch": 0.68, + "grad_norm": 0.515625, + "learning_rate": 5.566889918531144e-05, + "loss": 0.9408, + "step": 47515 + }, + { + "epoch": 0.68, + "grad_norm": 0.5078125, + "learning_rate": 5.564645892829126e-05, + "loss": 0.9398, + "step": 47520 + }, + { + "epoch": 0.68, + "grad_norm": 0.515625, + "learning_rate": 5.5624021451401576e-05, + "loss": 0.9964, + "step": 47525 + }, + { + "epoch": 0.68, + "grad_norm": 0.5859375, + "learning_rate": 5.560158675604873e-05, + "loss": 0.9931, + "step": 47530 + }, + { + "epoch": 0.68, + "grad_norm": 0.52734375, + "learning_rate": 5.5579154843639e-05, + "loss": 0.9833, + "step": 47535 + }, + { + "epoch": 0.68, + "grad_norm": 0.58203125, + "learning_rate": 5.555672571557846e-05, + "loss": 0.9731, + "step": 47540 + }, + { + "epoch": 0.68, + "grad_norm": 0.54296875, + "learning_rate": 5.5534299373272944e-05, + "loss": 1.032, + "step": 47545 + }, + { + "epoch": 0.68, + "grad_norm": 0.5, + "learning_rate": 5.5511875818128176e-05, + "loss": 0.9431, + "step": 47550 + }, + { + "epoch": 0.68, + "grad_norm": 0.5234375, + "learning_rate": 5.5489455051549756e-05, + "loss": 0.9872, + "step": 47555 + }, + { + "epoch": 0.68, + "grad_norm": 0.59375, + "learning_rate": 5.546703707494293e-05, + "loss": 0.9955, + "step": 47560 + }, + { + "epoch": 0.68, + "grad_norm": 0.53515625, + "learning_rate": 5.544462188971298e-05, + "loss": 0.8273, + "step": 47565 + }, + { + "epoch": 0.68, + "grad_norm": 0.765625, + "learning_rate": 5.5422209497264885e-05, + "loss": 1.0531, + "step": 47570 + }, + { + "epoch": 0.68, + "grad_norm": 0.6953125, + "learning_rate": 5.5399799899003414e-05, + "loss": 0.9159, + "step": 47575 + }, + { + "epoch": 0.68, + "grad_norm": 0.58984375, + "learning_rate": 5.53773930963333e-05, + "loss": 0.8641, + "step": 47580 + }, + { + "epoch": 0.68, + "grad_norm": 0.55859375, + "learning_rate": 5.535498909065904e-05, + "loss": 0.83, + "step": 47585 + }, + { + "epoch": 0.68, + "grad_norm": 0.5703125, + "learning_rate": 5.5332587883384854e-05, + "loss": 0.9063, + "step": 47590 + }, + { + "epoch": 0.68, + "grad_norm": 0.53125, + "learning_rate": 5.5310189475914956e-05, + "loss": 1.099, + "step": 47595 + }, + { + "epoch": 0.68, + "grad_norm": 0.57421875, + "learning_rate": 5.5287793869653305e-05, + "loss": 0.8519, + "step": 47600 + }, + { + "epoch": 0.68, + "grad_norm": 0.51953125, + "learning_rate": 5.5265401066003617e-05, + "loss": 1.0752, + "step": 47605 + }, + { + "epoch": 0.68, + "grad_norm": 0.5546875, + "learning_rate": 5.5243011066369534e-05, + "loss": 0.8892, + "step": 47610 + }, + { + "epoch": 0.68, + "grad_norm": 0.546875, + "learning_rate": 5.522062387215454e-05, + "loss": 0.9662, + "step": 47615 + }, + { + "epoch": 0.68, + "grad_norm": 0.54296875, + "learning_rate": 5.519823948476184e-05, + "loss": 0.9145, + "step": 47620 + }, + { + "epoch": 0.68, + "grad_norm": 0.56640625, + "learning_rate": 5.517585790559448e-05, + "loss": 0.9575, + "step": 47625 + }, + { + "epoch": 0.68, + "grad_norm": 0.515625, + "learning_rate": 5.515347913605539e-05, + "loss": 0.9744, + "step": 47630 + }, + { + "epoch": 0.68, + "grad_norm": 0.55859375, + "learning_rate": 5.513110317754734e-05, + "loss": 0.8392, + "step": 47635 + }, + { + "epoch": 0.68, + "grad_norm": 0.60546875, + "learning_rate": 5.510873003147281e-05, + "loss": 1.0129, + "step": 47640 + }, + { + "epoch": 0.68, + "grad_norm": 0.60546875, + "learning_rate": 5.5086359699234205e-05, + "loss": 0.9679, + "step": 47645 + }, + { + "epoch": 0.68, + "grad_norm": 0.5859375, + "learning_rate": 5.506399218223377e-05, + "loss": 1.0772, + "step": 47650 + }, + { + "epoch": 0.68, + "grad_norm": 0.515625, + "learning_rate": 5.504162748187344e-05, + "loss": 0.9394, + "step": 47655 + }, + { + "epoch": 0.68, + "grad_norm": 0.609375, + "learning_rate": 5.50192655995551e-05, + "loss": 0.9517, + "step": 47660 + }, + { + "epoch": 0.68, + "grad_norm": 0.609375, + "learning_rate": 5.499690653668046e-05, + "loss": 0.9318, + "step": 47665 + }, + { + "epoch": 0.68, + "grad_norm": 0.56640625, + "learning_rate": 5.4974550294650985e-05, + "loss": 0.9086, + "step": 47670 + }, + { + "epoch": 0.68, + "grad_norm": 0.546875, + "learning_rate": 5.495219687486792e-05, + "loss": 0.9837, + "step": 47675 + }, + { + "epoch": 0.68, + "grad_norm": 0.62890625, + "learning_rate": 5.4929846278732525e-05, + "loss": 0.9616, + "step": 47680 + }, + { + "epoch": 0.68, + "grad_norm": 0.54296875, + "learning_rate": 5.4907498507645626e-05, + "loss": 0.9245, + "step": 47685 + }, + { + "epoch": 0.68, + "grad_norm": 0.55859375, + "learning_rate": 5.4885153563008095e-05, + "loss": 1.0802, + "step": 47690 + }, + { + "epoch": 0.68, + "grad_norm": 0.53125, + "learning_rate": 5.4862811446220564e-05, + "loss": 0.9033, + "step": 47695 + }, + { + "epoch": 0.68, + "grad_norm": 0.52734375, + "learning_rate": 5.484047215868336e-05, + "loss": 0.9888, + "step": 47700 + }, + { + "epoch": 0.68, + "grad_norm": 0.57421875, + "learning_rate": 5.4818135701796814e-05, + "loss": 0.9262, + "step": 47705 + }, + { + "epoch": 0.68, + "grad_norm": 0.56640625, + "learning_rate": 5.4795802076961e-05, + "loss": 0.9719, + "step": 47710 + }, + { + "epoch": 0.68, + "grad_norm": 0.58203125, + "learning_rate": 5.477347128557577e-05, + "loss": 0.8993, + "step": 47715 + }, + { + "epoch": 0.68, + "grad_norm": 0.57421875, + "learning_rate": 5.47511433290409e-05, + "loss": 0.8943, + "step": 47720 + }, + { + "epoch": 0.68, + "grad_norm": 0.53515625, + "learning_rate": 5.4728818208755864e-05, + "loss": 0.909, + "step": 47725 + }, + { + "epoch": 0.68, + "grad_norm": 0.498046875, + "learning_rate": 5.47064959261201e-05, + "loss": 0.9259, + "step": 47730 + }, + { + "epoch": 0.68, + "grad_norm": 0.625, + "learning_rate": 5.4684176482532723e-05, + "loss": 0.9023, + "step": 47735 + }, + { + "epoch": 0.68, + "grad_norm": 0.58984375, + "learning_rate": 5.4661859879392764e-05, + "loss": 0.8699, + "step": 47740 + }, + { + "epoch": 0.68, + "grad_norm": 0.56640625, + "learning_rate": 5.463954611809912e-05, + "loss": 0.9654, + "step": 47745 + }, + { + "epoch": 0.68, + "grad_norm": 0.72265625, + "learning_rate": 5.461723520005033e-05, + "loss": 0.9852, + "step": 47750 + }, + { + "epoch": 0.69, + "grad_norm": 0.515625, + "learning_rate": 5.459492712664493e-05, + "loss": 0.8854, + "step": 47755 + }, + { + "epoch": 0.69, + "grad_norm": 0.51953125, + "learning_rate": 5.457262189928125e-05, + "loss": 1.0325, + "step": 47760 + }, + { + "epoch": 0.69, + "grad_norm": 0.59765625, + "learning_rate": 5.455031951935732e-05, + "loss": 1.0155, + "step": 47765 + }, + { + "epoch": 0.69, + "grad_norm": 0.58984375, + "learning_rate": 5.452801998827114e-05, + "loss": 0.9049, + "step": 47770 + }, + { + "epoch": 0.69, + "grad_norm": 0.55078125, + "learning_rate": 5.450572330742052e-05, + "loss": 0.978, + "step": 47775 + }, + { + "epoch": 0.69, + "grad_norm": 0.50390625, + "learning_rate": 5.4483429478202906e-05, + "loss": 0.9369, + "step": 47780 + }, + { + "epoch": 0.69, + "grad_norm": 0.5546875, + "learning_rate": 5.446113850201578e-05, + "loss": 1.041, + "step": 47785 + }, + { + "epoch": 0.69, + "grad_norm": 0.53125, + "learning_rate": 5.443885038025638e-05, + "loss": 0.9147, + "step": 47790 + }, + { + "epoch": 0.69, + "grad_norm": 0.546875, + "learning_rate": 5.441656511432169e-05, + "loss": 0.854, + "step": 47795 + }, + { + "epoch": 0.69, + "grad_norm": 0.466796875, + "learning_rate": 5.4394282705608626e-05, + "loss": 0.918, + "step": 47800 + }, + { + "epoch": 0.69, + "grad_norm": 0.5078125, + "learning_rate": 5.4372003155513896e-05, + "loss": 0.8515, + "step": 47805 + }, + { + "epoch": 0.69, + "grad_norm": 0.60546875, + "learning_rate": 5.434972646543393e-05, + "loss": 1.1099, + "step": 47810 + }, + { + "epoch": 0.69, + "grad_norm": 0.5390625, + "learning_rate": 5.432745263676511e-05, + "loss": 0.8933, + "step": 47815 + }, + { + "epoch": 0.69, + "grad_norm": 0.5, + "learning_rate": 5.430518167090359e-05, + "loss": 1.062, + "step": 47820 + }, + { + "epoch": 0.69, + "grad_norm": 0.57421875, + "learning_rate": 5.428291356924535e-05, + "loss": 0.8631, + "step": 47825 + }, + { + "epoch": 0.69, + "grad_norm": 0.55078125, + "learning_rate": 5.4260648333186114e-05, + "loss": 0.998, + "step": 47830 + }, + { + "epoch": 0.69, + "grad_norm": 0.58203125, + "learning_rate": 5.423838596412152e-05, + "loss": 0.9869, + "step": 47835 + }, + { + "epoch": 0.69, + "grad_norm": 0.59765625, + "learning_rate": 5.421612646344707e-05, + "loss": 1.0076, + "step": 47840 + }, + { + "epoch": 0.69, + "grad_norm": 0.63671875, + "learning_rate": 5.41938698325579e-05, + "loss": 0.9925, + "step": 47845 + }, + { + "epoch": 0.69, + "grad_norm": 0.49609375, + "learning_rate": 5.417161607284915e-05, + "loss": 0.8627, + "step": 47850 + }, + { + "epoch": 0.69, + "grad_norm": 0.625, + "learning_rate": 5.4149365185715726e-05, + "loss": 1.0033, + "step": 47855 + }, + { + "epoch": 0.69, + "grad_norm": 0.54296875, + "learning_rate": 5.412711717255228e-05, + "loss": 1.0183, + "step": 47860 + }, + { + "epoch": 0.69, + "grad_norm": 0.6171875, + "learning_rate": 5.410487203475338e-05, + "loss": 0.9942, + "step": 47865 + }, + { + "epoch": 0.69, + "grad_norm": 0.61328125, + "learning_rate": 5.40826297737134e-05, + "loss": 0.9463, + "step": 47870 + }, + { + "epoch": 0.69, + "grad_norm": 0.56640625, + "learning_rate": 5.4060390390826444e-05, + "loss": 0.9178, + "step": 47875 + }, + { + "epoch": 0.69, + "grad_norm": 0.58984375, + "learning_rate": 5.403815388748659e-05, + "loss": 1.0525, + "step": 47880 + }, + { + "epoch": 0.69, + "grad_norm": 0.55078125, + "learning_rate": 5.4015920265087574e-05, + "loss": 0.8748, + "step": 47885 + }, + { + "epoch": 0.69, + "grad_norm": 0.49609375, + "learning_rate": 5.399368952502302e-05, + "loss": 0.91, + "step": 47890 + }, + { + "epoch": 0.69, + "grad_norm": 0.5078125, + "learning_rate": 5.397146166868641e-05, + "loss": 0.9965, + "step": 47895 + }, + { + "epoch": 0.69, + "grad_norm": 0.6015625, + "learning_rate": 5.3949236697471025e-05, + "loss": 0.8512, + "step": 47900 + }, + { + "epoch": 0.69, + "grad_norm": 0.58203125, + "learning_rate": 5.3927014612769925e-05, + "loss": 1.1435, + "step": 47905 + }, + { + "epoch": 0.69, + "grad_norm": 0.55859375, + "learning_rate": 5.3904795415975996e-05, + "loss": 0.92, + "step": 47910 + }, + { + "epoch": 0.69, + "grad_norm": 0.53125, + "learning_rate": 5.3882579108482024e-05, + "loss": 0.9276, + "step": 47915 + }, + { + "epoch": 0.69, + "grad_norm": 0.6015625, + "learning_rate": 5.386036569168049e-05, + "loss": 1.0497, + "step": 47920 + }, + { + "epoch": 0.69, + "grad_norm": 0.62109375, + "learning_rate": 5.383815516696378e-05, + "loss": 0.9812, + "step": 47925 + }, + { + "epoch": 0.69, + "grad_norm": 0.66015625, + "learning_rate": 5.3815947535724124e-05, + "loss": 0.973, + "step": 47930 + }, + { + "epoch": 0.69, + "grad_norm": 0.578125, + "learning_rate": 5.3793742799353464e-05, + "loss": 1.0856, + "step": 47935 + }, + { + "epoch": 0.69, + "grad_norm": 0.515625, + "learning_rate": 5.377154095924359e-05, + "loss": 0.9601, + "step": 47940 + }, + { + "epoch": 0.69, + "grad_norm": 0.546875, + "learning_rate": 5.3749342016786186e-05, + "loss": 0.9086, + "step": 47945 + }, + { + "epoch": 0.69, + "grad_norm": 0.56640625, + "learning_rate": 5.372714597337274e-05, + "loss": 0.9846, + "step": 47950 + }, + { + "epoch": 0.69, + "grad_norm": 0.58203125, + "learning_rate": 5.370495283039445e-05, + "loss": 1.0759, + "step": 47955 + }, + { + "epoch": 0.69, + "grad_norm": 0.62890625, + "learning_rate": 5.368276258924243e-05, + "loss": 0.9919, + "step": 47960 + }, + { + "epoch": 0.69, + "grad_norm": 0.53125, + "learning_rate": 5.3660575251307646e-05, + "loss": 0.9385, + "step": 47965 + }, + { + "epoch": 0.69, + "grad_norm": 0.57421875, + "learning_rate": 5.3638390817980744e-05, + "loss": 0.8852, + "step": 47970 + }, + { + "epoch": 0.69, + "grad_norm": 0.5546875, + "learning_rate": 5.3616209290652296e-05, + "loss": 0.8375, + "step": 47975 + }, + { + "epoch": 0.69, + "grad_norm": 0.50390625, + "learning_rate": 5.359403067071274e-05, + "loss": 1.0317, + "step": 47980 + }, + { + "epoch": 0.69, + "grad_norm": 0.625, + "learning_rate": 5.357185495955217e-05, + "loss": 0.9038, + "step": 47985 + }, + { + "epoch": 0.69, + "grad_norm": 0.4921875, + "learning_rate": 5.3549682158560574e-05, + "loss": 0.9486, + "step": 47990 + }, + { + "epoch": 0.69, + "grad_norm": 0.5625, + "learning_rate": 5.3527512269127844e-05, + "loss": 0.9461, + "step": 47995 + }, + { + "epoch": 0.69, + "grad_norm": 0.48828125, + "learning_rate": 5.350534529264353e-05, + "loss": 0.9131, + "step": 48000 + }, + { + "epoch": 0.69, + "grad_norm": 0.5234375, + "learning_rate": 5.348318123049714e-05, + "loss": 0.9683, + "step": 48005 + }, + { + "epoch": 0.69, + "grad_norm": 0.53125, + "learning_rate": 5.3461020084077964e-05, + "loss": 0.9655, + "step": 48010 + }, + { + "epoch": 0.69, + "grad_norm": 0.5078125, + "learning_rate": 5.3438861854775e-05, + "loss": 0.8866, + "step": 48015 + }, + { + "epoch": 0.69, + "grad_norm": 0.62890625, + "learning_rate": 5.341670654397721e-05, + "loss": 0.924, + "step": 48020 + }, + { + "epoch": 0.69, + "grad_norm": 0.546875, + "learning_rate": 5.339455415307335e-05, + "loss": 0.9218, + "step": 48025 + }, + { + "epoch": 0.69, + "grad_norm": 0.53515625, + "learning_rate": 5.337240468345187e-05, + "loss": 0.8772, + "step": 48030 + }, + { + "epoch": 0.69, + "grad_norm": 0.5703125, + "learning_rate": 5.335025813650121e-05, + "loss": 0.9987, + "step": 48035 + }, + { + "epoch": 0.69, + "grad_norm": 0.52734375, + "learning_rate": 5.332811451360946e-05, + "loss": 0.9083, + "step": 48040 + }, + { + "epoch": 0.69, + "grad_norm": 0.69140625, + "learning_rate": 5.330597381616469e-05, + "loss": 1.1062, + "step": 48045 + }, + { + "epoch": 0.69, + "grad_norm": 0.73828125, + "learning_rate": 5.328383604555462e-05, + "loss": 0.9678, + "step": 48050 + }, + { + "epoch": 0.69, + "grad_norm": 0.48046875, + "learning_rate": 5.326170120316691e-05, + "loss": 0.8508, + "step": 48055 + }, + { + "epoch": 0.69, + "grad_norm": 0.52734375, + "learning_rate": 5.3239569290389043e-05, + "loss": 1.0304, + "step": 48060 + }, + { + "epoch": 0.69, + "grad_norm": 0.5, + "learning_rate": 5.321744030860819e-05, + "loss": 0.9192, + "step": 48065 + }, + { + "epoch": 0.69, + "grad_norm": 0.71484375, + "learning_rate": 5.319531425921146e-05, + "loss": 1.0754, + "step": 48070 + }, + { + "epoch": 0.69, + "grad_norm": 0.5390625, + "learning_rate": 5.317319114358576e-05, + "loss": 1.098, + "step": 48075 + }, + { + "epoch": 0.69, + "grad_norm": 0.9140625, + "learning_rate": 5.3151070963117754e-05, + "loss": 0.9126, + "step": 48080 + }, + { + "epoch": 0.69, + "grad_norm": 0.57421875, + "learning_rate": 5.3128953719193975e-05, + "loss": 1.1703, + "step": 48085 + }, + { + "epoch": 0.69, + "grad_norm": 0.55078125, + "learning_rate": 5.310683941320082e-05, + "loss": 1.0177, + "step": 48090 + }, + { + "epoch": 0.69, + "grad_norm": 0.515625, + "learning_rate": 5.3084728046524315e-05, + "loss": 0.9678, + "step": 48095 + }, + { + "epoch": 0.69, + "grad_norm": 0.6640625, + "learning_rate": 5.306261962055048e-05, + "loss": 0.9722, + "step": 48100 + }, + { + "epoch": 0.69, + "grad_norm": 0.51953125, + "learning_rate": 5.3040514136665154e-05, + "loss": 1.0295, + "step": 48105 + }, + { + "epoch": 0.69, + "grad_norm": 0.61328125, + "learning_rate": 5.3018411596253824e-05, + "loss": 1.0052, + "step": 48110 + }, + { + "epoch": 0.69, + "grad_norm": 0.56640625, + "learning_rate": 5.2996312000701986e-05, + "loss": 0.9311, + "step": 48115 + }, + { + "epoch": 0.69, + "grad_norm": 0.53125, + "learning_rate": 5.2974215351394863e-05, + "loss": 1.0246, + "step": 48120 + }, + { + "epoch": 0.69, + "grad_norm": 0.474609375, + "learning_rate": 5.295212164971746e-05, + "loss": 0.8808, + "step": 48125 + }, + { + "epoch": 0.69, + "grad_norm": 0.546875, + "learning_rate": 5.293003089705464e-05, + "loss": 0.9481, + "step": 48130 + }, + { + "epoch": 0.69, + "grad_norm": 0.50390625, + "learning_rate": 5.290794309479113e-05, + "loss": 0.9324, + "step": 48135 + }, + { + "epoch": 0.69, + "grad_norm": 0.5078125, + "learning_rate": 5.288585824431139e-05, + "loss": 0.8866, + "step": 48140 + }, + { + "epoch": 0.69, + "grad_norm": 0.59765625, + "learning_rate": 5.286377634699967e-05, + "loss": 0.9753, + "step": 48145 + }, + { + "epoch": 0.69, + "grad_norm": 0.5625, + "learning_rate": 5.284169740424014e-05, + "loss": 0.9452, + "step": 48150 + }, + { + "epoch": 0.69, + "grad_norm": 0.5859375, + "learning_rate": 5.2819621417416755e-05, + "loss": 1.1019, + "step": 48155 + }, + { + "epoch": 0.69, + "grad_norm": 0.55078125, + "learning_rate": 5.27975483879132e-05, + "loss": 0.8032, + "step": 48160 + }, + { + "epoch": 0.69, + "grad_norm": 0.578125, + "learning_rate": 5.277547831711307e-05, + "loss": 0.9602, + "step": 48165 + }, + { + "epoch": 0.69, + "grad_norm": 0.53515625, + "learning_rate": 5.275341120639977e-05, + "loss": 1.0003, + "step": 48170 + }, + { + "epoch": 0.69, + "grad_norm": 0.5859375, + "learning_rate": 5.273134705715643e-05, + "loss": 1.1599, + "step": 48175 + }, + { + "epoch": 0.69, + "grad_norm": 0.58984375, + "learning_rate": 5.270928587076611e-05, + "loss": 0.9654, + "step": 48180 + }, + { + "epoch": 0.69, + "grad_norm": 0.51953125, + "learning_rate": 5.268722764861164e-05, + "loss": 0.8464, + "step": 48185 + }, + { + "epoch": 0.69, + "grad_norm": 0.58203125, + "learning_rate": 5.266517239207558e-05, + "loss": 0.8963, + "step": 48190 + }, + { + "epoch": 0.69, + "grad_norm": 0.59375, + "learning_rate": 5.2643120102540466e-05, + "loss": 1.0255, + "step": 48195 + }, + { + "epoch": 0.69, + "grad_norm": 0.80078125, + "learning_rate": 5.2621070781388485e-05, + "loss": 1.0463, + "step": 48200 + }, + { + "epoch": 0.69, + "grad_norm": 0.58984375, + "learning_rate": 5.259902443000177e-05, + "loss": 0.8804, + "step": 48205 + }, + { + "epoch": 0.69, + "grad_norm": 0.5703125, + "learning_rate": 5.257698104976217e-05, + "loss": 0.8568, + "step": 48210 + }, + { + "epoch": 0.69, + "grad_norm": 0.55078125, + "learning_rate": 5.25549406420514e-05, + "loss": 0.894, + "step": 48215 + }, + { + "epoch": 0.69, + "grad_norm": 0.56640625, + "learning_rate": 5.253290320825104e-05, + "loss": 0.8669, + "step": 48220 + }, + { + "epoch": 0.69, + "grad_norm": 0.5859375, + "learning_rate": 5.25108687497423e-05, + "loss": 0.8684, + "step": 48225 + }, + { + "epoch": 0.69, + "grad_norm": 0.50390625, + "learning_rate": 5.248883726790646e-05, + "loss": 0.9638, + "step": 48230 + }, + { + "epoch": 0.69, + "grad_norm": 0.734375, + "learning_rate": 5.246680876412435e-05, + "loss": 0.898, + "step": 48235 + }, + { + "epoch": 0.69, + "grad_norm": 0.51171875, + "learning_rate": 5.244478323977681e-05, + "loss": 0.954, + "step": 48240 + }, + { + "epoch": 0.69, + "grad_norm": 0.58203125, + "learning_rate": 5.242276069624445e-05, + "loss": 0.8637, + "step": 48245 + }, + { + "epoch": 0.69, + "grad_norm": 0.62109375, + "learning_rate": 5.240074113490765e-05, + "loss": 0.8344, + "step": 48250 + }, + { + "epoch": 0.69, + "grad_norm": 0.6640625, + "learning_rate": 5.2378724557146566e-05, + "loss": 0.9092, + "step": 48255 + }, + { + "epoch": 0.69, + "grad_norm": 0.51953125, + "learning_rate": 5.235671096434125e-05, + "loss": 0.9158, + "step": 48260 + }, + { + "epoch": 0.69, + "grad_norm": 0.60546875, + "learning_rate": 5.233470035787159e-05, + "loss": 0.8737, + "step": 48265 + }, + { + "epoch": 0.69, + "grad_norm": 0.58203125, + "learning_rate": 5.2312692739117165e-05, + "loss": 0.9542, + "step": 48270 + }, + { + "epoch": 0.69, + "grad_norm": 0.50390625, + "learning_rate": 5.229068810945748e-05, + "loss": 0.8575, + "step": 48275 + }, + { + "epoch": 0.69, + "grad_norm": 0.498046875, + "learning_rate": 5.226868647027183e-05, + "loss": 0.931, + "step": 48280 + }, + { + "epoch": 0.69, + "grad_norm": 0.50390625, + "learning_rate": 5.224668782293923e-05, + "loss": 0.954, + "step": 48285 + }, + { + "epoch": 0.69, + "grad_norm": 0.58203125, + "learning_rate": 5.222469216883863e-05, + "loss": 0.8797, + "step": 48290 + }, + { + "epoch": 0.69, + "grad_norm": 0.52734375, + "learning_rate": 5.220269950934877e-05, + "loss": 1.0477, + "step": 48295 + }, + { + "epoch": 0.69, + "grad_norm": 0.54296875, + "learning_rate": 5.218070984584815e-05, + "loss": 0.912, + "step": 48300 + }, + { + "epoch": 0.69, + "grad_norm": 0.609375, + "learning_rate": 5.2158723179715064e-05, + "loss": 1.0759, + "step": 48305 + }, + { + "epoch": 0.69, + "grad_norm": 0.490234375, + "learning_rate": 5.213673951232768e-05, + "loss": 0.871, + "step": 48310 + }, + { + "epoch": 0.69, + "grad_norm": 0.5703125, + "learning_rate": 5.2114758845064026e-05, + "loss": 0.8752, + "step": 48315 + }, + { + "epoch": 0.69, + "grad_norm": 0.58984375, + "learning_rate": 5.2092781179301776e-05, + "loss": 0.9911, + "step": 48320 + }, + { + "epoch": 0.69, + "grad_norm": 0.54296875, + "learning_rate": 5.2070806516418565e-05, + "loss": 1.1045, + "step": 48325 + }, + { + "epoch": 0.69, + "grad_norm": 0.515625, + "learning_rate": 5.204883485779184e-05, + "loss": 0.8972, + "step": 48330 + }, + { + "epoch": 0.69, + "grad_norm": 0.55078125, + "learning_rate": 5.2026866204798704e-05, + "loss": 0.9271, + "step": 48335 + }, + { + "epoch": 0.69, + "grad_norm": 0.58984375, + "learning_rate": 5.200490055881624e-05, + "loss": 0.9302, + "step": 48340 + }, + { + "epoch": 0.69, + "grad_norm": 0.53515625, + "learning_rate": 5.198293792122131e-05, + "loss": 0.9276, + "step": 48345 + }, + { + "epoch": 0.69, + "grad_norm": 0.55078125, + "learning_rate": 5.196097829339051e-05, + "loss": 0.93, + "step": 48350 + }, + { + "epoch": 0.69, + "grad_norm": 0.671875, + "learning_rate": 5.193902167670026e-05, + "loss": 1.0635, + "step": 48355 + }, + { + "epoch": 0.69, + "grad_norm": 0.515625, + "learning_rate": 5.191706807252692e-05, + "loss": 0.888, + "step": 48360 + }, + { + "epoch": 0.69, + "grad_norm": 0.57421875, + "learning_rate": 5.189511748224647e-05, + "loss": 0.9246, + "step": 48365 + }, + { + "epoch": 0.69, + "grad_norm": 0.50390625, + "learning_rate": 5.1873169907234843e-05, + "loss": 1.0039, + "step": 48370 + }, + { + "epoch": 0.69, + "grad_norm": 0.625, + "learning_rate": 5.185122534886777e-05, + "loss": 1.0164, + "step": 48375 + }, + { + "epoch": 0.69, + "grad_norm": 0.546875, + "learning_rate": 5.18292838085207e-05, + "loss": 0.8643, + "step": 48380 + }, + { + "epoch": 0.69, + "grad_norm": 0.54296875, + "learning_rate": 5.1807345287568984e-05, + "loss": 0.9587, + "step": 48385 + }, + { + "epoch": 0.69, + "grad_norm": 0.53125, + "learning_rate": 5.178540978738779e-05, + "loss": 1.0061, + "step": 48390 + }, + { + "epoch": 0.69, + "grad_norm": 0.6328125, + "learning_rate": 5.176347730935198e-05, + "loss": 1.1532, + "step": 48395 + }, + { + "epoch": 0.69, + "grad_norm": 0.625, + "learning_rate": 5.1741547854836344e-05, + "loss": 0.7755, + "step": 48400 + }, + { + "epoch": 0.69, + "grad_norm": 0.578125, + "learning_rate": 5.17196214252155e-05, + "loss": 0.9659, + "step": 48405 + }, + { + "epoch": 0.69, + "grad_norm": 0.5234375, + "learning_rate": 5.169769802186377e-05, + "loss": 1.0091, + "step": 48410 + }, + { + "epoch": 0.69, + "grad_norm": 0.71484375, + "learning_rate": 5.16757776461553e-05, + "loss": 0.9977, + "step": 48415 + }, + { + "epoch": 0.69, + "grad_norm": 0.5390625, + "learning_rate": 5.165386029946412e-05, + "loss": 1.0268, + "step": 48420 + }, + { + "epoch": 0.69, + "grad_norm": 0.52734375, + "learning_rate": 5.163194598316408e-05, + "loss": 0.8544, + "step": 48425 + }, + { + "epoch": 0.69, + "grad_norm": 0.52734375, + "learning_rate": 5.1610034698628704e-05, + "loss": 0.9178, + "step": 48430 + }, + { + "epoch": 0.69, + "grad_norm": 0.65234375, + "learning_rate": 5.158812644723148e-05, + "loss": 1.0783, + "step": 48435 + }, + { + "epoch": 0.69, + "grad_norm": 0.53125, + "learning_rate": 5.1566221230345655e-05, + "loss": 0.9574, + "step": 48440 + }, + { + "epoch": 0.69, + "grad_norm": 0.55859375, + "learning_rate": 5.15443190493442e-05, + "loss": 0.9738, + "step": 48445 + }, + { + "epoch": 0.69, + "grad_norm": 0.5625, + "learning_rate": 5.152241990560003e-05, + "loss": 0.8551, + "step": 48450 + }, + { + "epoch": 0.7, + "grad_norm": 0.5234375, + "learning_rate": 5.150052380048587e-05, + "loss": 1.049, + "step": 48455 + }, + { + "epoch": 0.7, + "grad_norm": 0.625, + "learning_rate": 5.147863073537402e-05, + "loss": 0.8248, + "step": 48460 + }, + { + "epoch": 0.7, + "grad_norm": 0.51953125, + "learning_rate": 5.1456740711636866e-05, + "loss": 1.0644, + "step": 48465 + }, + { + "epoch": 0.7, + "grad_norm": 0.52734375, + "learning_rate": 5.143485373064654e-05, + "loss": 0.8591, + "step": 48470 + }, + { + "epoch": 0.7, + "grad_norm": 0.62109375, + "learning_rate": 5.1412969793774835e-05, + "loss": 0.8676, + "step": 48475 + }, + { + "epoch": 0.7, + "grad_norm": 0.515625, + "learning_rate": 5.139108890239353e-05, + "loss": 0.9285, + "step": 48480 + }, + { + "epoch": 0.7, + "grad_norm": 0.5546875, + "learning_rate": 5.136921105787417e-05, + "loss": 1.0226, + "step": 48485 + }, + { + "epoch": 0.7, + "grad_norm": 0.5703125, + "learning_rate": 5.134733626158801e-05, + "loss": 0.8493, + "step": 48490 + }, + { + "epoch": 0.7, + "grad_norm": 0.6328125, + "learning_rate": 5.1325464514906216e-05, + "loss": 1.1499, + "step": 48495 + }, + { + "epoch": 0.7, + "grad_norm": 0.53125, + "learning_rate": 5.130359581919979e-05, + "loss": 0.8768, + "step": 48500 + }, + { + "epoch": 0.7, + "grad_norm": 0.5234375, + "learning_rate": 5.1281730175839394e-05, + "loss": 0.9771, + "step": 48505 + }, + { + "epoch": 0.7, + "grad_norm": 0.5703125, + "learning_rate": 5.125986758619567e-05, + "loss": 1.0365, + "step": 48510 + }, + { + "epoch": 0.7, + "grad_norm": 0.5703125, + "learning_rate": 5.123800805163893e-05, + "loss": 0.9222, + "step": 48515 + }, + { + "epoch": 0.7, + "grad_norm": 0.5390625, + "learning_rate": 5.1216151573539404e-05, + "loss": 1.0843, + "step": 48520 + }, + { + "epoch": 0.7, + "grad_norm": 0.49609375, + "learning_rate": 5.119429815326704e-05, + "loss": 1.0634, + "step": 48525 + }, + { + "epoch": 0.7, + "grad_norm": 0.671875, + "learning_rate": 5.1172447792191637e-05, + "loss": 0.9098, + "step": 48530 + }, + { + "epoch": 0.7, + "grad_norm": 0.59765625, + "learning_rate": 5.115060049168286e-05, + "loss": 0.9464, + "step": 48535 + }, + { + "epoch": 0.7, + "grad_norm": 0.53515625, + "learning_rate": 5.1128756253110044e-05, + "loss": 0.853, + "step": 48540 + }, + { + "epoch": 0.7, + "grad_norm": 0.55859375, + "learning_rate": 5.110691507784244e-05, + "loss": 0.95, + "step": 48545 + }, + { + "epoch": 0.7, + "grad_norm": 0.58984375, + "learning_rate": 5.108507696724911e-05, + "loss": 0.8101, + "step": 48550 + }, + { + "epoch": 0.7, + "grad_norm": 0.4609375, + "learning_rate": 5.106324192269884e-05, + "loss": 0.9499, + "step": 48555 + }, + { + "epoch": 0.7, + "grad_norm": 0.52734375, + "learning_rate": 5.104140994556035e-05, + "loss": 1.067, + "step": 48560 + }, + { + "epoch": 0.7, + "grad_norm": 0.61328125, + "learning_rate": 5.1019581037202036e-05, + "loss": 0.9056, + "step": 48565 + }, + { + "epoch": 0.7, + "grad_norm": 0.5625, + "learning_rate": 5.099775519899213e-05, + "loss": 0.972, + "step": 48570 + }, + { + "epoch": 0.7, + "grad_norm": 0.53125, + "learning_rate": 5.097593243229872e-05, + "loss": 0.8852, + "step": 48575 + }, + { + "epoch": 0.7, + "grad_norm": 0.515625, + "learning_rate": 5.0954112738489754e-05, + "loss": 0.9366, + "step": 48580 + }, + { + "epoch": 0.7, + "grad_norm": 0.5234375, + "learning_rate": 5.093229611893281e-05, + "loss": 1.0244, + "step": 48585 + }, + { + "epoch": 0.7, + "grad_norm": 0.609375, + "learning_rate": 5.0910482574995445e-05, + "loss": 0.9055, + "step": 48590 + }, + { + "epoch": 0.7, + "grad_norm": 0.52734375, + "learning_rate": 5.088867210804496e-05, + "loss": 0.828, + "step": 48595 + }, + { + "epoch": 0.7, + "grad_norm": 0.53125, + "learning_rate": 5.0866864719448414e-05, + "loss": 1.0362, + "step": 48600 + }, + { + "epoch": 0.7, + "grad_norm": 0.59375, + "learning_rate": 5.084506041057274e-05, + "loss": 1.0137, + "step": 48605 + }, + { + "epoch": 0.7, + "grad_norm": 0.57421875, + "learning_rate": 5.08232591827847e-05, + "loss": 0.9519, + "step": 48610 + }, + { + "epoch": 0.7, + "grad_norm": 0.498046875, + "learning_rate": 5.0801461037450794e-05, + "loss": 0.8714, + "step": 48615 + }, + { + "epoch": 0.7, + "grad_norm": 0.58203125, + "learning_rate": 5.077966597593728e-05, + "loss": 0.8594, + "step": 48620 + }, + { + "epoch": 0.7, + "grad_norm": 0.58984375, + "learning_rate": 5.0757873999610385e-05, + "loss": 0.9234, + "step": 48625 + }, + { + "epoch": 0.7, + "grad_norm": 0.546875, + "learning_rate": 5.073608510983606e-05, + "loss": 0.9935, + "step": 48630 + }, + { + "epoch": 0.7, + "grad_norm": 0.6484375, + "learning_rate": 5.0714299307979994e-05, + "loss": 1.1072, + "step": 48635 + }, + { + "epoch": 0.7, + "grad_norm": 0.51171875, + "learning_rate": 5.069251659540777e-05, + "loss": 1.0978, + "step": 48640 + }, + { + "epoch": 0.7, + "grad_norm": 0.578125, + "learning_rate": 5.06707369734848e-05, + "loss": 0.877, + "step": 48645 + }, + { + "epoch": 0.7, + "grad_norm": 0.5625, + "learning_rate": 5.0648960443576185e-05, + "loss": 0.9152, + "step": 48650 + }, + { + "epoch": 0.7, + "grad_norm": 0.515625, + "learning_rate": 5.062718700704693e-05, + "loss": 0.9402, + "step": 48655 + }, + { + "epoch": 0.7, + "grad_norm": 0.56640625, + "learning_rate": 5.0605416665261864e-05, + "loss": 0.9516, + "step": 48660 + }, + { + "epoch": 0.7, + "grad_norm": 0.66015625, + "learning_rate": 5.058364941958553e-05, + "loss": 0.85, + "step": 48665 + }, + { + "epoch": 0.7, + "grad_norm": 0.50390625, + "learning_rate": 5.0561885271382316e-05, + "loss": 0.9248, + "step": 48670 + }, + { + "epoch": 0.7, + "grad_norm": 0.79296875, + "learning_rate": 5.054012422201646e-05, + "loss": 1.043, + "step": 48675 + }, + { + "epoch": 0.7, + "grad_norm": 0.5625, + "learning_rate": 5.051836627285191e-05, + "loss": 0.8783, + "step": 48680 + }, + { + "epoch": 0.7, + "grad_norm": 0.53515625, + "learning_rate": 5.0496611425252515e-05, + "loss": 1.0747, + "step": 48685 + }, + { + "epoch": 0.7, + "grad_norm": 0.640625, + "learning_rate": 5.047485968058194e-05, + "loss": 0.9795, + "step": 48690 + }, + { + "epoch": 0.7, + "grad_norm": 0.5859375, + "learning_rate": 5.045311104020353e-05, + "loss": 1.0219, + "step": 48695 + }, + { + "epoch": 0.7, + "grad_norm": 0.578125, + "learning_rate": 5.0431365505480536e-05, + "loss": 0.924, + "step": 48700 + }, + { + "epoch": 0.7, + "grad_norm": 0.578125, + "learning_rate": 5.040962307777606e-05, + "loss": 0.8713, + "step": 48705 + }, + { + "epoch": 0.7, + "grad_norm": 0.58203125, + "learning_rate": 5.0387883758452836e-05, + "loss": 1.0171, + "step": 48710 + }, + { + "epoch": 0.7, + "grad_norm": 0.5859375, + "learning_rate": 5.036614754887356e-05, + "loss": 0.9331, + "step": 48715 + }, + { + "epoch": 0.7, + "grad_norm": 0.6328125, + "learning_rate": 5.0344414450400734e-05, + "loss": 0.9752, + "step": 48720 + }, + { + "epoch": 0.7, + "grad_norm": 0.58203125, + "learning_rate": 5.032268446439656e-05, + "loss": 0.909, + "step": 48725 + }, + { + "epoch": 0.7, + "grad_norm": 0.61328125, + "learning_rate": 5.030095759222306e-05, + "loss": 0.9741, + "step": 48730 + }, + { + "epoch": 0.7, + "grad_norm": 0.578125, + "learning_rate": 5.0279233835242135e-05, + "loss": 0.9015, + "step": 48735 + }, + { + "epoch": 0.7, + "grad_norm": 0.53125, + "learning_rate": 5.025751319481551e-05, + "loss": 0.8833, + "step": 48740 + }, + { + "epoch": 0.7, + "grad_norm": 0.5546875, + "learning_rate": 5.023579567230456e-05, + "loss": 0.8863, + "step": 48745 + }, + { + "epoch": 0.7, + "grad_norm": 0.55859375, + "learning_rate": 5.021408126907063e-05, + "loss": 1.0758, + "step": 48750 + }, + { + "epoch": 0.7, + "grad_norm": 0.640625, + "learning_rate": 5.019236998647482e-05, + "loss": 0.9996, + "step": 48755 + }, + { + "epoch": 0.7, + "grad_norm": 0.515625, + "learning_rate": 5.017066182587794e-05, + "loss": 0.905, + "step": 48760 + }, + { + "epoch": 0.7, + "grad_norm": 0.53125, + "learning_rate": 5.0148956788640736e-05, + "loss": 1.0319, + "step": 48765 + }, + { + "epoch": 0.7, + "grad_norm": 0.55078125, + "learning_rate": 5.012725487612376e-05, + "loss": 0.8233, + "step": 48770 + }, + { + "epoch": 0.7, + "grad_norm": 0.50390625, + "learning_rate": 5.0105556089687186e-05, + "loss": 0.9083, + "step": 48775 + }, + { + "epoch": 0.7, + "grad_norm": 0.68359375, + "learning_rate": 5.0083860430691175e-05, + "loss": 1.1075, + "step": 48780 + }, + { + "epoch": 0.7, + "grad_norm": 0.515625, + "learning_rate": 5.0062167900495674e-05, + "loss": 1.0288, + "step": 48785 + }, + { + "epoch": 0.7, + "grad_norm": 0.49609375, + "learning_rate": 5.004047850046034e-05, + "loss": 0.9933, + "step": 48790 + }, + { + "epoch": 0.7, + "grad_norm": 0.625, + "learning_rate": 5.00187922319447e-05, + "loss": 0.9871, + "step": 48795 + }, + { + "epoch": 0.7, + "grad_norm": 0.5703125, + "learning_rate": 4.999710909630813e-05, + "loss": 0.9935, + "step": 48800 + }, + { + "epoch": 0.7, + "grad_norm": 0.5390625, + "learning_rate": 4.997542909490968e-05, + "loss": 0.8804, + "step": 48805 + }, + { + "epoch": 0.7, + "grad_norm": 0.53125, + "learning_rate": 4.9953752229108295e-05, + "loss": 0.9151, + "step": 48810 + }, + { + "epoch": 0.7, + "grad_norm": 0.490234375, + "learning_rate": 4.993207850026276e-05, + "loss": 0.93, + "step": 48815 + }, + { + "epoch": 0.7, + "grad_norm": 0.58984375, + "learning_rate": 4.991040790973153e-05, + "loss": 0.9725, + "step": 48820 + }, + { + "epoch": 0.7, + "grad_norm": 0.59765625, + "learning_rate": 4.9888740458873016e-05, + "loss": 0.8681, + "step": 48825 + }, + { + "epoch": 0.7, + "grad_norm": 0.65625, + "learning_rate": 4.986707614904529e-05, + "loss": 0.8718, + "step": 48830 + }, + { + "epoch": 0.7, + "grad_norm": 0.578125, + "learning_rate": 4.984541498160635e-05, + "loss": 0.957, + "step": 48835 + }, + { + "epoch": 0.7, + "grad_norm": 0.5234375, + "learning_rate": 4.982375695791389e-05, + "loss": 1.0334, + "step": 48840 + }, + { + "epoch": 0.7, + "grad_norm": 0.65234375, + "learning_rate": 4.98021020793255e-05, + "loss": 1.1653, + "step": 48845 + }, + { + "epoch": 0.7, + "grad_norm": 0.515625, + "learning_rate": 4.9780450347198546e-05, + "loss": 0.9699, + "step": 48850 + }, + { + "epoch": 0.7, + "grad_norm": 0.60546875, + "learning_rate": 4.975880176289014e-05, + "loss": 0.9317, + "step": 48855 + }, + { + "epoch": 0.7, + "grad_norm": 0.62109375, + "learning_rate": 4.973715632775724e-05, + "loss": 1.0843, + "step": 48860 + }, + { + "epoch": 0.7, + "grad_norm": 0.52734375, + "learning_rate": 4.9715514043156675e-05, + "loss": 0.8991, + "step": 48865 + }, + { + "epoch": 0.7, + "grad_norm": 0.458984375, + "learning_rate": 4.9693874910444914e-05, + "loss": 0.9711, + "step": 48870 + }, + { + "epoch": 0.7, + "grad_norm": 0.71875, + "learning_rate": 4.967223893097841e-05, + "loss": 0.8734, + "step": 48875 + }, + { + "epoch": 0.7, + "grad_norm": 0.53125, + "learning_rate": 4.965060610611325e-05, + "loss": 0.8456, + "step": 48880 + }, + { + "epoch": 0.7, + "grad_norm": 0.58203125, + "learning_rate": 4.9628976437205485e-05, + "loss": 0.882, + "step": 48885 + }, + { + "epoch": 0.7, + "grad_norm": 0.6015625, + "learning_rate": 4.96073499256108e-05, + "loss": 0.9383, + "step": 48890 + }, + { + "epoch": 0.7, + "grad_norm": 0.53125, + "learning_rate": 4.958572657268482e-05, + "loss": 1.0069, + "step": 48895 + }, + { + "epoch": 0.7, + "grad_norm": 0.625, + "learning_rate": 4.956410637978295e-05, + "loss": 1.146, + "step": 48900 + }, + { + "epoch": 0.7, + "grad_norm": 0.546875, + "learning_rate": 4.954248934826029e-05, + "loss": 0.8967, + "step": 48905 + }, + { + "epoch": 0.7, + "grad_norm": 0.5625, + "learning_rate": 4.9520875479471904e-05, + "loss": 1.0125, + "step": 48910 + }, + { + "epoch": 0.7, + "grad_norm": 0.5078125, + "learning_rate": 4.94992647747725e-05, + "loss": 0.954, + "step": 48915 + }, + { + "epoch": 0.7, + "grad_norm": 0.5625, + "learning_rate": 4.9477657235516696e-05, + "loss": 0.9786, + "step": 48920 + }, + { + "epoch": 0.7, + "grad_norm": 0.73828125, + "learning_rate": 4.945605286305891e-05, + "loss": 0.865, + "step": 48925 + }, + { + "epoch": 0.7, + "grad_norm": 0.51171875, + "learning_rate": 4.94344516587533e-05, + "loss": 0.8614, + "step": 48930 + }, + { + "epoch": 0.7, + "grad_norm": 0.75, + "learning_rate": 4.941285362395382e-05, + "loss": 0.9378, + "step": 48935 + }, + { + "epoch": 0.7, + "grad_norm": 0.515625, + "learning_rate": 4.939125876001427e-05, + "loss": 0.9931, + "step": 48940 + }, + { + "epoch": 0.7, + "grad_norm": 0.578125, + "learning_rate": 4.9369667068288303e-05, + "loss": 1.0125, + "step": 48945 + }, + { + "epoch": 0.7, + "grad_norm": 0.55859375, + "learning_rate": 4.934807855012924e-05, + "loss": 0.9566, + "step": 48950 + }, + { + "epoch": 0.7, + "grad_norm": 0.6640625, + "learning_rate": 4.932649320689029e-05, + "loss": 1.149, + "step": 48955 + }, + { + "epoch": 0.7, + "grad_norm": 0.55859375, + "learning_rate": 4.93049110399245e-05, + "loss": 1.0048, + "step": 48960 + }, + { + "epoch": 0.7, + "grad_norm": 0.4765625, + "learning_rate": 4.928333205058459e-05, + "loss": 1.0096, + "step": 48965 + }, + { + "epoch": 0.7, + "grad_norm": 0.52734375, + "learning_rate": 4.9261756240223175e-05, + "loss": 0.9787, + "step": 48970 + }, + { + "epoch": 0.7, + "grad_norm": 0.57421875, + "learning_rate": 4.924018361019271e-05, + "loss": 0.9944, + "step": 48975 + }, + { + "epoch": 0.7, + "grad_norm": 0.52734375, + "learning_rate": 4.9218614161845345e-05, + "loss": 0.8065, + "step": 48980 + }, + { + "epoch": 0.7, + "grad_norm": 0.52734375, + "learning_rate": 4.919704789653304e-05, + "loss": 1.0229, + "step": 48985 + }, + { + "epoch": 0.7, + "grad_norm": 0.54296875, + "learning_rate": 4.917548481560764e-05, + "loss": 0.8176, + "step": 48990 + }, + { + "epoch": 0.7, + "grad_norm": 0.5390625, + "learning_rate": 4.915392492042078e-05, + "loss": 0.9873, + "step": 48995 + }, + { + "epoch": 0.7, + "grad_norm": 0.58984375, + "learning_rate": 4.913236821232377e-05, + "loss": 0.9549, + "step": 49000 + }, + { + "epoch": 0.7, + "grad_norm": 0.62109375, + "learning_rate": 4.9110814692667836e-05, + "loss": 1.1025, + "step": 49005 + }, + { + "epoch": 0.7, + "grad_norm": 0.5, + "learning_rate": 4.908926436280406e-05, + "loss": 0.9839, + "step": 49010 + }, + { + "epoch": 0.7, + "grad_norm": 0.51953125, + "learning_rate": 4.906771722408312e-05, + "loss": 0.9247, + "step": 49015 + }, + { + "epoch": 0.7, + "grad_norm": 0.60546875, + "learning_rate": 4.904617327785568e-05, + "loss": 0.9522, + "step": 49020 + }, + { + "epoch": 0.7, + "grad_norm": 0.5625, + "learning_rate": 4.902463252547217e-05, + "loss": 0.9195, + "step": 49025 + }, + { + "epoch": 0.7, + "grad_norm": 0.62109375, + "learning_rate": 4.90030949682827e-05, + "loss": 0.9347, + "step": 49030 + }, + { + "epoch": 0.7, + "grad_norm": 0.58984375, + "learning_rate": 4.898156060763737e-05, + "loss": 0.9197, + "step": 49035 + }, + { + "epoch": 0.7, + "grad_norm": 0.63671875, + "learning_rate": 4.896002944488593e-05, + "loss": 0.8434, + "step": 49040 + }, + { + "epoch": 0.7, + "grad_norm": 0.53125, + "learning_rate": 4.893850148137792e-05, + "loss": 0.8596, + "step": 49045 + }, + { + "epoch": 0.7, + "grad_norm": 0.59375, + "learning_rate": 4.891697671846279e-05, + "loss": 0.9378, + "step": 49050 + }, + { + "epoch": 0.7, + "grad_norm": 0.62890625, + "learning_rate": 4.889545515748979e-05, + "loss": 0.9958, + "step": 49055 + }, + { + "epoch": 0.7, + "grad_norm": 0.498046875, + "learning_rate": 4.8873936799807816e-05, + "loss": 0.8579, + "step": 49060 + }, + { + "epoch": 0.7, + "grad_norm": 0.50390625, + "learning_rate": 4.885242164676571e-05, + "loss": 0.8905, + "step": 49065 + }, + { + "epoch": 0.7, + "grad_norm": 0.57421875, + "learning_rate": 4.883090969971211e-05, + "loss": 0.9437, + "step": 49070 + }, + { + "epoch": 0.7, + "grad_norm": 0.494140625, + "learning_rate": 4.8809400959995335e-05, + "loss": 0.949, + "step": 49075 + }, + { + "epoch": 0.7, + "grad_norm": 0.53125, + "learning_rate": 4.87878954289636e-05, + "loss": 0.9098, + "step": 49080 + }, + { + "epoch": 0.7, + "grad_norm": 0.5546875, + "learning_rate": 4.876639310796495e-05, + "loss": 0.94, + "step": 49085 + }, + { + "epoch": 0.7, + "grad_norm": 0.52734375, + "learning_rate": 4.874489399834712e-05, + "loss": 0.9608, + "step": 49090 + }, + { + "epoch": 0.7, + "grad_norm": 0.609375, + "learning_rate": 4.872339810145767e-05, + "loss": 1.0145, + "step": 49095 + }, + { + "epoch": 0.7, + "grad_norm": 0.6171875, + "learning_rate": 4.8701905418644044e-05, + "loss": 1.0344, + "step": 49100 + }, + { + "epoch": 0.7, + "grad_norm": 0.5546875, + "learning_rate": 4.8680415951253436e-05, + "loss": 0.9625, + "step": 49105 + }, + { + "epoch": 0.7, + "grad_norm": 0.63671875, + "learning_rate": 4.865892970063276e-05, + "loss": 0.8345, + "step": 49110 + }, + { + "epoch": 0.7, + "grad_norm": 0.6640625, + "learning_rate": 4.863744666812885e-05, + "loss": 1.032, + "step": 49115 + }, + { + "epoch": 0.7, + "grad_norm": 0.71875, + "learning_rate": 4.861596685508832e-05, + "loss": 1.0246, + "step": 49120 + }, + { + "epoch": 0.7, + "grad_norm": 0.51171875, + "learning_rate": 4.8594490262857474e-05, + "loss": 1.0238, + "step": 49125 + }, + { + "epoch": 0.7, + "grad_norm": 0.61328125, + "learning_rate": 4.857301689278252e-05, + "loss": 1.1131, + "step": 49130 + }, + { + "epoch": 0.7, + "grad_norm": 0.6640625, + "learning_rate": 4.8551546746209474e-05, + "loss": 1.04, + "step": 49135 + }, + { + "epoch": 0.7, + "grad_norm": 0.6328125, + "learning_rate": 4.853007982448408e-05, + "loss": 0.9312, + "step": 49140 + }, + { + "epoch": 0.7, + "grad_norm": 0.53515625, + "learning_rate": 4.8508616128951844e-05, + "loss": 1.0589, + "step": 49145 + }, + { + "epoch": 0.71, + "grad_norm": 0.58203125, + "learning_rate": 4.848715566095825e-05, + "loss": 0.9474, + "step": 49150 + }, + { + "epoch": 0.71, + "grad_norm": 0.578125, + "learning_rate": 4.846569842184836e-05, + "loss": 1.0166, + "step": 49155 + }, + { + "epoch": 0.71, + "grad_norm": 0.57421875, + "learning_rate": 4.8444244412967186e-05, + "loss": 1.1753, + "step": 49160 + }, + { + "epoch": 0.71, + "grad_norm": 0.56640625, + "learning_rate": 4.8422793635659535e-05, + "loss": 0.9829, + "step": 49165 + }, + { + "epoch": 0.71, + "grad_norm": 0.51171875, + "learning_rate": 4.8401346091269887e-05, + "loss": 0.89, + "step": 49170 + }, + { + "epoch": 0.71, + "grad_norm": 0.5390625, + "learning_rate": 4.837990178114261e-05, + "loss": 0.9116, + "step": 49175 + }, + { + "epoch": 0.71, + "grad_norm": 0.6328125, + "learning_rate": 4.8358460706621934e-05, + "loss": 0.8536, + "step": 49180 + }, + { + "epoch": 0.71, + "grad_norm": 0.57421875, + "learning_rate": 4.833702286905172e-05, + "loss": 0.9304, + "step": 49185 + }, + { + "epoch": 0.71, + "grad_norm": 0.5390625, + "learning_rate": 4.831558826977578e-05, + "loss": 1.0011, + "step": 49190 + }, + { + "epoch": 0.71, + "grad_norm": 0.5078125, + "learning_rate": 4.82941569101376e-05, + "loss": 0.807, + "step": 49195 + }, + { + "epoch": 0.71, + "grad_norm": 0.54296875, + "learning_rate": 4.827272879148058e-05, + "loss": 0.8885, + "step": 49200 + }, + { + "epoch": 0.71, + "grad_norm": 0.5703125, + "learning_rate": 4.8251303915147805e-05, + "loss": 0.8502, + "step": 49205 + }, + { + "epoch": 0.71, + "grad_norm": 0.515625, + "learning_rate": 4.822988228248224e-05, + "loss": 1.0452, + "step": 49210 + }, + { + "epoch": 0.71, + "grad_norm": 0.515625, + "learning_rate": 4.8208463894826635e-05, + "loss": 0.9482, + "step": 49215 + }, + { + "epoch": 0.71, + "grad_norm": 0.5859375, + "learning_rate": 4.818704875352348e-05, + "loss": 0.8321, + "step": 49220 + }, + { + "epoch": 0.71, + "grad_norm": 0.58203125, + "learning_rate": 4.816563685991511e-05, + "loss": 1.0368, + "step": 49225 + }, + { + "epoch": 0.71, + "grad_norm": 0.52734375, + "learning_rate": 4.81442282153437e-05, + "loss": 0.9997, + "step": 49230 + }, + { + "epoch": 0.71, + "grad_norm": 0.5234375, + "learning_rate": 4.812282282115108e-05, + "loss": 0.9241, + "step": 49235 + }, + { + "epoch": 0.71, + "grad_norm": 0.54296875, + "learning_rate": 4.810142067867901e-05, + "loss": 1.0477, + "step": 49240 + }, + { + "epoch": 0.71, + "grad_norm": 0.55078125, + "learning_rate": 4.808002178926908e-05, + "loss": 0.9772, + "step": 49245 + }, + { + "epoch": 0.71, + "grad_norm": 0.546875, + "learning_rate": 4.805862615426243e-05, + "loss": 0.9679, + "step": 49250 + }, + { + "epoch": 0.71, + "grad_norm": 0.51953125, + "learning_rate": 4.8037233775000254e-05, + "loss": 0.8722, + "step": 49255 + }, + { + "epoch": 0.71, + "grad_norm": 0.57421875, + "learning_rate": 4.8015844652823494e-05, + "loss": 1.0573, + "step": 49260 + }, + { + "epoch": 0.71, + "grad_norm": 0.55859375, + "learning_rate": 4.799445878907275e-05, + "loss": 0.9677, + "step": 49265 + }, + { + "epoch": 0.71, + "grad_norm": 0.53515625, + "learning_rate": 4.797307618508855e-05, + "loss": 0.939, + "step": 49270 + }, + { + "epoch": 0.71, + "grad_norm": 0.609375, + "learning_rate": 4.7951696842211235e-05, + "loss": 0.9858, + "step": 49275 + }, + { + "epoch": 0.71, + "grad_norm": 0.51953125, + "learning_rate": 4.79303207617808e-05, + "loss": 1.0011, + "step": 49280 + }, + { + "epoch": 0.71, + "grad_norm": 0.59375, + "learning_rate": 4.7908947945137164e-05, + "loss": 0.8575, + "step": 49285 + }, + { + "epoch": 0.71, + "grad_norm": 0.51953125, + "learning_rate": 4.788757839362004e-05, + "loss": 0.9424, + "step": 49290 + }, + { + "epoch": 0.71, + "grad_norm": 0.53515625, + "learning_rate": 4.786621210856884e-05, + "loss": 0.9988, + "step": 49295 + }, + { + "epoch": 0.71, + "grad_norm": 0.53515625, + "learning_rate": 4.784484909132281e-05, + "loss": 0.8938, + "step": 49300 + }, + { + "epoch": 0.71, + "grad_norm": 0.60546875, + "learning_rate": 4.782348934322104e-05, + "loss": 0.9112, + "step": 49305 + }, + { + "epoch": 0.71, + "grad_norm": 0.5546875, + "learning_rate": 4.7802132865602425e-05, + "loss": 0.9679, + "step": 49310 + }, + { + "epoch": 0.71, + "grad_norm": 0.62109375, + "learning_rate": 4.778077965980552e-05, + "loss": 0.8787, + "step": 49315 + }, + { + "epoch": 0.71, + "grad_norm": 0.6015625, + "learning_rate": 4.775942972716883e-05, + "loss": 1.1053, + "step": 49320 + }, + { + "epoch": 0.71, + "grad_norm": 0.55859375, + "learning_rate": 4.773808306903061e-05, + "loss": 0.9912, + "step": 49325 + }, + { + "epoch": 0.71, + "grad_norm": 0.59765625, + "learning_rate": 4.771673968672883e-05, + "loss": 1.0677, + "step": 49330 + }, + { + "epoch": 0.71, + "grad_norm": 0.5390625, + "learning_rate": 4.769539958160135e-05, + "loss": 0.8735, + "step": 49335 + }, + { + "epoch": 0.71, + "grad_norm": 0.578125, + "learning_rate": 4.767406275498584e-05, + "loss": 1.1303, + "step": 49340 + }, + { + "epoch": 0.71, + "grad_norm": 0.578125, + "learning_rate": 4.765272920821963e-05, + "loss": 0.9533, + "step": 49345 + }, + { + "epoch": 0.71, + "grad_norm": 0.5546875, + "learning_rate": 4.763139894264e-05, + "loss": 0.97, + "step": 49350 + }, + { + "epoch": 0.71, + "grad_norm": 0.55859375, + "learning_rate": 4.7610071959583935e-05, + "loss": 0.9927, + "step": 49355 + }, + { + "epoch": 0.71, + "grad_norm": 0.55078125, + "learning_rate": 4.758874826038819e-05, + "loss": 0.9571, + "step": 49360 + }, + { + "epoch": 0.71, + "grad_norm": 0.5234375, + "learning_rate": 4.75674278463894e-05, + "loss": 0.8435, + "step": 49365 + }, + { + "epoch": 0.71, + "grad_norm": 0.6015625, + "learning_rate": 4.7546110718923974e-05, + "loss": 1.0449, + "step": 49370 + }, + { + "epoch": 0.71, + "grad_norm": 0.5234375, + "learning_rate": 4.7524796879328035e-05, + "loss": 0.9614, + "step": 49375 + }, + { + "epoch": 0.71, + "grad_norm": 0.57421875, + "learning_rate": 4.750348632893759e-05, + "loss": 0.9131, + "step": 49380 + }, + { + "epoch": 0.71, + "grad_norm": 0.5703125, + "learning_rate": 4.748217906908846e-05, + "loss": 0.9521, + "step": 49385 + }, + { + "epoch": 0.71, + "grad_norm": 0.6015625, + "learning_rate": 4.7460875101116106e-05, + "loss": 0.9026, + "step": 49390 + }, + { + "epoch": 0.71, + "grad_norm": 0.56640625, + "learning_rate": 4.7439574426355946e-05, + "loss": 0.9808, + "step": 49395 + }, + { + "epoch": 0.71, + "grad_norm": 0.65234375, + "learning_rate": 4.741827704614317e-05, + "loss": 0.8594, + "step": 49400 + }, + { + "epoch": 0.71, + "grad_norm": 0.54296875, + "learning_rate": 4.739698296181265e-05, + "loss": 0.9514, + "step": 49405 + }, + { + "epoch": 0.71, + "grad_norm": 0.53515625, + "learning_rate": 4.737569217469913e-05, + "loss": 0.8326, + "step": 49410 + }, + { + "epoch": 0.71, + "grad_norm": 0.625, + "learning_rate": 4.7354404686137155e-05, + "loss": 1.1199, + "step": 49415 + }, + { + "epoch": 0.71, + "grad_norm": 0.53515625, + "learning_rate": 4.733312049746108e-05, + "loss": 0.9097, + "step": 49420 + }, + { + "epoch": 0.71, + "grad_norm": 0.546875, + "learning_rate": 4.731183961000496e-05, + "loss": 0.8891, + "step": 49425 + }, + { + "epoch": 0.71, + "grad_norm": 0.498046875, + "learning_rate": 4.729056202510275e-05, + "loss": 0.934, + "step": 49430 + }, + { + "epoch": 0.71, + "grad_norm": 0.55859375, + "learning_rate": 4.726928774408818e-05, + "loss": 0.8811, + "step": 49435 + }, + { + "epoch": 0.71, + "grad_norm": 0.59765625, + "learning_rate": 4.724801676829466e-05, + "loss": 1.2237, + "step": 49440 + }, + { + "epoch": 0.71, + "grad_norm": 0.5546875, + "learning_rate": 4.7226749099055537e-05, + "loss": 0.9113, + "step": 49445 + }, + { + "epoch": 0.71, + "grad_norm": 0.515625, + "learning_rate": 4.720548473770392e-05, + "loss": 0.9965, + "step": 49450 + }, + { + "epoch": 0.71, + "grad_norm": 0.5703125, + "learning_rate": 4.718422368557265e-05, + "loss": 0.9313, + "step": 49455 + }, + { + "epoch": 0.71, + "grad_norm": 0.58984375, + "learning_rate": 4.716296594399434e-05, + "loss": 1.0277, + "step": 49460 + }, + { + "epoch": 0.71, + "grad_norm": 0.5703125, + "learning_rate": 4.714171151430156e-05, + "loss": 1.0236, + "step": 49465 + }, + { + "epoch": 0.71, + "grad_norm": 0.64453125, + "learning_rate": 4.712046039782645e-05, + "loss": 0.8902, + "step": 49470 + }, + { + "epoch": 0.71, + "grad_norm": 0.578125, + "learning_rate": 4.709921259590111e-05, + "loss": 0.8605, + "step": 49475 + }, + { + "epoch": 0.71, + "grad_norm": 0.55859375, + "learning_rate": 4.707796810985742e-05, + "loss": 0.9146, + "step": 49480 + }, + { + "epoch": 0.71, + "grad_norm": 0.62890625, + "learning_rate": 4.705672694102691e-05, + "loss": 1.152, + "step": 49485 + }, + { + "epoch": 0.71, + "grad_norm": 0.4765625, + "learning_rate": 4.703548909074106e-05, + "loss": 0.9783, + "step": 49490 + }, + { + "epoch": 0.71, + "grad_norm": 0.7109375, + "learning_rate": 4.7014254560331115e-05, + "loss": 0.9032, + "step": 49495 + }, + { + "epoch": 0.71, + "grad_norm": 0.58984375, + "learning_rate": 4.699302335112799e-05, + "loss": 0.9469, + "step": 49500 + }, + { + "epoch": 0.71, + "grad_norm": 0.59765625, + "learning_rate": 4.6971795464462584e-05, + "loss": 0.9139, + "step": 49505 + }, + { + "epoch": 0.71, + "grad_norm": 0.546875, + "learning_rate": 4.695057090166539e-05, + "loss": 0.852, + "step": 49510 + }, + { + "epoch": 0.71, + "grad_norm": 0.5859375, + "learning_rate": 4.692934966406687e-05, + "loss": 0.9828, + "step": 49515 + }, + { + "epoch": 0.71, + "grad_norm": 0.6328125, + "learning_rate": 4.690813175299712e-05, + "loss": 0.9554, + "step": 49520 + }, + { + "epoch": 0.71, + "grad_norm": 0.625, + "learning_rate": 4.688691716978613e-05, + "loss": 0.9984, + "step": 49525 + }, + { + "epoch": 0.71, + "grad_norm": 0.5546875, + "learning_rate": 4.68657059157637e-05, + "loss": 0.9265, + "step": 49530 + }, + { + "epoch": 0.71, + "grad_norm": 0.5625, + "learning_rate": 4.684449799225931e-05, + "loss": 0.9029, + "step": 49535 + }, + { + "epoch": 0.71, + "grad_norm": 0.66796875, + "learning_rate": 4.682329340060232e-05, + "loss": 0.9228, + "step": 49540 + }, + { + "epoch": 0.71, + "grad_norm": 0.6015625, + "learning_rate": 4.68020921421219e-05, + "loss": 0.9272, + "step": 49545 + }, + { + "epoch": 0.71, + "grad_norm": 0.5390625, + "learning_rate": 4.6780894218146886e-05, + "loss": 0.8161, + "step": 49550 + }, + { + "epoch": 0.71, + "grad_norm": 0.5859375, + "learning_rate": 4.6759699630006046e-05, + "loss": 0.9438, + "step": 49555 + }, + { + "epoch": 0.71, + "grad_norm": 0.546875, + "learning_rate": 4.67385083790279e-05, + "loss": 0.8106, + "step": 49560 + }, + { + "epoch": 0.71, + "grad_norm": 0.52734375, + "learning_rate": 4.6717320466540715e-05, + "loss": 0.9336, + "step": 49565 + }, + { + "epoch": 0.71, + "grad_norm": 0.51953125, + "learning_rate": 4.6696135893872525e-05, + "loss": 0.9585, + "step": 49570 + }, + { + "epoch": 0.71, + "grad_norm": 0.6015625, + "learning_rate": 4.667495466235125e-05, + "loss": 0.9121, + "step": 49575 + }, + { + "epoch": 0.71, + "grad_norm": 0.6171875, + "learning_rate": 4.665377677330458e-05, + "loss": 1.0132, + "step": 49580 + }, + { + "epoch": 0.71, + "grad_norm": 0.53515625, + "learning_rate": 4.66326022280599e-05, + "loss": 1.0023, + "step": 49585 + }, + { + "epoch": 0.71, + "grad_norm": 0.53125, + "learning_rate": 4.661143102794453e-05, + "loss": 0.9591, + "step": 49590 + }, + { + "epoch": 0.71, + "grad_norm": 0.55859375, + "learning_rate": 4.659026317428543e-05, + "loss": 0.8945, + "step": 49595 + }, + { + "epoch": 0.71, + "grad_norm": 0.54296875, + "learning_rate": 4.656909866840947e-05, + "loss": 0.9979, + "step": 49600 + }, + { + "epoch": 0.71, + "grad_norm": 0.578125, + "learning_rate": 4.654793751164329e-05, + "loss": 0.7919, + "step": 49605 + }, + { + "epoch": 0.71, + "grad_norm": 0.578125, + "learning_rate": 4.652677970531323e-05, + "loss": 1.1003, + "step": 49610 + }, + { + "epoch": 0.71, + "grad_norm": 0.640625, + "learning_rate": 4.6505625250745557e-05, + "loss": 1.0737, + "step": 49615 + }, + { + "epoch": 0.71, + "grad_norm": 0.46484375, + "learning_rate": 4.6484474149266166e-05, + "loss": 0.842, + "step": 49620 + }, + { + "epoch": 0.71, + "grad_norm": 0.5234375, + "learning_rate": 4.6463326402200934e-05, + "loss": 1.0013, + "step": 49625 + }, + { + "epoch": 0.71, + "grad_norm": 0.466796875, + "learning_rate": 4.6442182010875345e-05, + "loss": 0.8792, + "step": 49630 + }, + { + "epoch": 0.71, + "grad_norm": 0.515625, + "learning_rate": 4.642104097661477e-05, + "loss": 0.7644, + "step": 49635 + }, + { + "epoch": 0.71, + "grad_norm": 0.53125, + "learning_rate": 4.639990330074442e-05, + "loss": 0.9765, + "step": 49640 + }, + { + "epoch": 0.71, + "grad_norm": 0.5625, + "learning_rate": 4.637876898458913e-05, + "loss": 0.8962, + "step": 49645 + }, + { + "epoch": 0.71, + "grad_norm": 0.53515625, + "learning_rate": 4.635763802947367e-05, + "loss": 0.9404, + "step": 49650 + }, + { + "epoch": 0.71, + "grad_norm": 0.48828125, + "learning_rate": 4.633651043672259e-05, + "loss": 0.9556, + "step": 49655 + }, + { + "epoch": 0.71, + "grad_norm": 0.54296875, + "learning_rate": 4.631538620766012e-05, + "loss": 0.8018, + "step": 49660 + }, + { + "epoch": 0.71, + "grad_norm": 0.64453125, + "learning_rate": 4.629426534361042e-05, + "loss": 0.9562, + "step": 49665 + }, + { + "epoch": 0.71, + "grad_norm": 0.59765625, + "learning_rate": 4.627314784589729e-05, + "loss": 0.8734, + "step": 49670 + }, + { + "epoch": 0.71, + "grad_norm": 0.60546875, + "learning_rate": 4.625203371584449e-05, + "loss": 0.9724, + "step": 49675 + }, + { + "epoch": 0.71, + "grad_norm": 0.59375, + "learning_rate": 4.6230922954775394e-05, + "loss": 0.9479, + "step": 49680 + }, + { + "epoch": 0.71, + "grad_norm": 0.57421875, + "learning_rate": 4.620981556401327e-05, + "loss": 0.995, + "step": 49685 + }, + { + "epoch": 0.71, + "grad_norm": 0.63671875, + "learning_rate": 4.6188711544881215e-05, + "loss": 0.8704, + "step": 49690 + }, + { + "epoch": 0.71, + "grad_norm": 0.5625, + "learning_rate": 4.616761089870197e-05, + "loss": 0.8862, + "step": 49695 + }, + { + "epoch": 0.71, + "grad_norm": 0.57421875, + "learning_rate": 4.6146513626798186e-05, + "loss": 0.9441, + "step": 49700 + }, + { + "epoch": 0.71, + "grad_norm": 0.60546875, + "learning_rate": 4.612541973049229e-05, + "loss": 0.8817, + "step": 49705 + }, + { + "epoch": 0.71, + "grad_norm": 0.51953125, + "learning_rate": 4.6104329211106426e-05, + "loss": 0.9116, + "step": 49710 + }, + { + "epoch": 0.71, + "grad_norm": 0.51953125, + "learning_rate": 4.6083242069962564e-05, + "loss": 0.9826, + "step": 49715 + }, + { + "epoch": 0.71, + "grad_norm": 0.53125, + "learning_rate": 4.6062158308382584e-05, + "loss": 0.9834, + "step": 49720 + }, + { + "epoch": 0.71, + "grad_norm": 0.5859375, + "learning_rate": 4.604107792768787e-05, + "loss": 0.8969, + "step": 49725 + }, + { + "epoch": 0.71, + "grad_norm": 0.52734375, + "learning_rate": 4.6020000929199856e-05, + "loss": 0.9264, + "step": 49730 + }, + { + "epoch": 0.71, + "grad_norm": 0.5546875, + "learning_rate": 4.5998927314239694e-05, + "loss": 0.9001, + "step": 49735 + }, + { + "epoch": 0.71, + "grad_norm": 0.62890625, + "learning_rate": 4.597785708412823e-05, + "loss": 1.0587, + "step": 49740 + }, + { + "epoch": 0.71, + "grad_norm": 0.53125, + "learning_rate": 4.595679024018621e-05, + "loss": 0.914, + "step": 49745 + }, + { + "epoch": 0.71, + "grad_norm": 0.53515625, + "learning_rate": 4.593572678373417e-05, + "loss": 0.9823, + "step": 49750 + }, + { + "epoch": 0.71, + "grad_norm": 0.546875, + "learning_rate": 4.59146667160923e-05, + "loss": 0.9742, + "step": 49755 + }, + { + "epoch": 0.71, + "grad_norm": 0.640625, + "learning_rate": 4.589361003858072e-05, + "loss": 1.2027, + "step": 49760 + }, + { + "epoch": 0.71, + "grad_norm": 0.5078125, + "learning_rate": 4.5872556752519335e-05, + "loss": 1.0227, + "step": 49765 + }, + { + "epoch": 0.71, + "grad_norm": 0.54296875, + "learning_rate": 4.585150685922773e-05, + "loss": 0.9103, + "step": 49770 + }, + { + "epoch": 0.71, + "grad_norm": 0.60546875, + "learning_rate": 4.5830460360025315e-05, + "loss": 0.9096, + "step": 49775 + }, + { + "epoch": 0.71, + "grad_norm": 0.515625, + "learning_rate": 4.580941725623134e-05, + "loss": 1.0385, + "step": 49780 + }, + { + "epoch": 0.71, + "grad_norm": 0.53125, + "learning_rate": 4.578837754916483e-05, + "loss": 0.8463, + "step": 49785 + }, + { + "epoch": 0.71, + "grad_norm": 0.490234375, + "learning_rate": 4.576734124014454e-05, + "loss": 0.9174, + "step": 49790 + }, + { + "epoch": 0.71, + "grad_norm": 0.53515625, + "learning_rate": 4.574630833048905e-05, + "loss": 0.9285, + "step": 49795 + }, + { + "epoch": 0.71, + "grad_norm": 0.5546875, + "learning_rate": 4.572527882151678e-05, + "loss": 0.95, + "step": 49800 + }, + { + "epoch": 0.71, + "grad_norm": 0.5546875, + "learning_rate": 4.5704252714545824e-05, + "loss": 0.9859, + "step": 49805 + }, + { + "epoch": 0.71, + "grad_norm": 0.55859375, + "learning_rate": 4.5683230010894143e-05, + "loss": 1.0285, + "step": 49810 + }, + { + "epoch": 0.71, + "grad_norm": 0.72265625, + "learning_rate": 4.5662210711879495e-05, + "loss": 0.9972, + "step": 49815 + }, + { + "epoch": 0.71, + "grad_norm": 0.51171875, + "learning_rate": 4.564119481881933e-05, + "loss": 0.925, + "step": 49820 + }, + { + "epoch": 0.71, + "grad_norm": 0.5859375, + "learning_rate": 4.5620182333031025e-05, + "loss": 0.8579, + "step": 49825 + }, + { + "epoch": 0.71, + "grad_norm": 0.5078125, + "learning_rate": 4.559917325583162e-05, + "loss": 0.8751, + "step": 49830 + }, + { + "epoch": 0.71, + "grad_norm": 0.57421875, + "learning_rate": 4.557816758853796e-05, + "loss": 0.9685, + "step": 49835 + }, + { + "epoch": 0.71, + "grad_norm": 0.55859375, + "learning_rate": 4.5557165332466736e-05, + "loss": 0.9727, + "step": 49840 + }, + { + "epoch": 0.72, + "grad_norm": 0.5234375, + "learning_rate": 4.553616648893443e-05, + "loss": 1.0464, + "step": 49845 + }, + { + "epoch": 0.72, + "grad_norm": 0.5390625, + "learning_rate": 4.55151710592572e-05, + "loss": 0.9426, + "step": 49850 + }, + { + "epoch": 0.72, + "grad_norm": 0.62109375, + "learning_rate": 4.549417904475111e-05, + "loss": 0.8065, + "step": 49855 + }, + { + "epoch": 0.72, + "grad_norm": 0.57421875, + "learning_rate": 4.547319044673197e-05, + "loss": 0.9354, + "step": 49860 + }, + { + "epoch": 0.72, + "grad_norm": 0.4609375, + "learning_rate": 4.545220526651534e-05, + "loss": 0.825, + "step": 49865 + }, + { + "epoch": 0.72, + "grad_norm": 0.55859375, + "learning_rate": 4.54312235054166e-05, + "loss": 0.8915, + "step": 49870 + }, + { + "epoch": 0.72, + "grad_norm": 0.51171875, + "learning_rate": 4.5410245164750955e-05, + "loss": 0.8697, + "step": 49875 + }, + { + "epoch": 0.72, + "grad_norm": 0.435546875, + "learning_rate": 4.5389270245833316e-05, + "loss": 0.884, + "step": 49880 + }, + { + "epoch": 0.72, + "grad_norm": 0.57421875, + "learning_rate": 4.536829874997838e-05, + "loss": 0.9207, + "step": 49885 + }, + { + "epoch": 0.72, + "grad_norm": 0.5234375, + "learning_rate": 4.5347330678500686e-05, + "loss": 0.9551, + "step": 49890 + }, + { + "epoch": 0.72, + "grad_norm": 0.59375, + "learning_rate": 4.5326366032714595e-05, + "loss": 0.9075, + "step": 49895 + }, + { + "epoch": 0.72, + "grad_norm": 0.56640625, + "learning_rate": 4.530540481393412e-05, + "loss": 0.8319, + "step": 49900 + }, + { + "epoch": 0.72, + "grad_norm": 0.53125, + "learning_rate": 4.528444702347315e-05, + "loss": 0.8604, + "step": 49905 + }, + { + "epoch": 0.72, + "grad_norm": 0.52734375, + "learning_rate": 4.5263492662645404e-05, + "loss": 0.8437, + "step": 49910 + }, + { + "epoch": 0.72, + "grad_norm": 0.65625, + "learning_rate": 4.524254173276423e-05, + "loss": 1.0757, + "step": 49915 + }, + { + "epoch": 0.72, + "grad_norm": 0.51171875, + "learning_rate": 4.522159423514292e-05, + "loss": 0.9885, + "step": 49920 + }, + { + "epoch": 0.72, + "grad_norm": 0.58203125, + "learning_rate": 4.5200650171094495e-05, + "loss": 1.059, + "step": 49925 + }, + { + "epoch": 0.72, + "grad_norm": 0.55859375, + "learning_rate": 4.517970954193172e-05, + "loss": 0.9149, + "step": 49930 + }, + { + "epoch": 0.72, + "grad_norm": 0.5859375, + "learning_rate": 4.515877234896717e-05, + "loss": 0.8068, + "step": 49935 + }, + { + "epoch": 0.72, + "grad_norm": 0.53515625, + "learning_rate": 4.5137838593513246e-05, + "loss": 0.9659, + "step": 49940 + }, + { + "epoch": 0.72, + "grad_norm": 0.50390625, + "learning_rate": 4.5116908276882064e-05, + "loss": 0.9508, + "step": 49945 + }, + { + "epoch": 0.72, + "grad_norm": 0.58984375, + "learning_rate": 4.509598140038557e-05, + "loss": 0.9591, + "step": 49950 + }, + { + "epoch": 0.72, + "grad_norm": 0.55859375, + "learning_rate": 4.507505796533552e-05, + "loss": 0.9213, + "step": 49955 + }, + { + "epoch": 0.72, + "grad_norm": 0.67578125, + "learning_rate": 4.505413797304336e-05, + "loss": 0.8213, + "step": 49960 + }, + { + "epoch": 0.72, + "grad_norm": 0.515625, + "learning_rate": 4.5033221424820415e-05, + "loss": 1.0068, + "step": 49965 + }, + { + "epoch": 0.72, + "grad_norm": 0.49609375, + "learning_rate": 4.501230832197778e-05, + "loss": 0.8619, + "step": 49970 + }, + { + "epoch": 0.72, + "grad_norm": 0.62109375, + "learning_rate": 4.499139866582626e-05, + "loss": 1.0136, + "step": 49975 + }, + { + "epoch": 0.72, + "grad_norm": 0.59765625, + "learning_rate": 4.497049245767655e-05, + "loss": 1.099, + "step": 49980 + }, + { + "epoch": 0.72, + "grad_norm": 0.56640625, + "learning_rate": 4.4949589698839015e-05, + "loss": 0.9329, + "step": 49985 + }, + { + "epoch": 0.72, + "grad_norm": 0.52734375, + "learning_rate": 4.4928690390623916e-05, + "loss": 1.0729, + "step": 49990 + }, + { + "epoch": 0.72, + "grad_norm": 0.5, + "learning_rate": 4.490779453434121e-05, + "loss": 0.8685, + "step": 49995 + }, + { + "epoch": 0.72, + "grad_norm": 0.65625, + "learning_rate": 4.488690213130067e-05, + "loss": 1.0066, + "step": 50000 + }, + { + "epoch": 0.72, + "grad_norm": 0.53125, + "learning_rate": 4.486601318281192e-05, + "loss": 0.9442, + "step": 50005 + }, + { + "epoch": 0.72, + "grad_norm": 0.6640625, + "learning_rate": 4.484512769018422e-05, + "loss": 1.0699, + "step": 50010 + }, + { + "epoch": 0.72, + "grad_norm": 0.51953125, + "learning_rate": 4.482424565472673e-05, + "loss": 0.9667, + "step": 50015 + }, + { + "epoch": 0.72, + "grad_norm": 0.5625, + "learning_rate": 4.480336707774839e-05, + "loss": 1.105, + "step": 50020 + }, + { + "epoch": 0.72, + "grad_norm": 0.56640625, + "learning_rate": 4.4782491960557835e-05, + "loss": 1.1043, + "step": 50025 + }, + { + "epoch": 0.72, + "grad_norm": 0.54296875, + "learning_rate": 4.476162030446357e-05, + "loss": 0.9254, + "step": 50030 + }, + { + "epoch": 0.72, + "grad_norm": 0.58203125, + "learning_rate": 4.474075211077393e-05, + "loss": 1.0297, + "step": 50035 + }, + { + "epoch": 0.72, + "grad_norm": 0.53125, + "learning_rate": 4.4719887380796824e-05, + "loss": 0.9864, + "step": 50040 + }, + { + "epoch": 0.72, + "grad_norm": 0.58203125, + "learning_rate": 4.4699026115840115e-05, + "loss": 0.8713, + "step": 50045 + }, + { + "epoch": 0.72, + "grad_norm": 0.50390625, + "learning_rate": 4.4678168317211475e-05, + "loss": 1.0196, + "step": 50050 + }, + { + "epoch": 0.72, + "grad_norm": 0.56640625, + "learning_rate": 4.465731398621822e-05, + "loss": 0.8765, + "step": 50055 + }, + { + "epoch": 0.72, + "grad_norm": 0.609375, + "learning_rate": 4.463646312416754e-05, + "loss": 0.9513, + "step": 50060 + }, + { + "epoch": 0.72, + "grad_norm": 0.6328125, + "learning_rate": 4.461561573236645e-05, + "loss": 0.9159, + "step": 50065 + }, + { + "epoch": 0.72, + "grad_norm": 0.52734375, + "learning_rate": 4.459477181212161e-05, + "loss": 0.9396, + "step": 50070 + }, + { + "epoch": 0.72, + "grad_norm": 0.53125, + "learning_rate": 4.457393136473957e-05, + "loss": 0.9387, + "step": 50075 + }, + { + "epoch": 0.72, + "grad_norm": 0.58984375, + "learning_rate": 4.4553094391526684e-05, + "loss": 0.9666, + "step": 50080 + }, + { + "epoch": 0.72, + "grad_norm": 0.578125, + "learning_rate": 4.453226089378898e-05, + "loss": 1.2035, + "step": 50085 + }, + { + "epoch": 0.72, + "grad_norm": 0.5625, + "learning_rate": 4.4511430872832294e-05, + "loss": 0.9999, + "step": 50090 + }, + { + "epoch": 0.72, + "grad_norm": 0.55078125, + "learning_rate": 4.449060432996233e-05, + "loss": 1.0427, + "step": 50095 + }, + { + "epoch": 0.72, + "grad_norm": 0.51953125, + "learning_rate": 4.4469781266484534e-05, + "loss": 0.9715, + "step": 50100 + }, + { + "epoch": 0.72, + "grad_norm": 0.60546875, + "learning_rate": 4.444896168370406e-05, + "loss": 1.0078, + "step": 50105 + }, + { + "epoch": 0.72, + "grad_norm": 0.6015625, + "learning_rate": 4.442814558292593e-05, + "loss": 1.0199, + "step": 50110 + }, + { + "epoch": 0.72, + "grad_norm": 0.55859375, + "learning_rate": 4.4407332965454975e-05, + "loss": 0.9279, + "step": 50115 + }, + { + "epoch": 0.72, + "grad_norm": 0.515625, + "learning_rate": 4.438652383259565e-05, + "loss": 0.8802, + "step": 50120 + }, + { + "epoch": 0.72, + "grad_norm": 0.58203125, + "learning_rate": 4.436571818565236e-05, + "loss": 1.0009, + "step": 50125 + }, + { + "epoch": 0.72, + "grad_norm": 0.50390625, + "learning_rate": 4.434491602592925e-05, + "loss": 0.9599, + "step": 50130 + }, + { + "epoch": 0.72, + "grad_norm": 0.56640625, + "learning_rate": 4.432411735473015e-05, + "loss": 0.9421, + "step": 50135 + }, + { + "epoch": 0.72, + "grad_norm": 0.51953125, + "learning_rate": 4.430332217335883e-05, + "loss": 0.9521, + "step": 50140 + }, + { + "epoch": 0.72, + "grad_norm": 0.51953125, + "learning_rate": 4.42825304831187e-05, + "loss": 0.9073, + "step": 50145 + }, + { + "epoch": 0.72, + "grad_norm": 0.5859375, + "learning_rate": 4.426174228531299e-05, + "loss": 1.0736, + "step": 50150 + }, + { + "epoch": 0.72, + "grad_norm": 0.53515625, + "learning_rate": 4.424095758124474e-05, + "loss": 0.9449, + "step": 50155 + }, + { + "epoch": 0.72, + "grad_norm": 0.58203125, + "learning_rate": 4.4220176372216826e-05, + "loss": 0.9873, + "step": 50160 + }, + { + "epoch": 0.72, + "grad_norm": 0.546875, + "learning_rate": 4.419939865953173e-05, + "loss": 0.9748, + "step": 50165 + }, + { + "epoch": 0.72, + "grad_norm": 0.3984375, + "learning_rate": 4.417862444449189e-05, + "loss": 0.883, + "step": 50170 + }, + { + "epoch": 0.72, + "grad_norm": 0.46484375, + "learning_rate": 4.4157853728399475e-05, + "loss": 0.8066, + "step": 50175 + }, + { + "epoch": 0.72, + "grad_norm": 0.5703125, + "learning_rate": 4.4137086512556356e-05, + "loss": 0.8805, + "step": 50180 + }, + { + "epoch": 0.72, + "grad_norm": 0.62109375, + "learning_rate": 4.4116322798264276e-05, + "loss": 0.8867, + "step": 50185 + }, + { + "epoch": 0.72, + "grad_norm": 0.55859375, + "learning_rate": 4.4095562586824766e-05, + "loss": 1.1039, + "step": 50190 + }, + { + "epoch": 0.72, + "grad_norm": 0.5625, + "learning_rate": 4.4074805879539064e-05, + "loss": 0.9521, + "step": 50195 + }, + { + "epoch": 0.72, + "grad_norm": 0.50390625, + "learning_rate": 4.405405267770818e-05, + "loss": 0.9552, + "step": 50200 + }, + { + "epoch": 0.72, + "grad_norm": 0.53125, + "learning_rate": 4.4033302982633e-05, + "loss": 0.9926, + "step": 50205 + }, + { + "epoch": 0.72, + "grad_norm": 0.55859375, + "learning_rate": 4.401255679561416e-05, + "loss": 1.058, + "step": 50210 + }, + { + "epoch": 0.72, + "grad_norm": 0.52734375, + "learning_rate": 4.3991814117951994e-05, + "loss": 0.8447, + "step": 50215 + }, + { + "epoch": 0.72, + "grad_norm": 0.6328125, + "learning_rate": 4.39710749509467e-05, + "loss": 1.0724, + "step": 50220 + }, + { + "epoch": 0.72, + "grad_norm": 0.66015625, + "learning_rate": 4.395033929589828e-05, + "loss": 1.06, + "step": 50225 + }, + { + "epoch": 0.72, + "grad_norm": 0.4765625, + "learning_rate": 4.392960715410639e-05, + "loss": 1.0209, + "step": 50230 + }, + { + "epoch": 0.72, + "grad_norm": 0.68359375, + "learning_rate": 4.3908878526870575e-05, + "loss": 0.9987, + "step": 50235 + }, + { + "epoch": 0.72, + "grad_norm": 0.5, + "learning_rate": 4.388815341549019e-05, + "loss": 0.966, + "step": 50240 + }, + { + "epoch": 0.72, + "grad_norm": 0.5, + "learning_rate": 4.386743182126424e-05, + "loss": 0.9384, + "step": 50245 + }, + { + "epoch": 0.72, + "grad_norm": 0.57421875, + "learning_rate": 4.384671374549156e-05, + "loss": 0.9489, + "step": 50250 + }, + { + "epoch": 0.72, + "grad_norm": 0.5234375, + "learning_rate": 4.382599918947082e-05, + "loss": 0.9616, + "step": 50255 + }, + { + "epoch": 0.72, + "grad_norm": 0.609375, + "learning_rate": 4.3805288154500456e-05, + "loss": 1.1693, + "step": 50260 + }, + { + "epoch": 0.72, + "grad_norm": 0.73046875, + "learning_rate": 4.3784580641878606e-05, + "loss": 1.0767, + "step": 50265 + }, + { + "epoch": 0.72, + "grad_norm": 0.5390625, + "learning_rate": 4.376387665290329e-05, + "loss": 0.9063, + "step": 50270 + }, + { + "epoch": 0.72, + "grad_norm": 0.55859375, + "learning_rate": 4.3743176188872205e-05, + "loss": 1.0128, + "step": 50275 + }, + { + "epoch": 0.72, + "grad_norm": 0.57421875, + "learning_rate": 4.37224792510829e-05, + "loss": 0.9175, + "step": 50280 + }, + { + "epoch": 0.72, + "grad_norm": 0.5390625, + "learning_rate": 4.370178584083274e-05, + "loss": 0.9061, + "step": 50285 + }, + { + "epoch": 0.72, + "grad_norm": 0.5546875, + "learning_rate": 4.368109595941872e-05, + "loss": 0.8362, + "step": 50290 + }, + { + "epoch": 0.72, + "grad_norm": 0.51953125, + "learning_rate": 4.366040960813778e-05, + "loss": 0.949, + "step": 50295 + }, + { + "epoch": 0.72, + "grad_norm": 0.5, + "learning_rate": 4.3639726788286495e-05, + "loss": 0.9306, + "step": 50300 + }, + { + "epoch": 0.72, + "grad_norm": 0.6796875, + "learning_rate": 4.361904750116136e-05, + "loss": 1.1314, + "step": 50305 + }, + { + "epoch": 0.72, + "grad_norm": 0.57421875, + "learning_rate": 4.359837174805851e-05, + "loss": 1.0325, + "step": 50310 + }, + { + "epoch": 0.72, + "grad_norm": 0.5390625, + "learning_rate": 4.357769953027394e-05, + "loss": 0.9517, + "step": 50315 + }, + { + "epoch": 0.72, + "grad_norm": 0.5390625, + "learning_rate": 4.3557030849103474e-05, + "loss": 0.8991, + "step": 50320 + }, + { + "epoch": 0.72, + "grad_norm": 0.54296875, + "learning_rate": 4.353636570584255e-05, + "loss": 0.9992, + "step": 50325 + }, + { + "epoch": 0.72, + "grad_norm": 0.55859375, + "learning_rate": 4.351570410178654e-05, + "loss": 0.8851, + "step": 50330 + }, + { + "epoch": 0.72, + "grad_norm": 0.55859375, + "learning_rate": 4.3495046038230556e-05, + "loss": 0.794, + "step": 50335 + }, + { + "epoch": 0.72, + "grad_norm": 0.5546875, + "learning_rate": 4.3474391516469416e-05, + "loss": 0.9003, + "step": 50340 + }, + { + "epoch": 0.72, + "grad_norm": 0.5078125, + "learning_rate": 4.345374053779778e-05, + "loss": 1.0992, + "step": 50345 + }, + { + "epoch": 0.72, + "grad_norm": 0.55078125, + "learning_rate": 4.343309310351013e-05, + "loss": 0.9804, + "step": 50350 + }, + { + "epoch": 0.72, + "grad_norm": 0.54296875, + "learning_rate": 4.341244921490063e-05, + "loss": 1.01, + "step": 50355 + }, + { + "epoch": 0.72, + "grad_norm": 0.62109375, + "learning_rate": 4.3391808873263215e-05, + "loss": 0.7897, + "step": 50360 + }, + { + "epoch": 0.72, + "grad_norm": 0.53125, + "learning_rate": 4.33711720798917e-05, + "loss": 0.9257, + "step": 50365 + }, + { + "epoch": 0.72, + "grad_norm": 0.51953125, + "learning_rate": 4.335053883607965e-05, + "loss": 1.0943, + "step": 50370 + }, + { + "epoch": 0.72, + "grad_norm": 0.52734375, + "learning_rate": 4.332990914312031e-05, + "loss": 1.0418, + "step": 50375 + }, + { + "epoch": 0.72, + "grad_norm": 0.6171875, + "learning_rate": 4.33092830023068e-05, + "loss": 0.913, + "step": 50380 + }, + { + "epoch": 0.72, + "grad_norm": 0.59375, + "learning_rate": 4.3288660414932034e-05, + "loss": 0.9002, + "step": 50385 + }, + { + "epoch": 0.72, + "grad_norm": 0.53125, + "learning_rate": 4.3268041382288594e-05, + "loss": 0.7632, + "step": 50390 + }, + { + "epoch": 0.72, + "grad_norm": 0.6015625, + "learning_rate": 4.324742590566893e-05, + "loss": 1.0351, + "step": 50395 + }, + { + "epoch": 0.72, + "grad_norm": 0.5859375, + "learning_rate": 4.322681398636531e-05, + "loss": 0.8847, + "step": 50400 + }, + { + "epoch": 0.72, + "grad_norm": 0.5234375, + "learning_rate": 4.3206205625669574e-05, + "loss": 1.0687, + "step": 50405 + }, + { + "epoch": 0.72, + "grad_norm": 0.57421875, + "learning_rate": 4.3185600824873565e-05, + "loss": 1.0468, + "step": 50410 + }, + { + "epoch": 0.72, + "grad_norm": 0.51953125, + "learning_rate": 4.316499958526883e-05, + "loss": 1.1039, + "step": 50415 + }, + { + "epoch": 0.72, + "grad_norm": 0.5546875, + "learning_rate": 4.3144401908146614e-05, + "loss": 0.9662, + "step": 50420 + }, + { + "epoch": 0.72, + "grad_norm": 0.62109375, + "learning_rate": 4.3123807794798046e-05, + "loss": 0.8069, + "step": 50425 + }, + { + "epoch": 0.72, + "grad_norm": 0.515625, + "learning_rate": 4.3103217246514015e-05, + "loss": 0.9225, + "step": 50430 + }, + { + "epoch": 0.72, + "grad_norm": 0.56640625, + "learning_rate": 4.308263026458509e-05, + "loss": 0.9417, + "step": 50435 + }, + { + "epoch": 0.72, + "grad_norm": 0.6328125, + "learning_rate": 4.3062046850301716e-05, + "loss": 0.9334, + "step": 50440 + }, + { + "epoch": 0.72, + "grad_norm": 0.515625, + "learning_rate": 4.304146700495414e-05, + "loss": 0.8333, + "step": 50445 + }, + { + "epoch": 0.72, + "grad_norm": 0.5625, + "learning_rate": 4.3020890729832244e-05, + "loss": 0.9604, + "step": 50450 + }, + { + "epoch": 0.72, + "grad_norm": 0.5234375, + "learning_rate": 4.300031802622585e-05, + "loss": 0.8044, + "step": 50455 + }, + { + "epoch": 0.72, + "grad_norm": 0.578125, + "learning_rate": 4.2979748895424396e-05, + "loss": 0.9168, + "step": 50460 + }, + { + "epoch": 0.72, + "grad_norm": 0.61328125, + "learning_rate": 4.2959183338717255e-05, + "loss": 0.9734, + "step": 50465 + }, + { + "epoch": 0.72, + "grad_norm": 0.5234375, + "learning_rate": 4.2938621357393436e-05, + "loss": 0.9601, + "step": 50470 + }, + { + "epoch": 0.72, + "grad_norm": 0.64453125, + "learning_rate": 4.291806295274182e-05, + "loss": 1.0021, + "step": 50475 + }, + { + "epoch": 0.72, + "grad_norm": 0.494140625, + "learning_rate": 4.289750812605107e-05, + "loss": 0.9122, + "step": 50480 + }, + { + "epoch": 0.72, + "grad_norm": 0.53125, + "learning_rate": 4.28769568786095e-05, + "loss": 0.8227, + "step": 50485 + }, + { + "epoch": 0.72, + "grad_norm": 0.54296875, + "learning_rate": 4.285640921170533e-05, + "loss": 0.9285, + "step": 50490 + }, + { + "epoch": 0.72, + "grad_norm": 0.62109375, + "learning_rate": 4.283586512662655e-05, + "loss": 0.8889, + "step": 50495 + }, + { + "epoch": 0.72, + "grad_norm": 0.6015625, + "learning_rate": 4.2815324624660814e-05, + "loss": 1.0344, + "step": 50500 + }, + { + "epoch": 0.72, + "grad_norm": 0.67578125, + "learning_rate": 4.279478770709569e-05, + "loss": 0.9646, + "step": 50505 + }, + { + "epoch": 0.72, + "grad_norm": 0.546875, + "learning_rate": 4.277425437521843e-05, + "loss": 0.842, + "step": 50510 + }, + { + "epoch": 0.72, + "grad_norm": 0.51953125, + "learning_rate": 4.275372463031604e-05, + "loss": 0.8737, + "step": 50515 + }, + { + "epoch": 0.72, + "grad_norm": 0.62109375, + "learning_rate": 4.273319847367539e-05, + "loss": 0.8028, + "step": 50520 + }, + { + "epoch": 0.72, + "grad_norm": 0.69921875, + "learning_rate": 4.271267590658311e-05, + "loss": 0.9467, + "step": 50525 + }, + { + "epoch": 0.72, + "grad_norm": 0.67578125, + "learning_rate": 4.269215693032552e-05, + "loss": 1.0773, + "step": 50530 + }, + { + "epoch": 0.72, + "grad_norm": 0.70703125, + "learning_rate": 4.26716415461888e-05, + "loss": 1.038, + "step": 50535 + }, + { + "epoch": 0.72, + "grad_norm": 0.59765625, + "learning_rate": 4.2651129755458916e-05, + "loss": 1.0356, + "step": 50540 + }, + { + "epoch": 0.73, + "grad_norm": 0.55078125, + "learning_rate": 4.26306215594215e-05, + "loss": 1.1298, + "step": 50545 + }, + { + "epoch": 0.73, + "grad_norm": 0.50390625, + "learning_rate": 4.2610116959362057e-05, + "loss": 0.9046, + "step": 50550 + }, + { + "epoch": 0.73, + "grad_norm": 0.51953125, + "learning_rate": 4.2589615956565885e-05, + "loss": 0.8925, + "step": 50555 + }, + { + "epoch": 0.73, + "grad_norm": 0.51171875, + "learning_rate": 4.256911855231798e-05, + "loss": 0.97, + "step": 50560 + }, + { + "epoch": 0.73, + "grad_norm": 0.6171875, + "learning_rate": 4.2548624747903076e-05, + "loss": 1.0284, + "step": 50565 + }, + { + "epoch": 0.73, + "grad_norm": 0.61328125, + "learning_rate": 4.2528134544605813e-05, + "loss": 1.0616, + "step": 50570 + }, + { + "epoch": 0.73, + "grad_norm": 0.61328125, + "learning_rate": 4.250764794371057e-05, + "loss": 0.9394, + "step": 50575 + }, + { + "epoch": 0.73, + "grad_norm": 0.5, + "learning_rate": 4.24871649465014e-05, + "loss": 0.9177, + "step": 50580 + }, + { + "epoch": 0.73, + "grad_norm": 0.60546875, + "learning_rate": 4.246668555426223e-05, + "loss": 0.965, + "step": 50585 + }, + { + "epoch": 0.73, + "grad_norm": 0.578125, + "learning_rate": 4.244620976827677e-05, + "loss": 0.9909, + "step": 50590 + }, + { + "epoch": 0.73, + "grad_norm": 0.6484375, + "learning_rate": 4.242573758982839e-05, + "loss": 1.0053, + "step": 50595 + }, + { + "epoch": 0.73, + "grad_norm": 0.5703125, + "learning_rate": 4.240526902020035e-05, + "loss": 0.9533, + "step": 50600 + }, + { + "epoch": 0.73, + "grad_norm": 0.5546875, + "learning_rate": 4.238480406067567e-05, + "loss": 0.9128, + "step": 50605 + }, + { + "epoch": 0.73, + "grad_norm": 0.609375, + "learning_rate": 4.236434271253711e-05, + "loss": 1.0219, + "step": 50610 + }, + { + "epoch": 0.73, + "grad_norm": 0.6328125, + "learning_rate": 4.234388497706715e-05, + "loss": 0.8261, + "step": 50615 + }, + { + "epoch": 0.73, + "grad_norm": 0.50390625, + "learning_rate": 4.2323430855548174e-05, + "loss": 0.8961, + "step": 50620 + }, + { + "epoch": 0.73, + "grad_norm": 1.03125, + "learning_rate": 4.23029803492622e-05, + "loss": 0.8444, + "step": 50625 + }, + { + "epoch": 0.73, + "grad_norm": 0.52734375, + "learning_rate": 4.228253345949114e-05, + "loss": 1.001, + "step": 50630 + }, + { + "epoch": 0.73, + "grad_norm": 0.61328125, + "learning_rate": 4.2262090187516644e-05, + "loss": 0.955, + "step": 50635 + }, + { + "epoch": 0.73, + "grad_norm": 0.5390625, + "learning_rate": 4.224165053462006e-05, + "loss": 0.8988, + "step": 50640 + }, + { + "epoch": 0.73, + "grad_norm": 0.48828125, + "learning_rate": 4.2221214502082594e-05, + "loss": 0.8975, + "step": 50645 + }, + { + "epoch": 0.73, + "grad_norm": 0.6171875, + "learning_rate": 4.220078209118525e-05, + "loss": 0.9368, + "step": 50650 + }, + { + "epoch": 0.73, + "grad_norm": 0.51953125, + "learning_rate": 4.218035330320869e-05, + "loss": 0.8767, + "step": 50655 + }, + { + "epoch": 0.73, + "grad_norm": 0.51953125, + "learning_rate": 4.2159928139433427e-05, + "loss": 1.0704, + "step": 50660 + }, + { + "epoch": 0.73, + "grad_norm": 0.73046875, + "learning_rate": 4.2139506601139766e-05, + "loss": 0.8408, + "step": 50665 + }, + { + "epoch": 0.73, + "grad_norm": 0.5625, + "learning_rate": 4.211908868960774e-05, + "loss": 0.9176, + "step": 50670 + }, + { + "epoch": 0.73, + "grad_norm": 0.546875, + "learning_rate": 4.209867440611712e-05, + "loss": 0.9723, + "step": 50675 + }, + { + "epoch": 0.73, + "grad_norm": 0.58984375, + "learning_rate": 4.2078263751947535e-05, + "loss": 0.9628, + "step": 50680 + }, + { + "epoch": 0.73, + "grad_norm": 0.5703125, + "learning_rate": 4.205785672837837e-05, + "loss": 1.221, + "step": 50685 + }, + { + "epoch": 0.73, + "grad_norm": 0.54296875, + "learning_rate": 4.2037453336688715e-05, + "loss": 0.9129, + "step": 50690 + }, + { + "epoch": 0.73, + "grad_norm": 0.58203125, + "learning_rate": 4.201705357815748e-05, + "loss": 1.0118, + "step": 50695 + }, + { + "epoch": 0.73, + "grad_norm": 0.59765625, + "learning_rate": 4.1996657454063415e-05, + "loss": 1.0532, + "step": 50700 + }, + { + "epoch": 0.73, + "grad_norm": 0.50390625, + "learning_rate": 4.197626496568488e-05, + "loss": 0.9336, + "step": 50705 + }, + { + "epoch": 0.73, + "grad_norm": 0.484375, + "learning_rate": 4.195587611430014e-05, + "loss": 1.0174, + "step": 50710 + }, + { + "epoch": 0.73, + "grad_norm": 0.60546875, + "learning_rate": 4.193549090118727e-05, + "loss": 1.0614, + "step": 50715 + }, + { + "epoch": 0.73, + "grad_norm": 0.53515625, + "learning_rate": 4.191510932762388e-05, + "loss": 1.0202, + "step": 50720 + }, + { + "epoch": 0.73, + "grad_norm": 0.6796875, + "learning_rate": 4.189473139488759e-05, + "loss": 0.9998, + "step": 50725 + }, + { + "epoch": 0.73, + "grad_norm": 0.58203125, + "learning_rate": 4.187435710425576e-05, + "loss": 0.8776, + "step": 50730 + }, + { + "epoch": 0.73, + "grad_norm": 0.6328125, + "learning_rate": 4.1853986457005376e-05, + "loss": 0.9999, + "step": 50735 + }, + { + "epoch": 0.73, + "grad_norm": 0.470703125, + "learning_rate": 4.183361945441335e-05, + "loss": 0.8458, + "step": 50740 + }, + { + "epoch": 0.73, + "grad_norm": 0.52734375, + "learning_rate": 4.181325609775634e-05, + "loss": 0.7763, + "step": 50745 + }, + { + "epoch": 0.73, + "grad_norm": 0.66015625, + "learning_rate": 4.179289638831067e-05, + "loss": 1.0876, + "step": 50750 + }, + { + "epoch": 0.73, + "grad_norm": 0.625, + "learning_rate": 4.177254032735254e-05, + "loss": 1.1139, + "step": 50755 + }, + { + "epoch": 0.73, + "grad_norm": 0.84375, + "learning_rate": 4.1752187916157945e-05, + "loss": 1.0421, + "step": 50760 + }, + { + "epoch": 0.73, + "grad_norm": 1.234375, + "learning_rate": 4.173183915600251e-05, + "loss": 0.9571, + "step": 50765 + }, + { + "epoch": 0.73, + "grad_norm": 0.490234375, + "learning_rate": 4.171149404816179e-05, + "loss": 0.9549, + "step": 50770 + }, + { + "epoch": 0.73, + "grad_norm": 0.6953125, + "learning_rate": 4.1691152593910975e-05, + "loss": 0.9044, + "step": 50775 + }, + { + "epoch": 0.73, + "grad_norm": 0.5390625, + "learning_rate": 4.167081479452516e-05, + "loss": 1.0115, + "step": 50780 + }, + { + "epoch": 0.73, + "grad_norm": 0.54296875, + "learning_rate": 4.165048065127907e-05, + "loss": 1.009, + "step": 50785 + }, + { + "epoch": 0.73, + "grad_norm": 0.6328125, + "learning_rate": 4.16301501654473e-05, + "loss": 1.0415, + "step": 50790 + }, + { + "epoch": 0.73, + "grad_norm": 0.55078125, + "learning_rate": 4.1609823338304246e-05, + "loss": 0.9369, + "step": 50795 + }, + { + "epoch": 0.73, + "grad_norm": 0.5078125, + "learning_rate": 4.158950017112392e-05, + "loss": 0.8816, + "step": 50800 + }, + { + "epoch": 0.73, + "grad_norm": 0.55859375, + "learning_rate": 4.156918066518025e-05, + "loss": 0.9797, + "step": 50805 + }, + { + "epoch": 0.73, + "grad_norm": 0.4921875, + "learning_rate": 4.154886482174691e-05, + "loss": 0.8669, + "step": 50810 + }, + { + "epoch": 0.73, + "grad_norm": 0.546875, + "learning_rate": 4.152855264209727e-05, + "loss": 0.9375, + "step": 50815 + }, + { + "epoch": 0.73, + "grad_norm": 0.671875, + "learning_rate": 4.150824412750458e-05, + "loss": 1.0531, + "step": 50820 + }, + { + "epoch": 0.73, + "grad_norm": 0.5, + "learning_rate": 4.148793927924176e-05, + "loss": 0.998, + "step": 50825 + }, + { + "epoch": 0.73, + "grad_norm": 0.53515625, + "learning_rate": 4.146763809858151e-05, + "loss": 0.856, + "step": 50830 + }, + { + "epoch": 0.73, + "grad_norm": 0.58203125, + "learning_rate": 4.144734058679636e-05, + "loss": 0.9849, + "step": 50835 + }, + { + "epoch": 0.73, + "grad_norm": 0.50390625, + "learning_rate": 4.142704674515863e-05, + "loss": 0.9253, + "step": 50840 + }, + { + "epoch": 0.73, + "grad_norm": 0.55078125, + "learning_rate": 4.140675657494029e-05, + "loss": 0.9308, + "step": 50845 + }, + { + "epoch": 0.73, + "grad_norm": 0.49609375, + "learning_rate": 4.1386470077413166e-05, + "loss": 0.9967, + "step": 50850 + }, + { + "epoch": 0.73, + "grad_norm": 0.52734375, + "learning_rate": 4.13661872538489e-05, + "loss": 0.9413, + "step": 50855 + }, + { + "epoch": 0.73, + "grad_norm": 0.392578125, + "learning_rate": 4.134590810551875e-05, + "loss": 0.8395, + "step": 50860 + }, + { + "epoch": 0.73, + "grad_norm": 0.578125, + "learning_rate": 4.1325632633693886e-05, + "loss": 1.0183, + "step": 50865 + }, + { + "epoch": 0.73, + "grad_norm": 0.6875, + "learning_rate": 4.130536083964524e-05, + "loss": 1.0262, + "step": 50870 + }, + { + "epoch": 0.73, + "grad_norm": 0.51953125, + "learning_rate": 4.12850927246434e-05, + "loss": 1.027, + "step": 50875 + }, + { + "epoch": 0.73, + "grad_norm": 0.703125, + "learning_rate": 4.12648282899588e-05, + "loss": 1.0127, + "step": 50880 + }, + { + "epoch": 0.73, + "grad_norm": 0.50390625, + "learning_rate": 4.124456753686166e-05, + "loss": 0.9388, + "step": 50885 + }, + { + "epoch": 0.73, + "grad_norm": 0.51953125, + "learning_rate": 4.1224310466621965e-05, + "loss": 0.9285, + "step": 50890 + }, + { + "epoch": 0.73, + "grad_norm": 0.65625, + "learning_rate": 4.120405708050941e-05, + "loss": 1.0692, + "step": 50895 + }, + { + "epoch": 0.73, + "grad_norm": 0.55859375, + "learning_rate": 4.11838073797935e-05, + "loss": 0.9283, + "step": 50900 + }, + { + "epoch": 0.73, + "grad_norm": 0.486328125, + "learning_rate": 4.116356136574359e-05, + "loss": 0.8697, + "step": 50905 + }, + { + "epoch": 0.73, + "grad_norm": 0.490234375, + "learning_rate": 4.11433190396286e-05, + "loss": 1.0036, + "step": 50910 + }, + { + "epoch": 0.73, + "grad_norm": 0.47265625, + "learning_rate": 4.1123080402717415e-05, + "loss": 0.9792, + "step": 50915 + }, + { + "epoch": 0.73, + "grad_norm": 0.55078125, + "learning_rate": 4.110284545627865e-05, + "loss": 1.0078, + "step": 50920 + }, + { + "epoch": 0.73, + "grad_norm": 0.70703125, + "learning_rate": 4.1082614201580604e-05, + "loss": 1.0572, + "step": 50925 + }, + { + "epoch": 0.73, + "grad_norm": 0.37890625, + "learning_rate": 4.106238663989137e-05, + "loss": 0.7276, + "step": 50930 + }, + { + "epoch": 0.73, + "grad_norm": 0.55859375, + "learning_rate": 4.10421627724789e-05, + "loss": 0.9774, + "step": 50935 + }, + { + "epoch": 0.73, + "grad_norm": 0.6640625, + "learning_rate": 4.102194260061078e-05, + "loss": 1.0234, + "step": 50940 + }, + { + "epoch": 0.73, + "grad_norm": 0.5078125, + "learning_rate": 4.100172612555446e-05, + "loss": 0.873, + "step": 50945 + }, + { + "epoch": 0.73, + "grad_norm": 0.58984375, + "learning_rate": 4.098151334857718e-05, + "loss": 1.0226, + "step": 50950 + }, + { + "epoch": 0.73, + "grad_norm": 0.546875, + "learning_rate": 4.0961304270945824e-05, + "loss": 1.0075, + "step": 50955 + }, + { + "epoch": 0.73, + "grad_norm": 0.55859375, + "learning_rate": 4.094109889392715e-05, + "loss": 0.8681, + "step": 50960 + }, + { + "epoch": 0.73, + "grad_norm": 0.609375, + "learning_rate": 4.0920897218787704e-05, + "loss": 1.0173, + "step": 50965 + }, + { + "epoch": 0.73, + "grad_norm": 0.55859375, + "learning_rate": 4.090069924679367e-05, + "loss": 1.1367, + "step": 50970 + }, + { + "epoch": 0.73, + "grad_norm": 0.5859375, + "learning_rate": 4.088050497921111e-05, + "loss": 0.8676, + "step": 50975 + }, + { + "epoch": 0.73, + "grad_norm": 0.5234375, + "learning_rate": 4.086031441730587e-05, + "loss": 1.0029, + "step": 50980 + }, + { + "epoch": 0.73, + "grad_norm": 0.5390625, + "learning_rate": 4.0840127562343476e-05, + "loss": 0.8638, + "step": 50985 + }, + { + "epoch": 0.73, + "grad_norm": 0.609375, + "learning_rate": 4.081994441558923e-05, + "loss": 1.0133, + "step": 50990 + }, + { + "epoch": 0.73, + "grad_norm": 0.51953125, + "learning_rate": 4.0799764978308265e-05, + "loss": 0.9686, + "step": 50995 + }, + { + "epoch": 0.73, + "grad_norm": 0.6328125, + "learning_rate": 4.0779589251765495e-05, + "loss": 1.0507, + "step": 51000 + }, + { + "epoch": 0.73, + "grad_norm": 0.62890625, + "learning_rate": 4.075941723722547e-05, + "loss": 0.8381, + "step": 51005 + }, + { + "epoch": 0.73, + "grad_norm": 0.55078125, + "learning_rate": 4.0739248935952646e-05, + "loss": 0.9353, + "step": 51010 + }, + { + "epoch": 0.73, + "grad_norm": 0.53125, + "learning_rate": 4.071908434921123e-05, + "loss": 0.8137, + "step": 51015 + }, + { + "epoch": 0.73, + "grad_norm": 0.515625, + "learning_rate": 4.069892347826509e-05, + "loss": 0.9861, + "step": 51020 + }, + { + "epoch": 0.73, + "grad_norm": 0.51953125, + "learning_rate": 4.0678766324377957e-05, + "loss": 0.8659, + "step": 51025 + }, + { + "epoch": 0.73, + "grad_norm": 0.51171875, + "learning_rate": 4.065861288881335e-05, + "loss": 0.9603, + "step": 51030 + }, + { + "epoch": 0.73, + "grad_norm": 0.5625, + "learning_rate": 4.0638463172834484e-05, + "loss": 0.9187, + "step": 51035 + }, + { + "epoch": 0.73, + "grad_norm": 0.51953125, + "learning_rate": 4.0618317177704304e-05, + "loss": 0.9147, + "step": 51040 + }, + { + "epoch": 0.73, + "grad_norm": 0.66796875, + "learning_rate": 4.059817490468564e-05, + "loss": 1.0506, + "step": 51045 + }, + { + "epoch": 0.73, + "grad_norm": 0.55859375, + "learning_rate": 4.057803635504105e-05, + "loss": 0.9171, + "step": 51050 + }, + { + "epoch": 0.73, + "grad_norm": 0.5859375, + "learning_rate": 4.0557901530032794e-05, + "loss": 1.1271, + "step": 51055 + }, + { + "epoch": 0.73, + "grad_norm": 0.5625, + "learning_rate": 4.053777043092296e-05, + "loss": 0.9112, + "step": 51060 + }, + { + "epoch": 0.73, + "grad_norm": 0.58203125, + "learning_rate": 4.051764305897344e-05, + "loss": 0.9855, + "step": 51065 + }, + { + "epoch": 0.73, + "grad_norm": 0.546875, + "learning_rate": 4.049751941544576e-05, + "loss": 0.8994, + "step": 51070 + }, + { + "epoch": 0.73, + "grad_norm": 0.58203125, + "learning_rate": 4.047739950160137e-05, + "loss": 0.9347, + "step": 51075 + }, + { + "epoch": 0.73, + "grad_norm": 0.55078125, + "learning_rate": 4.045728331870132e-05, + "loss": 0.9463, + "step": 51080 + }, + { + "epoch": 0.73, + "grad_norm": 0.494140625, + "learning_rate": 4.043717086800661e-05, + "loss": 0.8492, + "step": 51085 + }, + { + "epoch": 0.73, + "grad_norm": 0.5703125, + "learning_rate": 4.041706215077784e-05, + "loss": 1.1038, + "step": 51090 + }, + { + "epoch": 0.73, + "grad_norm": 0.5234375, + "learning_rate": 4.03969571682755e-05, + "loss": 0.8422, + "step": 51095 + }, + { + "epoch": 0.73, + "grad_norm": 0.6328125, + "learning_rate": 4.0376855921759725e-05, + "loss": 1.0364, + "step": 51100 + }, + { + "epoch": 0.73, + "grad_norm": 0.7890625, + "learning_rate": 4.0356758412490535e-05, + "loss": 0.9729, + "step": 51105 + }, + { + "epoch": 0.73, + "grad_norm": 0.515625, + "learning_rate": 4.0336664641727697e-05, + "loss": 1.0945, + "step": 51110 + }, + { + "epoch": 0.73, + "grad_norm": 0.55859375, + "learning_rate": 4.031657461073063e-05, + "loss": 0.9299, + "step": 51115 + }, + { + "epoch": 0.73, + "grad_norm": 0.56640625, + "learning_rate": 4.029648832075863e-05, + "loss": 0.9332, + "step": 51120 + }, + { + "epoch": 0.73, + "grad_norm": 0.55859375, + "learning_rate": 4.027640577307078e-05, + "loss": 1.052, + "step": 51125 + }, + { + "epoch": 0.73, + "grad_norm": 0.55078125, + "learning_rate": 4.025632696892581e-05, + "loss": 1.0119, + "step": 51130 + }, + { + "epoch": 0.73, + "grad_norm": 0.51953125, + "learning_rate": 4.0236251909582325e-05, + "loss": 0.9903, + "step": 51135 + }, + { + "epoch": 0.73, + "grad_norm": 0.578125, + "learning_rate": 4.021618059629861e-05, + "loss": 1.0225, + "step": 51140 + }, + { + "epoch": 0.73, + "grad_norm": 0.474609375, + "learning_rate": 4.0196113030332814e-05, + "loss": 1.0148, + "step": 51145 + }, + { + "epoch": 0.73, + "grad_norm": 0.5546875, + "learning_rate": 4.017604921294273e-05, + "loss": 0.9159, + "step": 51150 + }, + { + "epoch": 0.73, + "grad_norm": 0.5546875, + "learning_rate": 4.015598914538603e-05, + "loss": 0.9721, + "step": 51155 + }, + { + "epoch": 0.73, + "grad_norm": 0.515625, + "learning_rate": 4.013593282892011e-05, + "loss": 0.9497, + "step": 51160 + }, + { + "epoch": 0.73, + "grad_norm": 0.5546875, + "learning_rate": 4.011588026480206e-05, + "loss": 0.9915, + "step": 51165 + }, + { + "epoch": 0.73, + "grad_norm": 0.53515625, + "learning_rate": 4.009583145428884e-05, + "loss": 0.7746, + "step": 51170 + }, + { + "epoch": 0.73, + "grad_norm": 0.55078125, + "learning_rate": 4.007578639863717e-05, + "loss": 0.8888, + "step": 51175 + }, + { + "epoch": 0.73, + "grad_norm": 0.54296875, + "learning_rate": 4.005574509910342e-05, + "loss": 0.9815, + "step": 51180 + }, + { + "epoch": 0.73, + "grad_norm": 0.6796875, + "learning_rate": 4.0035707556943834e-05, + "loss": 0.9475, + "step": 51185 + }, + { + "epoch": 0.73, + "grad_norm": 0.56640625, + "learning_rate": 4.0015673773414464e-05, + "loss": 0.8932, + "step": 51190 + }, + { + "epoch": 0.73, + "grad_norm": 0.60546875, + "learning_rate": 3.99956437497709e-05, + "loss": 0.8803, + "step": 51195 + }, + { + "epoch": 0.73, + "grad_norm": 0.490234375, + "learning_rate": 3.9975617487268744e-05, + "loss": 0.7229, + "step": 51200 + }, + { + "epoch": 0.73, + "grad_norm": 0.46484375, + "learning_rate": 3.995559498716327e-05, + "loss": 0.9048, + "step": 51205 + }, + { + "epoch": 0.73, + "grad_norm": 0.51953125, + "learning_rate": 3.993557625070945e-05, + "loss": 0.8673, + "step": 51210 + }, + { + "epoch": 0.73, + "grad_norm": 0.58203125, + "learning_rate": 3.9915561279162125e-05, + "loss": 0.9174, + "step": 51215 + }, + { + "epoch": 0.73, + "grad_norm": 0.52734375, + "learning_rate": 3.989555007377588e-05, + "loss": 0.902, + "step": 51220 + }, + { + "epoch": 0.73, + "grad_norm": 0.4765625, + "learning_rate": 3.9875542635804976e-05, + "loss": 0.9872, + "step": 51225 + }, + { + "epoch": 0.73, + "grad_norm": 0.57421875, + "learning_rate": 3.985553896650354e-05, + "loss": 0.9109, + "step": 51230 + }, + { + "epoch": 0.73, + "grad_norm": 0.5078125, + "learning_rate": 3.983553906712544e-05, + "loss": 1.0102, + "step": 51235 + }, + { + "epoch": 0.74, + "grad_norm": 0.494140625, + "learning_rate": 3.9815542938924286e-05, + "loss": 0.8146, + "step": 51240 + }, + { + "epoch": 0.74, + "grad_norm": 0.5625, + "learning_rate": 3.9795550583153404e-05, + "loss": 0.9904, + "step": 51245 + }, + { + "epoch": 0.74, + "grad_norm": 0.58203125, + "learning_rate": 3.977556200106598e-05, + "loss": 0.9373, + "step": 51250 + }, + { + "epoch": 0.74, + "grad_norm": 0.51171875, + "learning_rate": 3.975557719391496e-05, + "loss": 0.9666, + "step": 51255 + }, + { + "epoch": 0.74, + "grad_norm": 0.609375, + "learning_rate": 3.973559616295294e-05, + "loss": 1.1397, + "step": 51260 + }, + { + "epoch": 0.74, + "grad_norm": 0.58203125, + "learning_rate": 3.971561890943237e-05, + "loss": 1.0834, + "step": 51265 + }, + { + "epoch": 0.74, + "grad_norm": 0.55859375, + "learning_rate": 3.969564543460552e-05, + "loss": 0.9221, + "step": 51270 + }, + { + "epoch": 0.74, + "grad_norm": 0.546875, + "learning_rate": 3.967567573972425e-05, + "loss": 1.0058, + "step": 51275 + }, + { + "epoch": 0.74, + "grad_norm": 0.5859375, + "learning_rate": 3.965570982604033e-05, + "loss": 0.9367, + "step": 51280 + }, + { + "epoch": 0.74, + "grad_norm": 0.58984375, + "learning_rate": 3.963574769480528e-05, + "loss": 1.0296, + "step": 51285 + }, + { + "epoch": 0.74, + "grad_norm": 0.53515625, + "learning_rate": 3.9615789347270285e-05, + "loss": 0.8648, + "step": 51290 + }, + { + "epoch": 0.74, + "grad_norm": 0.57421875, + "learning_rate": 3.9595834784686414e-05, + "loss": 1.0255, + "step": 51295 + }, + { + "epoch": 0.74, + "grad_norm": 0.53125, + "learning_rate": 3.957588400830441e-05, + "loss": 0.9507, + "step": 51300 + }, + { + "epoch": 0.74, + "grad_norm": 0.54296875, + "learning_rate": 3.955593701937479e-05, + "loss": 1.0923, + "step": 51305 + }, + { + "epoch": 0.74, + "grad_norm": 0.58984375, + "learning_rate": 3.953599381914787e-05, + "loss": 1.0396, + "step": 51310 + }, + { + "epoch": 0.74, + "grad_norm": 1.09375, + "learning_rate": 3.951605440887375e-05, + "loss": 0.9572, + "step": 51315 + }, + { + "epoch": 0.74, + "grad_norm": 0.546875, + "learning_rate": 3.9496118789802196e-05, + "loss": 0.9524, + "step": 51320 + }, + { + "epoch": 0.74, + "grad_norm": 0.53125, + "learning_rate": 3.947618696318282e-05, + "loss": 0.8093, + "step": 51325 + }, + { + "epoch": 0.74, + "grad_norm": 0.53515625, + "learning_rate": 3.945625893026502e-05, + "loss": 0.862, + "step": 51330 + }, + { + "epoch": 0.74, + "grad_norm": 0.5234375, + "learning_rate": 3.943633469229783e-05, + "loss": 0.8987, + "step": 51335 + }, + { + "epoch": 0.74, + "grad_norm": 0.54296875, + "learning_rate": 3.941641425053014e-05, + "loss": 1.0828, + "step": 51340 + }, + { + "epoch": 0.74, + "grad_norm": 0.5703125, + "learning_rate": 3.939649760621066e-05, + "loss": 0.9208, + "step": 51345 + }, + { + "epoch": 0.74, + "grad_norm": 0.59765625, + "learning_rate": 3.937658476058772e-05, + "loss": 0.8918, + "step": 51350 + }, + { + "epoch": 0.74, + "grad_norm": 0.6953125, + "learning_rate": 3.9356675714909455e-05, + "loss": 1.0362, + "step": 51355 + }, + { + "epoch": 0.74, + "grad_norm": 0.53125, + "learning_rate": 3.933677047042382e-05, + "loss": 0.9233, + "step": 51360 + }, + { + "epoch": 0.74, + "grad_norm": 0.546875, + "learning_rate": 3.931686902837854e-05, + "loss": 1.0053, + "step": 51365 + }, + { + "epoch": 0.74, + "grad_norm": 0.56640625, + "learning_rate": 3.9296971390021e-05, + "loss": 0.9761, + "step": 51370 + }, + { + "epoch": 0.74, + "grad_norm": 0.53515625, + "learning_rate": 3.9277077556598415e-05, + "loss": 0.8039, + "step": 51375 + }, + { + "epoch": 0.74, + "grad_norm": 0.55859375, + "learning_rate": 3.9257187529357806e-05, + "loss": 0.9285, + "step": 51380 + }, + { + "epoch": 0.74, + "grad_norm": 0.50390625, + "learning_rate": 3.9237301309545826e-05, + "loss": 0.8729, + "step": 51385 + }, + { + "epoch": 0.74, + "grad_norm": 0.51171875, + "learning_rate": 3.9217418898408996e-05, + "loss": 0.8877, + "step": 51390 + }, + { + "epoch": 0.74, + "grad_norm": 0.458984375, + "learning_rate": 3.919754029719363e-05, + "loss": 0.7871, + "step": 51395 + }, + { + "epoch": 0.74, + "grad_norm": 0.48828125, + "learning_rate": 3.917766550714567e-05, + "loss": 0.8088, + "step": 51400 + }, + { + "epoch": 0.74, + "grad_norm": 0.56640625, + "learning_rate": 3.915779452951087e-05, + "loss": 0.9516, + "step": 51405 + }, + { + "epoch": 0.74, + "grad_norm": 0.498046875, + "learning_rate": 3.913792736553484e-05, + "loss": 0.8929, + "step": 51410 + }, + { + "epoch": 0.74, + "grad_norm": 0.57421875, + "learning_rate": 3.9118064016462806e-05, + "loss": 0.8689, + "step": 51415 + }, + { + "epoch": 0.74, + "grad_norm": 0.53515625, + "learning_rate": 3.909820448353986e-05, + "loss": 0.8161, + "step": 51420 + }, + { + "epoch": 0.74, + "grad_norm": 0.5625, + "learning_rate": 3.907834876801085e-05, + "loss": 0.9621, + "step": 51425 + }, + { + "epoch": 0.74, + "grad_norm": 0.60546875, + "learning_rate": 3.9058496871120295e-05, + "loss": 0.9807, + "step": 51430 + }, + { + "epoch": 0.74, + "grad_norm": 0.546875, + "learning_rate": 3.903864879411255e-05, + "loss": 0.9334, + "step": 51435 + }, + { + "epoch": 0.74, + "grad_norm": 0.5859375, + "learning_rate": 3.9018804538231776e-05, + "loss": 0.9602, + "step": 51440 + }, + { + "epoch": 0.74, + "grad_norm": 0.50390625, + "learning_rate": 3.8998964104721745e-05, + "loss": 0.8113, + "step": 51445 + }, + { + "epoch": 0.74, + "grad_norm": 0.546875, + "learning_rate": 3.897912749482615e-05, + "loss": 1.0266, + "step": 51450 + }, + { + "epoch": 0.74, + "grad_norm": 0.53515625, + "learning_rate": 3.895929470978831e-05, + "loss": 1.0061, + "step": 51455 + }, + { + "epoch": 0.74, + "grad_norm": 0.6328125, + "learning_rate": 3.8939465750851434e-05, + "loss": 0.9953, + "step": 51460 + }, + { + "epoch": 0.74, + "grad_norm": 0.53515625, + "learning_rate": 3.891964061925835e-05, + "loss": 1.0519, + "step": 51465 + }, + { + "epoch": 0.74, + "grad_norm": 0.3984375, + "learning_rate": 3.8899819316251753e-05, + "loss": 0.7409, + "step": 51470 + }, + { + "epoch": 0.74, + "grad_norm": 0.58984375, + "learning_rate": 3.888000184307411e-05, + "loss": 0.9194, + "step": 51475 + }, + { + "epoch": 0.74, + "grad_norm": 0.55078125, + "learning_rate": 3.8860188200967516e-05, + "loss": 1.035, + "step": 51480 + }, + { + "epoch": 0.74, + "grad_norm": 0.51953125, + "learning_rate": 3.884037839117396e-05, + "loss": 1.0426, + "step": 51485 + }, + { + "epoch": 0.74, + "grad_norm": 0.5078125, + "learning_rate": 3.8820572414935185e-05, + "loss": 1.03, + "step": 51490 + }, + { + "epoch": 0.74, + "grad_norm": 0.54296875, + "learning_rate": 3.880077027349257e-05, + "loss": 0.9797, + "step": 51495 + }, + { + "epoch": 0.74, + "grad_norm": 0.55859375, + "learning_rate": 3.878097196808737e-05, + "loss": 0.8733, + "step": 51500 + }, + { + "epoch": 0.74, + "grad_norm": 0.57421875, + "learning_rate": 3.876117749996064e-05, + "loss": 0.911, + "step": 51505 + }, + { + "epoch": 0.74, + "grad_norm": 0.462890625, + "learning_rate": 3.8741386870352994e-05, + "loss": 1.0591, + "step": 51510 + }, + { + "epoch": 0.74, + "grad_norm": 0.416015625, + "learning_rate": 3.872160008050497e-05, + "loss": 0.8526, + "step": 51515 + }, + { + "epoch": 0.74, + "grad_norm": 0.6015625, + "learning_rate": 3.870181713165688e-05, + "loss": 0.9296, + "step": 51520 + }, + { + "epoch": 0.74, + "grad_norm": 0.486328125, + "learning_rate": 3.868203802504867e-05, + "loss": 0.8969, + "step": 51525 + }, + { + "epoch": 0.74, + "grad_norm": 0.54296875, + "learning_rate": 3.866226276192016e-05, + "loss": 0.9046, + "step": 51530 + }, + { + "epoch": 0.74, + "grad_norm": 0.58203125, + "learning_rate": 3.864249134351091e-05, + "loss": 0.8912, + "step": 51535 + }, + { + "epoch": 0.74, + "grad_norm": 0.54296875, + "learning_rate": 3.8622723771060145e-05, + "loss": 1.1907, + "step": 51540 + }, + { + "epoch": 0.74, + "grad_norm": 0.609375, + "learning_rate": 3.860296004580696e-05, + "loss": 0.9601, + "step": 51545 + }, + { + "epoch": 0.74, + "grad_norm": 0.5703125, + "learning_rate": 3.8583200168990195e-05, + "loss": 0.9729, + "step": 51550 + }, + { + "epoch": 0.74, + "grad_norm": 0.54296875, + "learning_rate": 3.856344414184839e-05, + "loss": 0.9276, + "step": 51555 + }, + { + "epoch": 0.74, + "grad_norm": 0.56640625, + "learning_rate": 3.854369196561984e-05, + "loss": 1.0324, + "step": 51560 + }, + { + "epoch": 0.74, + "grad_norm": 0.51171875, + "learning_rate": 3.852394364154268e-05, + "loss": 0.8486, + "step": 51565 + }, + { + "epoch": 0.74, + "grad_norm": 0.59375, + "learning_rate": 3.850419917085478e-05, + "loss": 1.0582, + "step": 51570 + }, + { + "epoch": 0.74, + "grad_norm": 0.54296875, + "learning_rate": 3.848445855479368e-05, + "loss": 0.9645, + "step": 51575 + }, + { + "epoch": 0.74, + "grad_norm": 0.5625, + "learning_rate": 3.846472179459678e-05, + "loss": 0.8136, + "step": 51580 + }, + { + "epoch": 0.74, + "grad_norm": 0.66015625, + "learning_rate": 3.8444988891501224e-05, + "loss": 1.1109, + "step": 51585 + }, + { + "epoch": 0.74, + "grad_norm": 0.578125, + "learning_rate": 3.8425259846743845e-05, + "loss": 0.9099, + "step": 51590 + }, + { + "epoch": 0.74, + "grad_norm": 0.62109375, + "learning_rate": 3.8405534661561304e-05, + "loss": 1.062, + "step": 51595 + }, + { + "epoch": 0.74, + "grad_norm": 0.5625, + "learning_rate": 3.838581333719004e-05, + "loss": 0.9472, + "step": 51600 + }, + { + "epoch": 0.74, + "grad_norm": 0.58984375, + "learning_rate": 3.836609587486612e-05, + "loss": 0.9514, + "step": 51605 + }, + { + "epoch": 0.74, + "grad_norm": 0.490234375, + "learning_rate": 3.834638227582555e-05, + "loss": 1.0129, + "step": 51610 + }, + { + "epoch": 0.74, + "grad_norm": 0.53125, + "learning_rate": 3.832667254130396e-05, + "loss": 0.9279, + "step": 51615 + }, + { + "epoch": 0.74, + "grad_norm": 0.65234375, + "learning_rate": 3.830696667253674e-05, + "loss": 0.9951, + "step": 51620 + }, + { + "epoch": 0.74, + "grad_norm": 0.6953125, + "learning_rate": 3.8287264670759106e-05, + "loss": 0.9776, + "step": 51625 + }, + { + "epoch": 0.74, + "grad_norm": 0.53515625, + "learning_rate": 3.826756653720605e-05, + "loss": 0.9665, + "step": 51630 + }, + { + "epoch": 0.74, + "grad_norm": 0.55859375, + "learning_rate": 3.824787227311218e-05, + "loss": 0.9334, + "step": 51635 + }, + { + "epoch": 0.74, + "grad_norm": 0.6328125, + "learning_rate": 3.822818187971201e-05, + "loss": 1.0076, + "step": 51640 + }, + { + "epoch": 0.74, + "grad_norm": 0.5234375, + "learning_rate": 3.8208495358239796e-05, + "loss": 0.8727, + "step": 51645 + }, + { + "epoch": 0.74, + "grad_norm": 0.486328125, + "learning_rate": 3.818881270992943e-05, + "loss": 0.9704, + "step": 51650 + }, + { + "epoch": 0.74, + "grad_norm": 0.51171875, + "learning_rate": 3.816913393601468e-05, + "loss": 0.9842, + "step": 51655 + }, + { + "epoch": 0.74, + "grad_norm": 0.58203125, + "learning_rate": 3.8149459037729076e-05, + "loss": 0.9255, + "step": 51660 + }, + { + "epoch": 0.74, + "grad_norm": 0.5546875, + "learning_rate": 3.8129788016305814e-05, + "loss": 1.0225, + "step": 51665 + }, + { + "epoch": 0.74, + "grad_norm": 0.546875, + "learning_rate": 3.811012087297786e-05, + "loss": 0.9501, + "step": 51670 + }, + { + "epoch": 0.74, + "grad_norm": 0.57421875, + "learning_rate": 3.809045760897802e-05, + "loss": 1.0628, + "step": 51675 + }, + { + "epoch": 0.74, + "grad_norm": 0.58984375, + "learning_rate": 3.807079822553885e-05, + "loss": 1.0562, + "step": 51680 + }, + { + "epoch": 0.74, + "grad_norm": 0.5703125, + "learning_rate": 3.805114272389254e-05, + "loss": 0.9083, + "step": 51685 + }, + { + "epoch": 0.74, + "grad_norm": 0.56640625, + "learning_rate": 3.803149110527116e-05, + "loss": 0.9235, + "step": 51690 + }, + { + "epoch": 0.74, + "grad_norm": 0.62890625, + "learning_rate": 3.801184337090653e-05, + "loss": 1.0131, + "step": 51695 + }, + { + "epoch": 0.74, + "grad_norm": 0.65625, + "learning_rate": 3.7992199522030115e-05, + "loss": 1.0319, + "step": 51700 + }, + { + "epoch": 0.74, + "grad_norm": 0.5703125, + "learning_rate": 3.797255955987326e-05, + "loss": 1.0243, + "step": 51705 + }, + { + "epoch": 0.74, + "grad_norm": 0.578125, + "learning_rate": 3.7952923485667045e-05, + "loss": 0.885, + "step": 51710 + }, + { + "epoch": 0.74, + "grad_norm": 0.50390625, + "learning_rate": 3.793329130064225e-05, + "loss": 0.9075, + "step": 51715 + }, + { + "epoch": 0.74, + "grad_norm": 0.5078125, + "learning_rate": 3.791366300602941e-05, + "loss": 1.0565, + "step": 51720 + }, + { + "epoch": 0.74, + "grad_norm": 0.5859375, + "learning_rate": 3.789403860305889e-05, + "loss": 0.8994, + "step": 51725 + }, + { + "epoch": 0.74, + "grad_norm": 0.578125, + "learning_rate": 3.7874418092960796e-05, + "loss": 0.9463, + "step": 51730 + }, + { + "epoch": 0.74, + "grad_norm": 0.52734375, + "learning_rate": 3.7854801476964895e-05, + "loss": 0.9532, + "step": 51735 + }, + { + "epoch": 0.74, + "grad_norm": 0.47265625, + "learning_rate": 3.783518875630081e-05, + "loss": 0.8684, + "step": 51740 + }, + { + "epoch": 0.74, + "grad_norm": 0.56640625, + "learning_rate": 3.781557993219794e-05, + "loss": 0.904, + "step": 51745 + }, + { + "epoch": 0.74, + "grad_norm": 0.51953125, + "learning_rate": 3.77959750058853e-05, + "loss": 0.8483, + "step": 51750 + }, + { + "epoch": 0.74, + "grad_norm": 0.5859375, + "learning_rate": 3.777637397859183e-05, + "loss": 1.0518, + "step": 51755 + }, + { + "epoch": 0.74, + "grad_norm": 0.57421875, + "learning_rate": 3.775677685154606e-05, + "loss": 0.8509, + "step": 51760 + }, + { + "epoch": 0.74, + "grad_norm": 0.61328125, + "learning_rate": 3.7737183625976446e-05, + "loss": 0.9962, + "step": 51765 + }, + { + "epoch": 0.74, + "grad_norm": 0.466796875, + "learning_rate": 3.771759430311105e-05, + "loss": 0.7977, + "step": 51770 + }, + { + "epoch": 0.74, + "grad_norm": 0.58203125, + "learning_rate": 3.7698008884177794e-05, + "loss": 0.9294, + "step": 51775 + }, + { + "epoch": 0.74, + "grad_norm": 0.578125, + "learning_rate": 3.767842737040427e-05, + "loss": 1.0377, + "step": 51780 + }, + { + "epoch": 0.74, + "grad_norm": 0.69140625, + "learning_rate": 3.76588497630179e-05, + "loss": 1.0127, + "step": 51785 + }, + { + "epoch": 0.74, + "grad_norm": 0.6328125, + "learning_rate": 3.7639276063245855e-05, + "loss": 1.0065, + "step": 51790 + }, + { + "epoch": 0.74, + "grad_norm": 0.57421875, + "learning_rate": 3.761970627231498e-05, + "loss": 0.8589, + "step": 51795 + }, + { + "epoch": 0.74, + "grad_norm": 0.55859375, + "learning_rate": 3.760014039145197e-05, + "loss": 0.8028, + "step": 51800 + }, + { + "epoch": 0.74, + "grad_norm": 0.56640625, + "learning_rate": 3.758057842188325e-05, + "loss": 0.8983, + "step": 51805 + }, + { + "epoch": 0.74, + "grad_norm": 0.52734375, + "learning_rate": 3.756102036483493e-05, + "loss": 0.9926, + "step": 51810 + }, + { + "epoch": 0.74, + "grad_norm": 0.55859375, + "learning_rate": 3.754146622153296e-05, + "loss": 0.9784, + "step": 51815 + }, + { + "epoch": 0.74, + "grad_norm": 0.546875, + "learning_rate": 3.752191599320307e-05, + "loss": 0.9877, + "step": 51820 + }, + { + "epoch": 0.74, + "grad_norm": 0.64453125, + "learning_rate": 3.7502369681070635e-05, + "loss": 1.0543, + "step": 51825 + }, + { + "epoch": 0.74, + "grad_norm": 0.546875, + "learning_rate": 3.748282728636081e-05, + "loss": 0.9123, + "step": 51830 + }, + { + "epoch": 0.74, + "grad_norm": 0.57421875, + "learning_rate": 3.746328881029858e-05, + "loss": 0.8175, + "step": 51835 + }, + { + "epoch": 0.74, + "grad_norm": 0.51953125, + "learning_rate": 3.744375425410867e-05, + "loss": 0.92, + "step": 51840 + }, + { + "epoch": 0.74, + "grad_norm": 0.5859375, + "learning_rate": 3.742422361901544e-05, + "loss": 0.9807, + "step": 51845 + }, + { + "epoch": 0.74, + "grad_norm": 0.59765625, + "learning_rate": 3.740469690624314e-05, + "loss": 0.9612, + "step": 51850 + }, + { + "epoch": 0.74, + "grad_norm": 0.546875, + "learning_rate": 3.738517411701578e-05, + "loss": 0.9129, + "step": 51855 + }, + { + "epoch": 0.74, + "grad_norm": 0.515625, + "learning_rate": 3.7365655252556965e-05, + "loss": 0.8131, + "step": 51860 + }, + { + "epoch": 0.74, + "grad_norm": 0.58984375, + "learning_rate": 3.7346140314090226e-05, + "loss": 0.9964, + "step": 51865 + }, + { + "epoch": 0.74, + "grad_norm": 0.59765625, + "learning_rate": 3.732662930283883e-05, + "loss": 1.0277, + "step": 51870 + }, + { + "epoch": 0.74, + "grad_norm": 0.55859375, + "learning_rate": 3.7307122220025625e-05, + "loss": 0.9241, + "step": 51875 + }, + { + "epoch": 0.74, + "grad_norm": 0.7578125, + "learning_rate": 3.728761906687339e-05, + "loss": 0.9222, + "step": 51880 + }, + { + "epoch": 0.74, + "grad_norm": 0.5546875, + "learning_rate": 3.726811984460467e-05, + "loss": 1.0535, + "step": 51885 + }, + { + "epoch": 0.74, + "grad_norm": 0.609375, + "learning_rate": 3.724862455444159e-05, + "loss": 1.073, + "step": 51890 + }, + { + "epoch": 0.74, + "grad_norm": 0.63671875, + "learning_rate": 3.72291331976062e-05, + "loss": 0.9135, + "step": 51895 + }, + { + "epoch": 0.74, + "grad_norm": 0.578125, + "learning_rate": 3.7209645775320265e-05, + "loss": 0.959, + "step": 51900 + }, + { + "epoch": 0.74, + "grad_norm": 0.546875, + "learning_rate": 3.7190162288805205e-05, + "loss": 0.9446, + "step": 51905 + }, + { + "epoch": 0.74, + "grad_norm": 0.765625, + "learning_rate": 3.717068273928232e-05, + "loss": 1.0108, + "step": 51910 + }, + { + "epoch": 0.74, + "grad_norm": 0.498046875, + "learning_rate": 3.715120712797262e-05, + "loss": 0.8893, + "step": 51915 + }, + { + "epoch": 0.74, + "grad_norm": 0.58984375, + "learning_rate": 3.713173545609681e-05, + "loss": 1.1068, + "step": 51920 + }, + { + "epoch": 0.74, + "grad_norm": 0.5546875, + "learning_rate": 3.711226772487545e-05, + "loss": 0.7963, + "step": 51925 + }, + { + "epoch": 0.74, + "grad_norm": 0.62109375, + "learning_rate": 3.7092803935528734e-05, + "loss": 0.9644, + "step": 51930 + }, + { + "epoch": 0.74, + "grad_norm": 0.5625, + "learning_rate": 3.7073344089276754e-05, + "loss": 0.9825, + "step": 51935 + }, + { + "epoch": 0.75, + "grad_norm": 0.55859375, + "learning_rate": 3.705388818733919e-05, + "loss": 0.9845, + "step": 51940 + }, + { + "epoch": 0.75, + "grad_norm": 0.61328125, + "learning_rate": 3.703443623093562e-05, + "loss": 0.9744, + "step": 51945 + }, + { + "epoch": 0.75, + "grad_norm": 0.5859375, + "learning_rate": 3.7014988221285315e-05, + "loss": 0.8523, + "step": 51950 + }, + { + "epoch": 0.75, + "grad_norm": 0.5546875, + "learning_rate": 3.699554415960727e-05, + "loss": 0.8706, + "step": 51955 + }, + { + "epoch": 0.75, + "grad_norm": 0.5625, + "learning_rate": 3.6976104047120264e-05, + "loss": 1.1154, + "step": 51960 + }, + { + "epoch": 0.75, + "grad_norm": 0.58203125, + "learning_rate": 3.695666788504286e-05, + "loss": 0.9287, + "step": 51965 + }, + { + "epoch": 0.75, + "grad_norm": 0.55859375, + "learning_rate": 3.693723567459329e-05, + "loss": 0.8874, + "step": 51970 + }, + { + "epoch": 0.75, + "grad_norm": 0.48046875, + "learning_rate": 3.691780741698964e-05, + "loss": 0.9014, + "step": 51975 + }, + { + "epoch": 0.75, + "grad_norm": 0.5078125, + "learning_rate": 3.689838311344966e-05, + "loss": 1.0094, + "step": 51980 + }, + { + "epoch": 0.75, + "grad_norm": 0.58203125, + "learning_rate": 3.687896276519086e-05, + "loss": 1.0438, + "step": 51985 + }, + { + "epoch": 0.75, + "grad_norm": 0.5625, + "learning_rate": 3.6859546373430576e-05, + "loss": 0.8317, + "step": 51990 + }, + { + "epoch": 0.75, + "grad_norm": 0.58203125, + "learning_rate": 3.6840133939385854e-05, + "loss": 0.9784, + "step": 51995 + }, + { + "epoch": 0.75, + "grad_norm": 0.5078125, + "learning_rate": 3.682072546427344e-05, + "loss": 1.0079, + "step": 52000 + }, + { + "epoch": 0.75, + "grad_norm": 0.50390625, + "learning_rate": 3.680132094930992e-05, + "loss": 0.9256, + "step": 52005 + }, + { + "epoch": 0.75, + "grad_norm": 0.494140625, + "learning_rate": 3.678192039571161e-05, + "loss": 0.8137, + "step": 52010 + }, + { + "epoch": 0.75, + "grad_norm": 0.55078125, + "learning_rate": 3.676252380469448e-05, + "loss": 0.9156, + "step": 52015 + }, + { + "epoch": 0.75, + "grad_norm": 0.61328125, + "learning_rate": 3.67431311774744e-05, + "loss": 0.9449, + "step": 52020 + }, + { + "epoch": 0.75, + "grad_norm": 0.6015625, + "learning_rate": 3.6723742515266924e-05, + "loss": 1.0522, + "step": 52025 + }, + { + "epoch": 0.75, + "grad_norm": 0.6015625, + "learning_rate": 3.6704357819287336e-05, + "loss": 0.887, + "step": 52030 + }, + { + "epoch": 0.75, + "grad_norm": 0.5625, + "learning_rate": 3.668497709075065e-05, + "loss": 0.879, + "step": 52035 + }, + { + "epoch": 0.75, + "grad_norm": 0.73046875, + "learning_rate": 3.666560033087172e-05, + "loss": 1.0117, + "step": 52040 + }, + { + "epoch": 0.75, + "grad_norm": 0.66015625, + "learning_rate": 3.6646227540865105e-05, + "loss": 0.9591, + "step": 52045 + }, + { + "epoch": 0.75, + "grad_norm": 0.546875, + "learning_rate": 3.662685872194509e-05, + "loss": 0.9551, + "step": 52050 + }, + { + "epoch": 0.75, + "grad_norm": 0.57421875, + "learning_rate": 3.660749387532574e-05, + "loss": 0.8861, + "step": 52055 + }, + { + "epoch": 0.75, + "grad_norm": 0.546875, + "learning_rate": 3.658813300222091e-05, + "loss": 1.0416, + "step": 52060 + }, + { + "epoch": 0.75, + "grad_norm": 0.50390625, + "learning_rate": 3.656877610384407e-05, + "loss": 0.9706, + "step": 52065 + }, + { + "epoch": 0.75, + "grad_norm": 0.58984375, + "learning_rate": 3.65494231814086e-05, + "loss": 1.0754, + "step": 52070 + }, + { + "epoch": 0.75, + "grad_norm": 0.5390625, + "learning_rate": 3.6530074236127585e-05, + "loss": 0.869, + "step": 52075 + }, + { + "epoch": 0.75, + "grad_norm": 0.578125, + "learning_rate": 3.6510729269213805e-05, + "loss": 0.9369, + "step": 52080 + }, + { + "epoch": 0.75, + "grad_norm": 0.58984375, + "learning_rate": 3.649138828187978e-05, + "loss": 1.013, + "step": 52085 + }, + { + "epoch": 0.75, + "grad_norm": 0.58203125, + "learning_rate": 3.6472051275337904e-05, + "loss": 1.021, + "step": 52090 + }, + { + "epoch": 0.75, + "grad_norm": 0.61328125, + "learning_rate": 3.6452718250800174e-05, + "loss": 0.9296, + "step": 52095 + }, + { + "epoch": 0.75, + "grad_norm": 0.68359375, + "learning_rate": 3.643338920947844e-05, + "loss": 1.0464, + "step": 52100 + }, + { + "epoch": 0.75, + "grad_norm": 0.58984375, + "learning_rate": 3.64140641525843e-05, + "loss": 0.9818, + "step": 52105 + }, + { + "epoch": 0.75, + "grad_norm": 0.78125, + "learning_rate": 3.639474308132901e-05, + "loss": 1.1438, + "step": 52110 + }, + { + "epoch": 0.75, + "grad_norm": 0.51953125, + "learning_rate": 3.637542599692365e-05, + "loss": 0.9877, + "step": 52115 + }, + { + "epoch": 0.75, + "grad_norm": 0.61328125, + "learning_rate": 3.63561129005791e-05, + "loss": 0.9625, + "step": 52120 + }, + { + "epoch": 0.75, + "grad_norm": 0.5, + "learning_rate": 3.633680379350583e-05, + "loss": 0.8335, + "step": 52125 + }, + { + "epoch": 0.75, + "grad_norm": 0.5859375, + "learning_rate": 3.631749867691421e-05, + "loss": 1.027, + "step": 52130 + }, + { + "epoch": 0.75, + "grad_norm": 0.5859375, + "learning_rate": 3.6298197552014336e-05, + "loss": 0.8022, + "step": 52135 + }, + { + "epoch": 0.75, + "grad_norm": 0.5390625, + "learning_rate": 3.627890042001598e-05, + "loss": 1.0426, + "step": 52140 + }, + { + "epoch": 0.75, + "grad_norm": 0.60546875, + "learning_rate": 3.625960728212869e-05, + "loss": 1.2428, + "step": 52145 + }, + { + "epoch": 0.75, + "grad_norm": 0.55078125, + "learning_rate": 3.6240318139561826e-05, + "loss": 0.882, + "step": 52150 + }, + { + "epoch": 0.75, + "grad_norm": 0.55078125, + "learning_rate": 3.622103299352445e-05, + "loss": 0.9101, + "step": 52155 + }, + { + "epoch": 0.75, + "grad_norm": 0.5625, + "learning_rate": 3.620175184522534e-05, + "loss": 0.9604, + "step": 52160 + }, + { + "epoch": 0.75, + "grad_norm": 0.63671875, + "learning_rate": 3.6182474695873084e-05, + "loss": 1.1251, + "step": 52165 + }, + { + "epoch": 0.75, + "grad_norm": 0.50390625, + "learning_rate": 3.616320154667603e-05, + "loss": 1.1223, + "step": 52170 + }, + { + "epoch": 0.75, + "grad_norm": 0.609375, + "learning_rate": 3.614393239884216e-05, + "loss": 0.8701, + "step": 52175 + }, + { + "epoch": 0.75, + "grad_norm": 0.484375, + "learning_rate": 3.612466725357935e-05, + "loss": 0.8804, + "step": 52180 + }, + { + "epoch": 0.75, + "grad_norm": 0.56640625, + "learning_rate": 3.6105406112095207e-05, + "loss": 1.0503, + "step": 52185 + }, + { + "epoch": 0.75, + "grad_norm": 0.5625, + "learning_rate": 3.6086148975596914e-05, + "loss": 0.9704, + "step": 52190 + }, + { + "epoch": 0.75, + "grad_norm": 0.54296875, + "learning_rate": 3.6066895845291595e-05, + "loss": 0.9705, + "step": 52195 + }, + { + "epoch": 0.75, + "grad_norm": 0.55859375, + "learning_rate": 3.604764672238609e-05, + "loss": 0.9165, + "step": 52200 + }, + { + "epoch": 0.75, + "grad_norm": 0.5625, + "learning_rate": 3.602840160808688e-05, + "loss": 0.9666, + "step": 52205 + }, + { + "epoch": 0.75, + "grad_norm": 0.58984375, + "learning_rate": 3.6009160503600326e-05, + "loss": 0.9914, + "step": 52210 + }, + { + "epoch": 0.75, + "grad_norm": 0.5, + "learning_rate": 3.5989923410132495e-05, + "loss": 0.9454, + "step": 52215 + }, + { + "epoch": 0.75, + "grad_norm": 0.458984375, + "learning_rate": 3.597069032888915e-05, + "loss": 0.9694, + "step": 52220 + }, + { + "epoch": 0.75, + "grad_norm": 0.51171875, + "learning_rate": 3.5951461261075845e-05, + "loss": 0.9661, + "step": 52225 + }, + { + "epoch": 0.75, + "grad_norm": 0.48828125, + "learning_rate": 3.593223620789793e-05, + "loss": 1.0651, + "step": 52230 + }, + { + "epoch": 0.75, + "grad_norm": 0.5234375, + "learning_rate": 3.5913015170560385e-05, + "loss": 0.933, + "step": 52235 + }, + { + "epoch": 0.75, + "grad_norm": 0.5234375, + "learning_rate": 3.589379815026806e-05, + "loss": 0.9405, + "step": 52240 + }, + { + "epoch": 0.75, + "grad_norm": 0.5390625, + "learning_rate": 3.5874585148225456e-05, + "loss": 0.9426, + "step": 52245 + }, + { + "epoch": 0.75, + "grad_norm": 0.609375, + "learning_rate": 3.5855376165636924e-05, + "loss": 0.9903, + "step": 52250 + }, + { + "epoch": 0.75, + "grad_norm": 0.51953125, + "learning_rate": 3.5836171203706425e-05, + "loss": 1.2026, + "step": 52255 + }, + { + "epoch": 0.75, + "grad_norm": 0.49609375, + "learning_rate": 3.5816970263637796e-05, + "loss": 0.965, + "step": 52260 + }, + { + "epoch": 0.75, + "grad_norm": 0.455078125, + "learning_rate": 3.579777334663461e-05, + "loss": 0.9093, + "step": 52265 + }, + { + "epoch": 0.75, + "grad_norm": 0.5625, + "learning_rate": 3.577858045390007e-05, + "loss": 1.1153, + "step": 52270 + }, + { + "epoch": 0.75, + "grad_norm": 0.53125, + "learning_rate": 3.575939158663725e-05, + "loss": 0.9002, + "step": 52275 + }, + { + "epoch": 0.75, + "grad_norm": 0.5625, + "learning_rate": 3.5740206746048965e-05, + "loss": 0.9292, + "step": 52280 + }, + { + "epoch": 0.75, + "grad_norm": 0.5390625, + "learning_rate": 3.572102593333767e-05, + "loss": 0.8759, + "step": 52285 + }, + { + "epoch": 0.75, + "grad_norm": 0.55859375, + "learning_rate": 3.570184914970571e-05, + "loss": 0.946, + "step": 52290 + }, + { + "epoch": 0.75, + "grad_norm": 0.53125, + "learning_rate": 3.568267639635507e-05, + "loss": 0.9866, + "step": 52295 + }, + { + "epoch": 0.75, + "grad_norm": 0.55859375, + "learning_rate": 3.5663507674487505e-05, + "loss": 0.892, + "step": 52300 + }, + { + "epoch": 0.75, + "grad_norm": 0.48046875, + "learning_rate": 3.5644342985304545e-05, + "loss": 0.8986, + "step": 52305 + }, + { + "epoch": 0.75, + "grad_norm": 0.52734375, + "learning_rate": 3.562518233000749e-05, + "loss": 0.9364, + "step": 52310 + }, + { + "epoch": 0.75, + "grad_norm": 0.4609375, + "learning_rate": 3.56060257097973e-05, + "loss": 0.8437, + "step": 52315 + }, + { + "epoch": 0.75, + "grad_norm": 0.61328125, + "learning_rate": 3.558687312587474e-05, + "loss": 0.9295, + "step": 52320 + }, + { + "epoch": 0.75, + "grad_norm": 0.58203125, + "learning_rate": 3.5567724579440386e-05, + "loss": 1.0193, + "step": 52325 + }, + { + "epoch": 0.75, + "grad_norm": 0.55859375, + "learning_rate": 3.554858007169439e-05, + "loss": 1.019, + "step": 52330 + }, + { + "epoch": 0.75, + "grad_norm": 0.515625, + "learning_rate": 3.552943960383681e-05, + "loss": 0.9998, + "step": 52335 + }, + { + "epoch": 0.75, + "grad_norm": 0.60546875, + "learning_rate": 3.551030317706742e-05, + "loss": 1.1862, + "step": 52340 + }, + { + "epoch": 0.75, + "grad_norm": 0.57421875, + "learning_rate": 3.549117079258566e-05, + "loss": 1.1737, + "step": 52345 + }, + { + "epoch": 0.75, + "grad_norm": 0.57421875, + "learning_rate": 3.547204245159075e-05, + "loss": 0.9703, + "step": 52350 + }, + { + "epoch": 0.75, + "grad_norm": 0.6953125, + "learning_rate": 3.545291815528171e-05, + "loss": 0.9504, + "step": 52355 + }, + { + "epoch": 0.75, + "grad_norm": 0.63671875, + "learning_rate": 3.543379790485732e-05, + "loss": 0.8241, + "step": 52360 + }, + { + "epoch": 0.75, + "grad_norm": 0.56640625, + "learning_rate": 3.541468170151597e-05, + "loss": 0.9014, + "step": 52365 + }, + { + "epoch": 0.75, + "grad_norm": 0.5859375, + "learning_rate": 3.539556954645593e-05, + "loss": 0.9119, + "step": 52370 + }, + { + "epoch": 0.75, + "grad_norm": 0.5390625, + "learning_rate": 3.53764614408752e-05, + "loss": 0.9338, + "step": 52375 + }, + { + "epoch": 0.75, + "grad_norm": 0.55078125, + "learning_rate": 3.535735738597144e-05, + "loss": 0.9221, + "step": 52380 + }, + { + "epoch": 0.75, + "grad_norm": 0.49609375, + "learning_rate": 3.533825738294213e-05, + "loss": 0.888, + "step": 52385 + }, + { + "epoch": 0.75, + "grad_norm": 0.66796875, + "learning_rate": 3.5319161432984525e-05, + "loss": 1.0516, + "step": 52390 + }, + { + "epoch": 0.75, + "grad_norm": 0.57421875, + "learning_rate": 3.5300069537295556e-05, + "loss": 0.8644, + "step": 52395 + }, + { + "epoch": 0.75, + "grad_norm": 0.609375, + "learning_rate": 3.528098169707187e-05, + "loss": 0.8862, + "step": 52400 + }, + { + "epoch": 0.75, + "grad_norm": 0.640625, + "learning_rate": 3.5261897913509964e-05, + "loss": 1.0723, + "step": 52405 + }, + { + "epoch": 0.75, + "grad_norm": 0.62109375, + "learning_rate": 3.524281818780607e-05, + "loss": 0.9493, + "step": 52410 + }, + { + "epoch": 0.75, + "grad_norm": 0.5078125, + "learning_rate": 3.522374252115604e-05, + "loss": 0.9562, + "step": 52415 + }, + { + "epoch": 0.75, + "grad_norm": 0.6328125, + "learning_rate": 3.520467091475561e-05, + "loss": 0.9403, + "step": 52420 + }, + { + "epoch": 0.75, + "grad_norm": 0.58984375, + "learning_rate": 3.518560336980024e-05, + "loss": 0.9999, + "step": 52425 + }, + { + "epoch": 0.75, + "grad_norm": 0.57421875, + "learning_rate": 3.516653988748503e-05, + "loss": 0.866, + "step": 52430 + }, + { + "epoch": 0.75, + "grad_norm": 0.51171875, + "learning_rate": 3.514748046900497e-05, + "loss": 0.9026, + "step": 52435 + }, + { + "epoch": 0.75, + "grad_norm": 0.53125, + "learning_rate": 3.5128425115554656e-05, + "loss": 1.0292, + "step": 52440 + }, + { + "epoch": 0.75, + "grad_norm": 0.5, + "learning_rate": 3.510937382832854e-05, + "loss": 0.9959, + "step": 52445 + }, + { + "epoch": 0.75, + "grad_norm": 0.59375, + "learning_rate": 3.509032660852082e-05, + "loss": 1.0212, + "step": 52450 + }, + { + "epoch": 0.75, + "grad_norm": 0.5546875, + "learning_rate": 3.5071283457325344e-05, + "loss": 0.8349, + "step": 52455 + }, + { + "epoch": 0.75, + "grad_norm": 0.5390625, + "learning_rate": 3.5052244375935736e-05, + "loss": 0.9994, + "step": 52460 + }, + { + "epoch": 0.75, + "grad_norm": 0.52734375, + "learning_rate": 3.503320936554543e-05, + "loss": 0.8113, + "step": 52465 + }, + { + "epoch": 0.75, + "grad_norm": 0.578125, + "learning_rate": 3.501417842734758e-05, + "loss": 0.97, + "step": 52470 + }, + { + "epoch": 0.75, + "grad_norm": 0.625, + "learning_rate": 3.4995151562535e-05, + "loss": 0.9237, + "step": 52475 + }, + { + "epoch": 0.75, + "grad_norm": 0.53125, + "learning_rate": 3.4976128772300364e-05, + "loss": 1.1457, + "step": 52480 + }, + { + "epoch": 0.75, + "grad_norm": 0.6640625, + "learning_rate": 3.4957110057836065e-05, + "loss": 1.0911, + "step": 52485 + }, + { + "epoch": 0.75, + "grad_norm": 0.494140625, + "learning_rate": 3.493809542033414e-05, + "loss": 0.8484, + "step": 52490 + }, + { + "epoch": 0.75, + "grad_norm": 0.54296875, + "learning_rate": 3.4919084860986506e-05, + "loss": 1.061, + "step": 52495 + }, + { + "epoch": 0.75, + "grad_norm": 0.61328125, + "learning_rate": 3.490007838098478e-05, + "loss": 0.9506, + "step": 52500 + }, + { + "epoch": 0.75, + "grad_norm": 0.578125, + "learning_rate": 3.4881075981520284e-05, + "loss": 0.8539, + "step": 52505 + }, + { + "epoch": 0.75, + "grad_norm": 0.625, + "learning_rate": 3.4862077663784074e-05, + "loss": 0.962, + "step": 52510 + }, + { + "epoch": 0.75, + "grad_norm": 0.494140625, + "learning_rate": 3.484308342896703e-05, + "loss": 0.8708, + "step": 52515 + }, + { + "epoch": 0.75, + "grad_norm": 0.482421875, + "learning_rate": 3.482409327825975e-05, + "loss": 0.8459, + "step": 52520 + }, + { + "epoch": 0.75, + "grad_norm": 0.59765625, + "learning_rate": 3.4805107212852504e-05, + "loss": 0.9101, + "step": 52525 + }, + { + "epoch": 0.75, + "grad_norm": 0.5234375, + "learning_rate": 3.4786125233935386e-05, + "loss": 0.9337, + "step": 52530 + }, + { + "epoch": 0.75, + "grad_norm": 0.90234375, + "learning_rate": 3.4767147342698244e-05, + "loss": 0.8437, + "step": 52535 + }, + { + "epoch": 0.75, + "grad_norm": 0.51171875, + "learning_rate": 3.474817354033058e-05, + "loss": 1.005, + "step": 52540 + }, + { + "epoch": 0.75, + "grad_norm": 0.62109375, + "learning_rate": 3.4729203828021694e-05, + "loss": 0.9563, + "step": 52545 + }, + { + "epoch": 0.75, + "grad_norm": 0.546875, + "learning_rate": 3.47102382069607e-05, + "loss": 0.9094, + "step": 52550 + }, + { + "epoch": 0.75, + "grad_norm": 0.5546875, + "learning_rate": 3.469127667833631e-05, + "loss": 0.8738, + "step": 52555 + }, + { + "epoch": 0.75, + "grad_norm": 0.53125, + "learning_rate": 3.467231924333707e-05, + "loss": 0.8692, + "step": 52560 + }, + { + "epoch": 0.75, + "grad_norm": 0.55859375, + "learning_rate": 3.465336590315128e-05, + "loss": 0.9131, + "step": 52565 + }, + { + "epoch": 0.75, + "grad_norm": 0.56640625, + "learning_rate": 3.463441665896692e-05, + "loss": 0.9032, + "step": 52570 + }, + { + "epoch": 0.75, + "grad_norm": 0.55859375, + "learning_rate": 3.461547151197175e-05, + "loss": 1.0797, + "step": 52575 + }, + { + "epoch": 0.75, + "grad_norm": 0.58203125, + "learning_rate": 3.4596530463353336e-05, + "loss": 0.9467, + "step": 52580 + }, + { + "epoch": 0.75, + "grad_norm": 0.66015625, + "learning_rate": 3.457759351429884e-05, + "loss": 0.8499, + "step": 52585 + }, + { + "epoch": 0.75, + "grad_norm": 0.453125, + "learning_rate": 3.455866066599531e-05, + "loss": 0.8842, + "step": 52590 + }, + { + "epoch": 0.75, + "grad_norm": 0.57421875, + "learning_rate": 3.453973191962948e-05, + "loss": 0.7765, + "step": 52595 + }, + { + "epoch": 0.75, + "grad_norm": 0.57421875, + "learning_rate": 3.452080727638778e-05, + "loss": 1.0101, + "step": 52600 + }, + { + "epoch": 0.75, + "grad_norm": 0.53125, + "learning_rate": 3.450188673745648e-05, + "loss": 0.9075, + "step": 52605 + }, + { + "epoch": 0.75, + "grad_norm": 0.5703125, + "learning_rate": 3.448297030402149e-05, + "loss": 0.9609, + "step": 52610 + }, + { + "epoch": 0.75, + "grad_norm": 0.765625, + "learning_rate": 3.446405797726857e-05, + "loss": 1.009, + "step": 52615 + }, + { + "epoch": 0.75, + "grad_norm": 0.66015625, + "learning_rate": 3.4445149758383096e-05, + "loss": 1.0084, + "step": 52620 + }, + { + "epoch": 0.75, + "grad_norm": 0.8671875, + "learning_rate": 3.44262456485503e-05, + "loss": 0.8443, + "step": 52625 + }, + { + "epoch": 0.75, + "grad_norm": 0.515625, + "learning_rate": 3.440734564895515e-05, + "loss": 0.7951, + "step": 52630 + }, + { + "epoch": 0.76, + "grad_norm": 0.546875, + "learning_rate": 3.438844976078224e-05, + "loss": 0.9905, + "step": 52635 + }, + { + "epoch": 0.76, + "grad_norm": 0.55078125, + "learning_rate": 3.436955798521602e-05, + "loss": 0.995, + "step": 52640 + }, + { + "epoch": 0.76, + "grad_norm": 0.57421875, + "learning_rate": 3.4350670323440684e-05, + "loss": 1.0952, + "step": 52645 + }, + { + "epoch": 0.76, + "grad_norm": 0.5859375, + "learning_rate": 3.4331786776640075e-05, + "loss": 0.8872, + "step": 52650 + }, + { + "epoch": 0.76, + "grad_norm": 0.546875, + "learning_rate": 3.431290734599785e-05, + "loss": 0.9607, + "step": 52655 + }, + { + "epoch": 0.76, + "grad_norm": 0.64453125, + "learning_rate": 3.429403203269748e-05, + "loss": 1.0511, + "step": 52660 + }, + { + "epoch": 0.76, + "grad_norm": 0.515625, + "learning_rate": 3.427516083792194e-05, + "loss": 0.9045, + "step": 52665 + }, + { + "epoch": 0.76, + "grad_norm": 0.52734375, + "learning_rate": 3.425629376285418e-05, + "loss": 1.0049, + "step": 52670 + }, + { + "epoch": 0.76, + "grad_norm": 0.5625, + "learning_rate": 3.423743080867684e-05, + "loss": 1.0459, + "step": 52675 + }, + { + "epoch": 0.76, + "grad_norm": 0.48828125, + "learning_rate": 3.421857197657219e-05, + "loss": 0.9, + "step": 52680 + }, + { + "epoch": 0.76, + "grad_norm": 0.546875, + "learning_rate": 3.419971726772238e-05, + "loss": 1.0364, + "step": 52685 + }, + { + "epoch": 0.76, + "grad_norm": 0.55859375, + "learning_rate": 3.4180866683309255e-05, + "loss": 1.015, + "step": 52690 + }, + { + "epoch": 0.76, + "grad_norm": 0.69140625, + "learning_rate": 3.416202022451433e-05, + "loss": 0.9177, + "step": 52695 + }, + { + "epoch": 0.76, + "grad_norm": 0.59765625, + "learning_rate": 3.4143177892518975e-05, + "loss": 1.0352, + "step": 52700 + }, + { + "epoch": 0.76, + "grad_norm": 0.55859375, + "learning_rate": 3.412433968850426e-05, + "loss": 1.0523, + "step": 52705 + }, + { + "epoch": 0.76, + "grad_norm": 0.51953125, + "learning_rate": 3.4105505613650956e-05, + "loss": 0.886, + "step": 52710 + }, + { + "epoch": 0.76, + "grad_norm": 0.578125, + "learning_rate": 3.408667566913958e-05, + "loss": 0.9176, + "step": 52715 + }, + { + "epoch": 0.76, + "grad_norm": 0.59765625, + "learning_rate": 3.406784985615044e-05, + "loss": 0.9577, + "step": 52720 + }, + { + "epoch": 0.76, + "grad_norm": 0.57421875, + "learning_rate": 3.40490281758636e-05, + "loss": 1.008, + "step": 52725 + }, + { + "epoch": 0.76, + "grad_norm": 0.5625, + "learning_rate": 3.403021062945875e-05, + "loss": 0.8865, + "step": 52730 + }, + { + "epoch": 0.76, + "grad_norm": 0.74609375, + "learning_rate": 3.4011397218115425e-05, + "loss": 0.9519, + "step": 52735 + }, + { + "epoch": 0.76, + "grad_norm": 0.5859375, + "learning_rate": 3.399258794301291e-05, + "loss": 1.0096, + "step": 52740 + }, + { + "epoch": 0.76, + "grad_norm": 0.5390625, + "learning_rate": 3.3973782805330135e-05, + "loss": 1.1155, + "step": 52745 + }, + { + "epoch": 0.76, + "grad_norm": 0.50390625, + "learning_rate": 3.395498180624584e-05, + "loss": 0.9467, + "step": 52750 + }, + { + "epoch": 0.76, + "grad_norm": 0.5625, + "learning_rate": 3.3936184946938544e-05, + "loss": 0.8723, + "step": 52755 + }, + { + "epoch": 0.76, + "grad_norm": 0.56640625, + "learning_rate": 3.391739222858639e-05, + "loss": 0.928, + "step": 52760 + }, + { + "epoch": 0.76, + "grad_norm": 0.54296875, + "learning_rate": 3.3898603652367364e-05, + "loss": 0.8712, + "step": 52765 + }, + { + "epoch": 0.76, + "grad_norm": 0.5859375, + "learning_rate": 3.387981921945916e-05, + "loss": 0.9256, + "step": 52770 + }, + { + "epoch": 0.76, + "grad_norm": 0.6171875, + "learning_rate": 3.386103893103916e-05, + "loss": 1.0196, + "step": 52775 + }, + { + "epoch": 0.76, + "grad_norm": 0.57421875, + "learning_rate": 3.384226278828456e-05, + "loss": 0.8886, + "step": 52780 + }, + { + "epoch": 0.76, + "grad_norm": 0.63671875, + "learning_rate": 3.382349079237232e-05, + "loss": 1.0258, + "step": 52785 + }, + { + "epoch": 0.76, + "grad_norm": 0.5234375, + "learning_rate": 3.3804722944479004e-05, + "loss": 0.8668, + "step": 52790 + }, + { + "epoch": 0.76, + "grad_norm": 0.5625, + "learning_rate": 3.378595924578104e-05, + "loss": 0.8021, + "step": 52795 + }, + { + "epoch": 0.76, + "grad_norm": 0.5703125, + "learning_rate": 3.37671996974546e-05, + "loss": 0.9738, + "step": 52800 + }, + { + "epoch": 0.76, + "grad_norm": 0.5234375, + "learning_rate": 3.3748444300675484e-05, + "loss": 0.8922, + "step": 52805 + }, + { + "epoch": 0.76, + "grad_norm": 0.7265625, + "learning_rate": 3.372969305661934e-05, + "loss": 0.9705, + "step": 52810 + }, + { + "epoch": 0.76, + "grad_norm": 0.6015625, + "learning_rate": 3.371094596646153e-05, + "loss": 0.9663, + "step": 52815 + }, + { + "epoch": 0.76, + "grad_norm": 0.5078125, + "learning_rate": 3.369220303137712e-05, + "loss": 1.0698, + "step": 52820 + }, + { + "epoch": 0.76, + "grad_norm": 0.5625, + "learning_rate": 3.367346425254093e-05, + "loss": 0.8487, + "step": 52825 + }, + { + "epoch": 0.76, + "grad_norm": 0.6015625, + "learning_rate": 3.365472963112752e-05, + "loss": 0.9631, + "step": 52830 + }, + { + "epoch": 0.76, + "grad_norm": 0.5546875, + "learning_rate": 3.363599916831126e-05, + "loss": 0.9238, + "step": 52835 + }, + { + "epoch": 0.76, + "grad_norm": 0.5, + "learning_rate": 3.361727286526612e-05, + "loss": 0.9257, + "step": 52840 + }, + { + "epoch": 0.76, + "grad_norm": 0.52734375, + "learning_rate": 3.359855072316592e-05, + "loss": 0.9111, + "step": 52845 + }, + { + "epoch": 0.76, + "grad_norm": 0.55078125, + "learning_rate": 3.357983274318422e-05, + "loss": 0.9004, + "step": 52850 + }, + { + "epoch": 0.76, + "grad_norm": 0.55859375, + "learning_rate": 3.356111892649423e-05, + "loss": 1.0007, + "step": 52855 + }, + { + "epoch": 0.76, + "grad_norm": 0.671875, + "learning_rate": 3.354240927426895e-05, + "loss": 1.0987, + "step": 52860 + }, + { + "epoch": 0.76, + "grad_norm": 0.60546875, + "learning_rate": 3.352370378768119e-05, + "loss": 1.0265, + "step": 52865 + }, + { + "epoch": 0.76, + "grad_norm": 0.6171875, + "learning_rate": 3.350500246790339e-05, + "loss": 1.024, + "step": 52870 + }, + { + "epoch": 0.76, + "grad_norm": 0.5703125, + "learning_rate": 3.348630531610773e-05, + "loss": 0.8475, + "step": 52875 + }, + { + "epoch": 0.76, + "grad_norm": 0.5859375, + "learning_rate": 3.346761233346624e-05, + "loss": 0.8669, + "step": 52880 + }, + { + "epoch": 0.76, + "grad_norm": 0.55859375, + "learning_rate": 3.344892352115055e-05, + "loss": 0.8269, + "step": 52885 + }, + { + "epoch": 0.76, + "grad_norm": 0.4765625, + "learning_rate": 3.3430238880332124e-05, + "loss": 0.868, + "step": 52890 + }, + { + "epoch": 0.76, + "grad_norm": 0.58984375, + "learning_rate": 3.3411558412182165e-05, + "loss": 0.8541, + "step": 52895 + }, + { + "epoch": 0.76, + "grad_norm": 0.53125, + "learning_rate": 3.339288211787155e-05, + "loss": 0.8378, + "step": 52900 + }, + { + "epoch": 0.76, + "grad_norm": 0.5625, + "learning_rate": 3.3374209998570924e-05, + "loss": 0.9926, + "step": 52905 + }, + { + "epoch": 0.76, + "grad_norm": 0.546875, + "learning_rate": 3.335554205545075e-05, + "loss": 0.9391, + "step": 52910 + }, + { + "epoch": 0.76, + "grad_norm": 0.609375, + "learning_rate": 3.333687828968105e-05, + "loss": 0.9534, + "step": 52915 + }, + { + "epoch": 0.76, + "grad_norm": 0.56640625, + "learning_rate": 3.331821870243179e-05, + "loss": 0.9321, + "step": 52920 + }, + { + "epoch": 0.76, + "grad_norm": 0.55078125, + "learning_rate": 3.3299563294872485e-05, + "loss": 0.9496, + "step": 52925 + }, + { + "epoch": 0.76, + "grad_norm": 0.57421875, + "learning_rate": 3.3280912068172554e-05, + "loss": 1.0303, + "step": 52930 + }, + { + "epoch": 0.76, + "grad_norm": 0.81640625, + "learning_rate": 3.3262265023501e-05, + "loss": 1.015, + "step": 52935 + }, + { + "epoch": 0.76, + "grad_norm": 0.53515625, + "learning_rate": 3.324362216202669e-05, + "loss": 1.0326, + "step": 52940 + }, + { + "epoch": 0.76, + "grad_norm": 0.60546875, + "learning_rate": 3.3224983484918205e-05, + "loss": 0.8786, + "step": 52945 + }, + { + "epoch": 0.76, + "grad_norm": 0.54296875, + "learning_rate": 3.320634899334377e-05, + "loss": 0.9051, + "step": 52950 + }, + { + "epoch": 0.76, + "grad_norm": 0.53515625, + "learning_rate": 3.3187718688471446e-05, + "loss": 0.9922, + "step": 52955 + }, + { + "epoch": 0.76, + "grad_norm": 0.5546875, + "learning_rate": 3.316909257146905e-05, + "loss": 0.9637, + "step": 52960 + }, + { + "epoch": 0.76, + "grad_norm": 0.53515625, + "learning_rate": 3.315047064350402e-05, + "loss": 0.9554, + "step": 52965 + }, + { + "epoch": 0.76, + "grad_norm": 0.455078125, + "learning_rate": 3.313185290574361e-05, + "loss": 0.7879, + "step": 52970 + }, + { + "epoch": 0.76, + "grad_norm": 0.55078125, + "learning_rate": 3.311323935935489e-05, + "loss": 1.035, + "step": 52975 + }, + { + "epoch": 0.76, + "grad_norm": 0.51953125, + "learning_rate": 3.3094630005504435e-05, + "loss": 0.9484, + "step": 52980 + }, + { + "epoch": 0.76, + "grad_norm": 0.640625, + "learning_rate": 3.307602484535877e-05, + "loss": 1.1787, + "step": 52985 + }, + { + "epoch": 0.76, + "grad_norm": 0.5703125, + "learning_rate": 3.3057423880084114e-05, + "loss": 0.8673, + "step": 52990 + }, + { + "epoch": 0.76, + "grad_norm": 0.5546875, + "learning_rate": 3.303882711084635e-05, + "loss": 0.9424, + "step": 52995 + }, + { + "epoch": 0.76, + "grad_norm": 0.51171875, + "learning_rate": 3.3020234538811155e-05, + "loss": 0.9029, + "step": 53000 + }, + { + "epoch": 0.76, + "grad_norm": 0.5859375, + "learning_rate": 3.3001646165143986e-05, + "loss": 1.0614, + "step": 53005 + }, + { + "epoch": 0.76, + "grad_norm": 0.55078125, + "learning_rate": 3.29830619910099e-05, + "loss": 1.0087, + "step": 53010 + }, + { + "epoch": 0.76, + "grad_norm": 0.56640625, + "learning_rate": 3.2964482017573816e-05, + "loss": 0.9137, + "step": 53015 + }, + { + "epoch": 0.76, + "grad_norm": 0.57421875, + "learning_rate": 3.294590624600038e-05, + "loss": 0.9672, + "step": 53020 + }, + { + "epoch": 0.76, + "grad_norm": 0.5078125, + "learning_rate": 3.29273346774539e-05, + "loss": 0.932, + "step": 53025 + }, + { + "epoch": 0.76, + "grad_norm": 0.5546875, + "learning_rate": 3.2908767313098453e-05, + "loss": 0.9715, + "step": 53030 + }, + { + "epoch": 0.76, + "grad_norm": 0.65234375, + "learning_rate": 3.289020415409787e-05, + "loss": 0.9634, + "step": 53035 + }, + { + "epoch": 0.76, + "grad_norm": 0.5390625, + "learning_rate": 3.2871645201615765e-05, + "loss": 1.0967, + "step": 53040 + }, + { + "epoch": 0.76, + "grad_norm": 0.5625, + "learning_rate": 3.285309045681535e-05, + "loss": 1.0092, + "step": 53045 + }, + { + "epoch": 0.76, + "grad_norm": 0.498046875, + "learning_rate": 3.283453992085971e-05, + "loss": 0.902, + "step": 53050 + }, + { + "epoch": 0.76, + "grad_norm": 0.578125, + "learning_rate": 3.281599359491162e-05, + "loss": 0.9241, + "step": 53055 + }, + { + "epoch": 0.76, + "grad_norm": 0.51171875, + "learning_rate": 3.279745148013354e-05, + "loss": 0.9882, + "step": 53060 + }, + { + "epoch": 0.76, + "grad_norm": 0.50390625, + "learning_rate": 3.2778913577687754e-05, + "loss": 0.9183, + "step": 53065 + }, + { + "epoch": 0.76, + "grad_norm": 0.58203125, + "learning_rate": 3.2760379888736236e-05, + "loss": 1.0216, + "step": 53070 + }, + { + "epoch": 0.76, + "grad_norm": 0.5546875, + "learning_rate": 3.274185041444067e-05, + "loss": 1.0704, + "step": 53075 + }, + { + "epoch": 0.76, + "grad_norm": 0.56640625, + "learning_rate": 3.272332515596254e-05, + "loss": 1.0757, + "step": 53080 + }, + { + "epoch": 0.76, + "grad_norm": 0.546875, + "learning_rate": 3.270480411446298e-05, + "loss": 0.9288, + "step": 53085 + }, + { + "epoch": 0.76, + "grad_norm": 0.5078125, + "learning_rate": 3.268628729110298e-05, + "loss": 1.0104, + "step": 53090 + }, + { + "epoch": 0.76, + "grad_norm": 0.49609375, + "learning_rate": 3.2667774687043116e-05, + "loss": 0.8726, + "step": 53095 + }, + { + "epoch": 0.76, + "grad_norm": 0.5390625, + "learning_rate": 3.264926630344386e-05, + "loss": 0.9184, + "step": 53100 + }, + { + "epoch": 0.76, + "grad_norm": 0.55078125, + "learning_rate": 3.2630762141465265e-05, + "loss": 0.9145, + "step": 53105 + }, + { + "epoch": 0.76, + "grad_norm": 0.6015625, + "learning_rate": 3.261226220226722e-05, + "loss": 1.1971, + "step": 53110 + }, + { + "epoch": 0.76, + "grad_norm": 0.578125, + "learning_rate": 3.259376648700937e-05, + "loss": 0.8967, + "step": 53115 + }, + { + "epoch": 0.76, + "grad_norm": 0.478515625, + "learning_rate": 3.257527499685096e-05, + "loss": 0.9529, + "step": 53120 + }, + { + "epoch": 0.76, + "grad_norm": 0.52734375, + "learning_rate": 3.255678773295111e-05, + "loss": 1.1128, + "step": 53125 + }, + { + "epoch": 0.76, + "grad_norm": 0.6015625, + "learning_rate": 3.253830469646866e-05, + "loss": 0.8973, + "step": 53130 + }, + { + "epoch": 0.76, + "grad_norm": 0.474609375, + "learning_rate": 3.251982588856208e-05, + "loss": 1.0136, + "step": 53135 + }, + { + "epoch": 0.76, + "grad_norm": 0.5546875, + "learning_rate": 3.250135131038966e-05, + "loss": 0.9316, + "step": 53140 + }, + { + "epoch": 0.76, + "grad_norm": 0.55078125, + "learning_rate": 3.24828809631094e-05, + "loss": 0.8264, + "step": 53145 + }, + { + "epoch": 0.76, + "grad_norm": 0.546875, + "learning_rate": 3.2464414847879084e-05, + "loss": 0.972, + "step": 53150 + }, + { + "epoch": 0.76, + "grad_norm": 0.65625, + "learning_rate": 3.244595296585614e-05, + "loss": 0.847, + "step": 53155 + }, + { + "epoch": 0.76, + "grad_norm": 0.59375, + "learning_rate": 3.24274953181978e-05, + "loss": 0.9734, + "step": 53160 + }, + { + "epoch": 0.76, + "grad_norm": 0.6015625, + "learning_rate": 3.240904190606105e-05, + "loss": 1.0219, + "step": 53165 + }, + { + "epoch": 0.76, + "grad_norm": 0.578125, + "learning_rate": 3.239059273060249e-05, + "loss": 0.9728, + "step": 53170 + }, + { + "epoch": 0.76, + "grad_norm": 0.53515625, + "learning_rate": 3.237214779297858e-05, + "loss": 1.1157, + "step": 53175 + }, + { + "epoch": 0.76, + "grad_norm": 1.15625, + "learning_rate": 3.235370709434551e-05, + "loss": 0.9806, + "step": 53180 + }, + { + "epoch": 0.76, + "grad_norm": 0.515625, + "learning_rate": 3.23352706358591e-05, + "loss": 0.942, + "step": 53185 + }, + { + "epoch": 0.76, + "grad_norm": 0.494140625, + "learning_rate": 3.2316838418674975e-05, + "loss": 1.0261, + "step": 53190 + }, + { + "epoch": 0.76, + "grad_norm": 0.48828125, + "learning_rate": 3.2298410443948504e-05, + "loss": 0.9171, + "step": 53195 + }, + { + "epoch": 0.76, + "grad_norm": 0.6640625, + "learning_rate": 3.22799867128348e-05, + "loss": 0.9859, + "step": 53200 + }, + { + "epoch": 0.76, + "grad_norm": 0.55859375, + "learning_rate": 3.2261567226488634e-05, + "loss": 0.7889, + "step": 53205 + }, + { + "epoch": 0.76, + "grad_norm": 0.52734375, + "learning_rate": 3.2243151986064565e-05, + "loss": 1.0395, + "step": 53210 + }, + { + "epoch": 0.76, + "grad_norm": 0.57421875, + "learning_rate": 3.222474099271694e-05, + "loss": 0.9108, + "step": 53215 + }, + { + "epoch": 0.76, + "grad_norm": 0.51171875, + "learning_rate": 3.22063342475997e-05, + "loss": 1.0814, + "step": 53220 + }, + { + "epoch": 0.76, + "grad_norm": 0.5546875, + "learning_rate": 3.218793175186664e-05, + "loss": 0.9087, + "step": 53225 + }, + { + "epoch": 0.76, + "grad_norm": 0.52734375, + "learning_rate": 3.216953350667129e-05, + "loss": 0.9244, + "step": 53230 + }, + { + "epoch": 0.76, + "grad_norm": 0.482421875, + "learning_rate": 3.2151139513166825e-05, + "loss": 0.8413, + "step": 53235 + }, + { + "epoch": 0.76, + "grad_norm": 0.5703125, + "learning_rate": 3.2132749772506176e-05, + "loss": 0.9695, + "step": 53240 + }, + { + "epoch": 0.76, + "grad_norm": 0.5234375, + "learning_rate": 3.2114364285842104e-05, + "loss": 0.8746, + "step": 53245 + }, + { + "epoch": 0.76, + "grad_norm": 0.4921875, + "learning_rate": 3.2095983054326964e-05, + "loss": 1.1472, + "step": 53250 + }, + { + "epoch": 0.76, + "grad_norm": 0.6015625, + "learning_rate": 3.2077606079112934e-05, + "loss": 1.0815, + "step": 53255 + }, + { + "epoch": 0.76, + "grad_norm": 0.546875, + "learning_rate": 3.205923336135195e-05, + "loss": 1.0107, + "step": 53260 + }, + { + "epoch": 0.76, + "grad_norm": 0.5703125, + "learning_rate": 3.2040864902195556e-05, + "loss": 1.0251, + "step": 53265 + }, + { + "epoch": 0.76, + "grad_norm": 0.6328125, + "learning_rate": 3.2022500702795156e-05, + "loss": 0.8614, + "step": 53270 + }, + { + "epoch": 0.76, + "grad_norm": 0.53515625, + "learning_rate": 3.200414076430186e-05, + "loss": 0.9612, + "step": 53275 + }, + { + "epoch": 0.76, + "grad_norm": 0.55078125, + "learning_rate": 3.198578508786643e-05, + "loss": 0.9162, + "step": 53280 + }, + { + "epoch": 0.76, + "grad_norm": 0.62890625, + "learning_rate": 3.196743367463946e-05, + "loss": 1.081, + "step": 53285 + }, + { + "epoch": 0.76, + "grad_norm": 0.60546875, + "learning_rate": 3.194908652577126e-05, + "loss": 1.0349, + "step": 53290 + }, + { + "epoch": 0.76, + "grad_norm": 0.51953125, + "learning_rate": 3.193074364241181e-05, + "loss": 0.8826, + "step": 53295 + }, + { + "epoch": 0.76, + "grad_norm": 0.57421875, + "learning_rate": 3.191240502571085e-05, + "loss": 1.0065, + "step": 53300 + }, + { + "epoch": 0.76, + "grad_norm": 0.5078125, + "learning_rate": 3.1894070676817876e-05, + "loss": 0.9465, + "step": 53305 + }, + { + "epoch": 0.76, + "grad_norm": 0.50390625, + "learning_rate": 3.187574059688216e-05, + "loss": 0.8663, + "step": 53310 + }, + { + "epoch": 0.76, + "grad_norm": 0.56640625, + "learning_rate": 3.185741478705259e-05, + "loss": 1.1527, + "step": 53315 + }, + { + "epoch": 0.76, + "grad_norm": 0.640625, + "learning_rate": 3.1839093248477846e-05, + "loss": 1.0791, + "step": 53320 + }, + { + "epoch": 0.76, + "grad_norm": 0.58984375, + "learning_rate": 3.1820775982306417e-05, + "loss": 0.922, + "step": 53325 + }, + { + "epoch": 0.76, + "grad_norm": 0.51953125, + "learning_rate": 3.1802462989686355e-05, + "loss": 0.9416, + "step": 53330 + }, + { + "epoch": 0.77, + "grad_norm": 0.67578125, + "learning_rate": 3.178415427176559e-05, + "loss": 0.9373, + "step": 53335 + }, + { + "epoch": 0.77, + "grad_norm": 0.6796875, + "learning_rate": 3.1765849829691785e-05, + "loss": 1.1576, + "step": 53340 + }, + { + "epoch": 0.77, + "grad_norm": 0.54296875, + "learning_rate": 3.1747549664612165e-05, + "loss": 0.9918, + "step": 53345 + }, + { + "epoch": 0.77, + "grad_norm": 0.5546875, + "learning_rate": 3.1729253777673864e-05, + "loss": 1.1734, + "step": 53350 + }, + { + "epoch": 0.77, + "grad_norm": 0.51953125, + "learning_rate": 3.1710962170023715e-05, + "loss": 0.8821, + "step": 53355 + }, + { + "epoch": 0.77, + "grad_norm": 0.53515625, + "learning_rate": 3.169267484280821e-05, + "loss": 0.8931, + "step": 53360 + }, + { + "epoch": 0.77, + "grad_norm": 0.59375, + "learning_rate": 3.167439179717364e-05, + "loss": 0.9137, + "step": 53365 + }, + { + "epoch": 0.77, + "grad_norm": 0.6171875, + "learning_rate": 3.1656113034266046e-05, + "loss": 1.0828, + "step": 53370 + }, + { + "epoch": 0.77, + "grad_norm": 0.54296875, + "learning_rate": 3.163783855523108e-05, + "loss": 0.9159, + "step": 53375 + }, + { + "epoch": 0.77, + "grad_norm": 0.72265625, + "learning_rate": 3.1619568361214256e-05, + "loss": 1.0501, + "step": 53380 + }, + { + "epoch": 0.77, + "grad_norm": 0.5, + "learning_rate": 3.1601302453360796e-05, + "loss": 1.0275, + "step": 53385 + }, + { + "epoch": 0.77, + "grad_norm": 0.50390625, + "learning_rate": 3.158304083281557e-05, + "loss": 0.9749, + "step": 53390 + }, + { + "epoch": 0.77, + "grad_norm": 0.65234375, + "learning_rate": 3.1564783500723296e-05, + "loss": 0.9175, + "step": 53395 + }, + { + "epoch": 0.77, + "grad_norm": 0.494140625, + "learning_rate": 3.154653045822829e-05, + "loss": 1.0527, + "step": 53400 + }, + { + "epoch": 0.77, + "grad_norm": 0.466796875, + "learning_rate": 3.152828170647477e-05, + "loss": 0.933, + "step": 53405 + }, + { + "epoch": 0.77, + "grad_norm": 0.5234375, + "learning_rate": 3.1510037246606496e-05, + "loss": 0.9635, + "step": 53410 + }, + { + "epoch": 0.77, + "grad_norm": 0.59375, + "learning_rate": 3.1491797079767086e-05, + "loss": 1.0825, + "step": 53415 + }, + { + "epoch": 0.77, + "grad_norm": 0.58984375, + "learning_rate": 3.147356120709989e-05, + "loss": 0.8809, + "step": 53420 + }, + { + "epoch": 0.77, + "grad_norm": 0.9453125, + "learning_rate": 3.1455329629747884e-05, + "loss": 1.0626, + "step": 53425 + }, + { + "epoch": 0.77, + "grad_norm": 0.56640625, + "learning_rate": 3.14371023488539e-05, + "loss": 0.8831, + "step": 53430 + }, + { + "epoch": 0.77, + "grad_norm": 0.5703125, + "learning_rate": 3.141887936556045e-05, + "loss": 0.9965, + "step": 53435 + }, + { + "epoch": 0.77, + "grad_norm": 0.5703125, + "learning_rate": 3.140066068100972e-05, + "loss": 0.9626, + "step": 53440 + }, + { + "epoch": 0.77, + "grad_norm": 0.53515625, + "learning_rate": 3.138244629634374e-05, + "loss": 1.0613, + "step": 53445 + }, + { + "epoch": 0.77, + "grad_norm": 0.58203125, + "learning_rate": 3.136423621270417e-05, + "loss": 0.9647, + "step": 53450 + }, + { + "epoch": 0.77, + "grad_norm": 0.60546875, + "learning_rate": 3.134603043123241e-05, + "loss": 0.8799, + "step": 53455 + }, + { + "epoch": 0.77, + "grad_norm": 0.5703125, + "learning_rate": 3.132782895306966e-05, + "loss": 0.9257, + "step": 53460 + }, + { + "epoch": 0.77, + "grad_norm": 0.51171875, + "learning_rate": 3.130963177935683e-05, + "loss": 0.9144, + "step": 53465 + }, + { + "epoch": 0.77, + "grad_norm": 0.6328125, + "learning_rate": 3.1291438911234474e-05, + "loss": 1.0308, + "step": 53470 + }, + { + "epoch": 0.77, + "grad_norm": 0.55859375, + "learning_rate": 3.1273250349842985e-05, + "loss": 1.0441, + "step": 53475 + }, + { + "epoch": 0.77, + "grad_norm": 0.49609375, + "learning_rate": 3.125506609632247e-05, + "loss": 0.8567, + "step": 53480 + }, + { + "epoch": 0.77, + "grad_norm": 0.56640625, + "learning_rate": 3.123688615181267e-05, + "loss": 1.1832, + "step": 53485 + }, + { + "epoch": 0.77, + "grad_norm": 0.52734375, + "learning_rate": 3.121871051745317e-05, + "loss": 0.9365, + "step": 53490 + }, + { + "epoch": 0.77, + "grad_norm": 0.6015625, + "learning_rate": 3.120053919438326e-05, + "loss": 0.8913, + "step": 53495 + }, + { + "epoch": 0.77, + "grad_norm": 0.6171875, + "learning_rate": 3.11823721837419e-05, + "loss": 1.0006, + "step": 53500 + }, + { + "epoch": 0.77, + "grad_norm": 0.515625, + "learning_rate": 3.116420948666781e-05, + "loss": 0.9045, + "step": 53505 + }, + { + "epoch": 0.77, + "grad_norm": 0.546875, + "learning_rate": 3.114605110429946e-05, + "loss": 0.926, + "step": 53510 + }, + { + "epoch": 0.77, + "grad_norm": 0.51171875, + "learning_rate": 3.1127897037775077e-05, + "loss": 0.9321, + "step": 53515 + }, + { + "epoch": 0.77, + "grad_norm": 0.6171875, + "learning_rate": 3.1109747288232505e-05, + "loss": 0.9293, + "step": 53520 + }, + { + "epoch": 0.77, + "grad_norm": 0.703125, + "learning_rate": 3.109160185680945e-05, + "loss": 0.9626, + "step": 53525 + }, + { + "epoch": 0.77, + "grad_norm": 0.50390625, + "learning_rate": 3.107346074464329e-05, + "loss": 0.8654, + "step": 53530 + }, + { + "epoch": 0.77, + "grad_norm": 0.6796875, + "learning_rate": 3.1055323952871086e-05, + "loss": 0.8872, + "step": 53535 + }, + { + "epoch": 0.77, + "grad_norm": 0.53515625, + "learning_rate": 3.1037191482629694e-05, + "loss": 1.0243, + "step": 53540 + }, + { + "epoch": 0.77, + "grad_norm": 0.51953125, + "learning_rate": 3.101906333505571e-05, + "loss": 0.9513, + "step": 53545 + }, + { + "epoch": 0.77, + "grad_norm": 0.5859375, + "learning_rate": 3.100093951128541e-05, + "loss": 0.9745, + "step": 53550 + }, + { + "epoch": 0.77, + "grad_norm": 0.51171875, + "learning_rate": 3.098282001245476e-05, + "loss": 1.0569, + "step": 53555 + }, + { + "epoch": 0.77, + "grad_norm": 0.6328125, + "learning_rate": 3.0964704839699596e-05, + "loss": 1.0132, + "step": 53560 + }, + { + "epoch": 0.77, + "grad_norm": 0.59765625, + "learning_rate": 3.094659399415533e-05, + "loss": 0.9108, + "step": 53565 + }, + { + "epoch": 0.77, + "grad_norm": 0.5, + "learning_rate": 3.092848747695718e-05, + "loss": 0.8874, + "step": 53570 + }, + { + "epoch": 0.77, + "grad_norm": 0.58984375, + "learning_rate": 3.091038528924015e-05, + "loss": 1.0389, + "step": 53575 + }, + { + "epoch": 0.77, + "grad_norm": 0.5859375, + "learning_rate": 3.089228743213881e-05, + "loss": 0.8209, + "step": 53580 + }, + { + "epoch": 0.77, + "grad_norm": 0.546875, + "learning_rate": 3.0874193906787616e-05, + "loss": 1.1522, + "step": 53585 + }, + { + "epoch": 0.77, + "grad_norm": 0.625, + "learning_rate": 3.08561047143207e-05, + "loss": 0.919, + "step": 53590 + }, + { + "epoch": 0.77, + "grad_norm": 0.6015625, + "learning_rate": 3.0838019855871855e-05, + "loss": 1.0189, + "step": 53595 + }, + { + "epoch": 0.77, + "grad_norm": 0.458984375, + "learning_rate": 3.08199393325747e-05, + "loss": 1.0154, + "step": 53600 + }, + { + "epoch": 0.77, + "grad_norm": 0.6171875, + "learning_rate": 3.080186314556256e-05, + "loss": 0.9181, + "step": 53605 + }, + { + "epoch": 0.77, + "grad_norm": 0.484375, + "learning_rate": 3.0783791295968454e-05, + "loss": 0.8568, + "step": 53610 + }, + { + "epoch": 0.77, + "grad_norm": 0.59765625, + "learning_rate": 3.0765723784925116e-05, + "loss": 1.0142, + "step": 53615 + }, + { + "epoch": 0.77, + "grad_norm": 0.56640625, + "learning_rate": 3.0747660613565054e-05, + "loss": 1.0121, + "step": 53620 + }, + { + "epoch": 0.77, + "grad_norm": 0.515625, + "learning_rate": 3.072960178302052e-05, + "loss": 0.8851, + "step": 53625 + }, + { + "epoch": 0.77, + "grad_norm": 0.67578125, + "learning_rate": 3.071154729442343e-05, + "loss": 0.9259, + "step": 53630 + }, + { + "epoch": 0.77, + "grad_norm": 0.5546875, + "learning_rate": 3.069349714890545e-05, + "loss": 1.1588, + "step": 53635 + }, + { + "epoch": 0.77, + "grad_norm": 0.578125, + "learning_rate": 3.0675451347598036e-05, + "loss": 1.0215, + "step": 53640 + }, + { + "epoch": 0.77, + "grad_norm": 0.5703125, + "learning_rate": 3.065740989163225e-05, + "loss": 0.9885, + "step": 53645 + }, + { + "epoch": 0.77, + "grad_norm": 0.48828125, + "learning_rate": 3.0639372782138995e-05, + "loss": 0.9443, + "step": 53650 + }, + { + "epoch": 0.77, + "grad_norm": 0.55859375, + "learning_rate": 3.062134002024891e-05, + "loss": 0.8408, + "step": 53655 + }, + { + "epoch": 0.77, + "grad_norm": 0.57421875, + "learning_rate": 3.060331160709219e-05, + "loss": 0.9833, + "step": 53660 + }, + { + "epoch": 0.77, + "grad_norm": 0.64453125, + "learning_rate": 3.0585287543798924e-05, + "loss": 0.9061, + "step": 53665 + }, + { + "epoch": 0.77, + "grad_norm": 0.4921875, + "learning_rate": 3.0567267831498937e-05, + "loss": 0.989, + "step": 53670 + }, + { + "epoch": 0.77, + "grad_norm": 0.5078125, + "learning_rate": 3.0549252471321635e-05, + "loss": 0.9358, + "step": 53675 + }, + { + "epoch": 0.77, + "grad_norm": 0.64453125, + "learning_rate": 3.0531241464396285e-05, + "loss": 0.8769, + "step": 53680 + }, + { + "epoch": 0.77, + "grad_norm": 0.5546875, + "learning_rate": 3.051323481185189e-05, + "loss": 0.8853, + "step": 53685 + }, + { + "epoch": 0.77, + "grad_norm": 0.6171875, + "learning_rate": 3.0495232514817016e-05, + "loss": 0.966, + "step": 53690 + }, + { + "epoch": 0.77, + "grad_norm": 0.6171875, + "learning_rate": 3.0477234574420143e-05, + "loss": 1.0596, + "step": 53695 + }, + { + "epoch": 0.77, + "grad_norm": 0.5234375, + "learning_rate": 3.045924099178942e-05, + "loss": 0.9349, + "step": 53700 + }, + { + "epoch": 0.77, + "grad_norm": 0.5625, + "learning_rate": 3.0441251768052636e-05, + "loss": 0.882, + "step": 53705 + }, + { + "epoch": 0.77, + "grad_norm": 0.5546875, + "learning_rate": 3.0423266904337444e-05, + "loss": 0.9613, + "step": 53710 + }, + { + "epoch": 0.77, + "grad_norm": 0.52734375, + "learning_rate": 3.0405286401771095e-05, + "loss": 0.9987, + "step": 53715 + }, + { + "epoch": 0.77, + "grad_norm": 0.54296875, + "learning_rate": 3.0387310261480684e-05, + "loss": 0.9974, + "step": 53720 + }, + { + "epoch": 0.77, + "grad_norm": 0.515625, + "learning_rate": 3.0369338484592914e-05, + "loss": 0.8701, + "step": 53725 + }, + { + "epoch": 0.77, + "grad_norm": 0.51953125, + "learning_rate": 3.0351371072234315e-05, + "loss": 0.9919, + "step": 53730 + }, + { + "epoch": 0.77, + "grad_norm": 0.59375, + "learning_rate": 3.033340802553113e-05, + "loss": 0.8744, + "step": 53735 + }, + { + "epoch": 0.77, + "grad_norm": 0.54296875, + "learning_rate": 3.0315449345609248e-05, + "loss": 0.9822, + "step": 53740 + }, + { + "epoch": 0.77, + "grad_norm": 0.5625, + "learning_rate": 3.029749503359436e-05, + "loss": 1.0412, + "step": 53745 + }, + { + "epoch": 0.77, + "grad_norm": 0.5234375, + "learning_rate": 3.0279545090611895e-05, + "loss": 0.9188, + "step": 53750 + }, + { + "epoch": 0.77, + "grad_norm": 0.61328125, + "learning_rate": 3.026159951778692e-05, + "loss": 0.99, + "step": 53755 + }, + { + "epoch": 0.77, + "grad_norm": 0.66015625, + "learning_rate": 3.024365831624435e-05, + "loss": 0.9298, + "step": 53760 + }, + { + "epoch": 0.77, + "grad_norm": 0.66796875, + "learning_rate": 3.0225721487108683e-05, + "loss": 1.0029, + "step": 53765 + }, + { + "epoch": 0.77, + "grad_norm": 0.640625, + "learning_rate": 3.0207789031504295e-05, + "loss": 1.0409, + "step": 53770 + }, + { + "epoch": 0.77, + "grad_norm": 0.62890625, + "learning_rate": 3.018986095055515e-05, + "loss": 0.9551, + "step": 53775 + }, + { + "epoch": 0.77, + "grad_norm": 0.703125, + "learning_rate": 3.017193724538505e-05, + "loss": 1.0299, + "step": 53780 + }, + { + "epoch": 0.77, + "grad_norm": 0.53125, + "learning_rate": 3.015401791711743e-05, + "loss": 0.8979, + "step": 53785 + }, + { + "epoch": 0.77, + "grad_norm": 0.54296875, + "learning_rate": 3.0136102966875502e-05, + "loss": 1.1295, + "step": 53790 + }, + { + "epoch": 0.77, + "grad_norm": 0.62109375, + "learning_rate": 3.011819239578225e-05, + "loss": 0.917, + "step": 53795 + }, + { + "epoch": 0.77, + "grad_norm": 0.9375, + "learning_rate": 3.0100286204960247e-05, + "loss": 0.9848, + "step": 53800 + }, + { + "epoch": 0.77, + "grad_norm": 0.53125, + "learning_rate": 3.0082384395531926e-05, + "loss": 0.8899, + "step": 53805 + }, + { + "epoch": 0.77, + "grad_norm": 0.546875, + "learning_rate": 3.00644869686194e-05, + "loss": 1.1058, + "step": 53810 + }, + { + "epoch": 0.77, + "grad_norm": 0.60546875, + "learning_rate": 3.004659392534449e-05, + "loss": 1.0225, + "step": 53815 + }, + { + "epoch": 0.77, + "grad_norm": 0.59375, + "learning_rate": 3.0028705266828704e-05, + "loss": 0.9437, + "step": 53820 + }, + { + "epoch": 0.77, + "grad_norm": 0.51953125, + "learning_rate": 3.0010820994193367e-05, + "loss": 0.9064, + "step": 53825 + }, + { + "epoch": 0.77, + "grad_norm": 0.53515625, + "learning_rate": 2.9992941108559514e-05, + "loss": 0.8512, + "step": 53830 + }, + { + "epoch": 0.77, + "grad_norm": 0.546875, + "learning_rate": 2.9975065611047804e-05, + "loss": 0.9165, + "step": 53835 + }, + { + "epoch": 0.77, + "grad_norm": 0.50390625, + "learning_rate": 2.995719450277874e-05, + "loss": 0.8977, + "step": 53840 + }, + { + "epoch": 0.77, + "grad_norm": 0.59375, + "learning_rate": 2.993932778487253e-05, + "loss": 0.9242, + "step": 53845 + }, + { + "epoch": 0.77, + "grad_norm": 0.57421875, + "learning_rate": 2.9921465458449006e-05, + "loss": 0.8031, + "step": 53850 + }, + { + "epoch": 0.77, + "grad_norm": 0.49609375, + "learning_rate": 2.9903607524627852e-05, + "loss": 0.7719, + "step": 53855 + }, + { + "epoch": 0.77, + "grad_norm": 0.546875, + "learning_rate": 2.988575398452843e-05, + "loss": 1.0093, + "step": 53860 + }, + { + "epoch": 0.77, + "grad_norm": 0.5, + "learning_rate": 2.9867904839269812e-05, + "loss": 1.0583, + "step": 53865 + }, + { + "epoch": 0.77, + "grad_norm": 0.63671875, + "learning_rate": 2.9850060089970753e-05, + "loss": 0.9877, + "step": 53870 + }, + { + "epoch": 0.77, + "grad_norm": 0.51171875, + "learning_rate": 2.9832219737749835e-05, + "loss": 0.8176, + "step": 53875 + }, + { + "epoch": 0.77, + "grad_norm": 0.5859375, + "learning_rate": 2.981438378372532e-05, + "loss": 0.8607, + "step": 53880 + }, + { + "epoch": 0.77, + "grad_norm": 0.58984375, + "learning_rate": 2.9796552229015128e-05, + "loss": 0.9302, + "step": 53885 + }, + { + "epoch": 0.77, + "grad_norm": 0.5546875, + "learning_rate": 2.9778725074737002e-05, + "loss": 1.0279, + "step": 53890 + }, + { + "epoch": 0.77, + "grad_norm": 0.51953125, + "learning_rate": 2.9760902322008398e-05, + "loss": 0.9389, + "step": 53895 + }, + { + "epoch": 0.77, + "grad_norm": 0.57421875, + "learning_rate": 2.9743083971946407e-05, + "loss": 0.9887, + "step": 53900 + }, + { + "epoch": 0.77, + "grad_norm": 0.5078125, + "learning_rate": 2.9725270025667917e-05, + "loss": 0.9757, + "step": 53905 + }, + { + "epoch": 0.77, + "grad_norm": 0.65625, + "learning_rate": 2.9707460484289573e-05, + "loss": 0.9174, + "step": 53910 + }, + { + "epoch": 0.77, + "grad_norm": 0.55078125, + "learning_rate": 2.9689655348927625e-05, + "loss": 1.0818, + "step": 53915 + }, + { + "epoch": 0.77, + "grad_norm": 0.6171875, + "learning_rate": 2.9671854620698192e-05, + "loss": 0.884, + "step": 53920 + }, + { + "epoch": 0.77, + "grad_norm": 0.5703125, + "learning_rate": 2.9654058300717015e-05, + "loss": 1.0192, + "step": 53925 + }, + { + "epoch": 0.77, + "grad_norm": 0.5546875, + "learning_rate": 2.9636266390099543e-05, + "loss": 0.7995, + "step": 53930 + }, + { + "epoch": 0.77, + "grad_norm": 0.52734375, + "learning_rate": 2.9618478889961032e-05, + "loss": 0.9835, + "step": 53935 + }, + { + "epoch": 0.77, + "grad_norm": 0.56640625, + "learning_rate": 2.9600695801416454e-05, + "loss": 0.912, + "step": 53940 + }, + { + "epoch": 0.77, + "grad_norm": 0.5546875, + "learning_rate": 2.958291712558041e-05, + "loss": 0.9939, + "step": 53945 + }, + { + "epoch": 0.77, + "grad_norm": 0.5625, + "learning_rate": 2.956514286356732e-05, + "loss": 0.8653, + "step": 53950 + }, + { + "epoch": 0.77, + "grad_norm": 0.6171875, + "learning_rate": 2.954737301649132e-05, + "loss": 1.0124, + "step": 53955 + }, + { + "epoch": 0.77, + "grad_norm": 0.4921875, + "learning_rate": 2.95296075854662e-05, + "loss": 0.9432, + "step": 53960 + }, + { + "epoch": 0.77, + "grad_norm": 0.58203125, + "learning_rate": 2.9511846571605517e-05, + "loss": 0.8652, + "step": 53965 + }, + { + "epoch": 0.77, + "grad_norm": 0.57421875, + "learning_rate": 2.949408997602262e-05, + "loss": 0.9531, + "step": 53970 + }, + { + "epoch": 0.77, + "grad_norm": 0.6015625, + "learning_rate": 2.9476337799830456e-05, + "loss": 0.9282, + "step": 53975 + }, + { + "epoch": 0.77, + "grad_norm": 0.498046875, + "learning_rate": 2.945859004414172e-05, + "loss": 1.1041, + "step": 53980 + }, + { + "epoch": 0.77, + "grad_norm": 0.55859375, + "learning_rate": 2.9440846710068902e-05, + "loss": 0.948, + "step": 53985 + }, + { + "epoch": 0.77, + "grad_norm": 0.6328125, + "learning_rate": 2.9423107798724214e-05, + "loss": 0.9718, + "step": 53990 + }, + { + "epoch": 0.77, + "grad_norm": 0.53125, + "learning_rate": 2.9405373311219465e-05, + "loss": 0.9747, + "step": 53995 + }, + { + "epoch": 0.77, + "grad_norm": 0.59375, + "learning_rate": 2.938764324866632e-05, + "loss": 0.8916, + "step": 54000 + }, + { + "epoch": 0.77, + "grad_norm": 0.55859375, + "learning_rate": 2.9369917612176157e-05, + "loss": 0.951, + "step": 54005 + }, + { + "epoch": 0.77, + "grad_norm": 0.53125, + "learning_rate": 2.9352196402859955e-05, + "loss": 1.0411, + "step": 54010 + }, + { + "epoch": 0.77, + "grad_norm": 0.6171875, + "learning_rate": 2.9334479621828558e-05, + "loss": 0.8444, + "step": 54015 + }, + { + "epoch": 0.77, + "grad_norm": 0.6328125, + "learning_rate": 2.931676727019248e-05, + "loss": 1.1292, + "step": 54020 + }, + { + "epoch": 0.77, + "grad_norm": 0.51953125, + "learning_rate": 2.9299059349061942e-05, + "loss": 0.9727, + "step": 54025 + }, + { + "epoch": 0.78, + "grad_norm": 0.7734375, + "learning_rate": 2.9281355859546843e-05, + "loss": 0.8993, + "step": 54030 + }, + { + "epoch": 0.78, + "grad_norm": 0.62890625, + "learning_rate": 2.9263656802756935e-05, + "loss": 1.0242, + "step": 54035 + }, + { + "epoch": 0.78, + "grad_norm": 0.53125, + "learning_rate": 2.9245962179801544e-05, + "loss": 0.9757, + "step": 54040 + }, + { + "epoch": 0.78, + "grad_norm": 0.62109375, + "learning_rate": 2.9228271991789834e-05, + "loss": 1.0083, + "step": 54045 + }, + { + "epoch": 0.78, + "grad_norm": 0.52734375, + "learning_rate": 2.9210586239830673e-05, + "loss": 0.9915, + "step": 54050 + }, + { + "epoch": 0.78, + "grad_norm": 0.50390625, + "learning_rate": 2.919290492503255e-05, + "loss": 1.0139, + "step": 54055 + }, + { + "epoch": 0.78, + "grad_norm": 0.88671875, + "learning_rate": 2.9175228048503788e-05, + "loss": 1.1561, + "step": 54060 + }, + { + "epoch": 0.78, + "grad_norm": 0.5234375, + "learning_rate": 2.9157555611352427e-05, + "loss": 1.0815, + "step": 54065 + }, + { + "epoch": 0.78, + "grad_norm": 0.6015625, + "learning_rate": 2.9139887614686134e-05, + "loss": 0.9104, + "step": 54070 + }, + { + "epoch": 0.78, + "grad_norm": 0.65234375, + "learning_rate": 2.912222405961239e-05, + "loss": 1.1101, + "step": 54075 + }, + { + "epoch": 0.78, + "grad_norm": 0.703125, + "learning_rate": 2.910456494723839e-05, + "loss": 1.0586, + "step": 54080 + }, + { + "epoch": 0.78, + "grad_norm": 0.52734375, + "learning_rate": 2.908691027867101e-05, + "loss": 0.7989, + "step": 54085 + }, + { + "epoch": 0.78, + "grad_norm": 0.48046875, + "learning_rate": 2.9069260055016813e-05, + "loss": 0.9496, + "step": 54090 + }, + { + "epoch": 0.78, + "grad_norm": 0.53515625, + "learning_rate": 2.905161427738219e-05, + "loss": 0.9066, + "step": 54095 + }, + { + "epoch": 0.78, + "grad_norm": 0.7421875, + "learning_rate": 2.9033972946873212e-05, + "loss": 0.9965, + "step": 54100 + }, + { + "epoch": 0.78, + "grad_norm": 0.490234375, + "learning_rate": 2.9016336064595607e-05, + "loss": 0.8419, + "step": 54105 + }, + { + "epoch": 0.78, + "grad_norm": 0.55859375, + "learning_rate": 2.89987036316549e-05, + "loss": 0.7921, + "step": 54110 + }, + { + "epoch": 0.78, + "grad_norm": 0.59375, + "learning_rate": 2.898107564915634e-05, + "loss": 0.8745, + "step": 54115 + }, + { + "epoch": 0.78, + "grad_norm": 0.5234375, + "learning_rate": 2.8963452118204827e-05, + "loss": 0.7942, + "step": 54120 + }, + { + "epoch": 0.78, + "grad_norm": 0.60546875, + "learning_rate": 2.8945833039905023e-05, + "loss": 0.8932, + "step": 54125 + }, + { + "epoch": 0.78, + "grad_norm": 0.59375, + "learning_rate": 2.8928218415361397e-05, + "loss": 0.8994, + "step": 54130 + }, + { + "epoch": 0.78, + "grad_norm": 0.58984375, + "learning_rate": 2.8910608245677918e-05, + "loss": 1.0687, + "step": 54135 + }, + { + "epoch": 0.78, + "grad_norm": 0.5625, + "learning_rate": 2.8893002531958478e-05, + "loss": 0.8616, + "step": 54140 + }, + { + "epoch": 0.78, + "grad_norm": 0.55078125, + "learning_rate": 2.8875401275306657e-05, + "loss": 0.9944, + "step": 54145 + }, + { + "epoch": 0.78, + "grad_norm": 0.55859375, + "learning_rate": 2.8857804476825655e-05, + "loss": 0.7736, + "step": 54150 + }, + { + "epoch": 0.78, + "grad_norm": 0.6640625, + "learning_rate": 2.8840212137618495e-05, + "loss": 0.9387, + "step": 54155 + }, + { + "epoch": 0.78, + "grad_norm": 0.55078125, + "learning_rate": 2.882262425878791e-05, + "loss": 0.8779, + "step": 54160 + }, + { + "epoch": 0.78, + "grad_norm": 0.59375, + "learning_rate": 2.880504084143627e-05, + "loss": 1.0307, + "step": 54165 + }, + { + "epoch": 0.78, + "grad_norm": 0.5390625, + "learning_rate": 2.8787461886665756e-05, + "loss": 0.8971, + "step": 54170 + }, + { + "epoch": 0.78, + "grad_norm": 0.53515625, + "learning_rate": 2.8769887395578276e-05, + "loss": 0.8709, + "step": 54175 + }, + { + "epoch": 0.78, + "grad_norm": 0.52734375, + "learning_rate": 2.875231736927534e-05, + "loss": 0.997, + "step": 54180 + }, + { + "epoch": 0.78, + "grad_norm": 0.5234375, + "learning_rate": 2.8734751808858318e-05, + "loss": 0.9288, + "step": 54185 + }, + { + "epoch": 0.78, + "grad_norm": 0.54296875, + "learning_rate": 2.87171907154282e-05, + "loss": 1.0063, + "step": 54190 + }, + { + "epoch": 0.78, + "grad_norm": 0.57421875, + "learning_rate": 2.869963409008577e-05, + "loss": 1.0611, + "step": 54195 + }, + { + "epoch": 0.78, + "grad_norm": 0.50390625, + "learning_rate": 2.868208193393146e-05, + "loss": 0.9429, + "step": 54200 + }, + { + "epoch": 0.78, + "grad_norm": 0.57421875, + "learning_rate": 2.8664534248065467e-05, + "loss": 0.8201, + "step": 54205 + }, + { + "epoch": 0.78, + "grad_norm": 0.57421875, + "learning_rate": 2.8646991033587754e-05, + "loss": 0.8655, + "step": 54210 + }, + { + "epoch": 0.78, + "grad_norm": 0.55078125, + "learning_rate": 2.8629452291597868e-05, + "loss": 1.0149, + "step": 54215 + }, + { + "epoch": 0.78, + "grad_norm": 0.66015625, + "learning_rate": 2.8611918023195193e-05, + "loss": 0.9014, + "step": 54220 + }, + { + "epoch": 0.78, + "grad_norm": 0.6015625, + "learning_rate": 2.8594388229478843e-05, + "loss": 0.933, + "step": 54225 + }, + { + "epoch": 0.78, + "grad_norm": 0.53515625, + "learning_rate": 2.857686291154753e-05, + "loss": 1.0044, + "step": 54230 + }, + { + "epoch": 0.78, + "grad_norm": 0.53125, + "learning_rate": 2.8559342070499817e-05, + "loss": 0.9215, + "step": 54235 + }, + { + "epoch": 0.78, + "grad_norm": 0.5703125, + "learning_rate": 2.854182570743391e-05, + "loss": 0.8283, + "step": 54240 + }, + { + "epoch": 0.78, + "grad_norm": 0.65625, + "learning_rate": 2.8524313823447724e-05, + "loss": 0.8395, + "step": 54245 + }, + { + "epoch": 0.78, + "grad_norm": 0.58203125, + "learning_rate": 2.8506806419638953e-05, + "loss": 1.0228, + "step": 54250 + }, + { + "epoch": 0.78, + "grad_norm": 0.57421875, + "learning_rate": 2.8489303497105012e-05, + "loss": 1.0182, + "step": 54255 + }, + { + "epoch": 0.78, + "grad_norm": 0.640625, + "learning_rate": 2.8471805056942936e-05, + "loss": 1.0948, + "step": 54260 + }, + { + "epoch": 0.78, + "grad_norm": 0.5625, + "learning_rate": 2.8454311100249588e-05, + "loss": 0.9233, + "step": 54265 + }, + { + "epoch": 0.78, + "grad_norm": 0.60546875, + "learning_rate": 2.8436821628121523e-05, + "loss": 1.0684, + "step": 54270 + }, + { + "epoch": 0.78, + "grad_norm": 0.609375, + "learning_rate": 2.8419336641654958e-05, + "loss": 0.9943, + "step": 54275 + }, + { + "epoch": 0.78, + "grad_norm": 0.453125, + "learning_rate": 2.8401856141945893e-05, + "loss": 0.8644, + "step": 54280 + }, + { + "epoch": 0.78, + "grad_norm": 0.546875, + "learning_rate": 2.8384380130090072e-05, + "loss": 0.99, + "step": 54285 + }, + { + "epoch": 0.78, + "grad_norm": 0.5859375, + "learning_rate": 2.8366908607182852e-05, + "loss": 1.0081, + "step": 54290 + }, + { + "epoch": 0.78, + "grad_norm": 0.53125, + "learning_rate": 2.834944157431936e-05, + "loss": 1.066, + "step": 54295 + }, + { + "epoch": 0.78, + "grad_norm": 0.546875, + "learning_rate": 2.833197903259447e-05, + "loss": 0.9283, + "step": 54300 + }, + { + "epoch": 0.78, + "grad_norm": 0.53125, + "learning_rate": 2.83145209831028e-05, + "loss": 0.8595, + "step": 54305 + }, + { + "epoch": 0.78, + "grad_norm": 0.515625, + "learning_rate": 2.829706742693855e-05, + "loss": 1.1134, + "step": 54310 + }, + { + "epoch": 0.78, + "grad_norm": 0.5390625, + "learning_rate": 2.827961836519578e-05, + "loss": 0.8888, + "step": 54315 + }, + { + "epoch": 0.78, + "grad_norm": 0.55078125, + "learning_rate": 2.826217379896825e-05, + "loss": 1.1328, + "step": 54320 + }, + { + "epoch": 0.78, + "grad_norm": 0.5, + "learning_rate": 2.8244733729349338e-05, + "loss": 0.8952, + "step": 54325 + }, + { + "epoch": 0.78, + "grad_norm": 0.5625, + "learning_rate": 2.8227298157432235e-05, + "loss": 0.9766, + "step": 54330 + }, + { + "epoch": 0.78, + "grad_norm": 0.55078125, + "learning_rate": 2.820986708430986e-05, + "loss": 0.9542, + "step": 54335 + }, + { + "epoch": 0.78, + "grad_norm": 0.470703125, + "learning_rate": 2.8192440511074782e-05, + "loss": 0.964, + "step": 54340 + }, + { + "epoch": 0.78, + "grad_norm": 0.59375, + "learning_rate": 2.817501843881928e-05, + "loss": 1.0103, + "step": 54345 + }, + { + "epoch": 0.78, + "grad_norm": 0.5859375, + "learning_rate": 2.8157600868635448e-05, + "loss": 0.9626, + "step": 54350 + }, + { + "epoch": 0.78, + "grad_norm": 0.546875, + "learning_rate": 2.8140187801614993e-05, + "loss": 0.8749, + "step": 54355 + }, + { + "epoch": 0.78, + "grad_norm": 0.5546875, + "learning_rate": 2.8122779238849407e-05, + "loss": 1.015, + "step": 54360 + }, + { + "epoch": 0.78, + "grad_norm": 0.5234375, + "learning_rate": 2.8105375181429906e-05, + "loss": 0.9016, + "step": 54365 + }, + { + "epoch": 0.78, + "grad_norm": 0.58984375, + "learning_rate": 2.808797563044734e-05, + "loss": 0.9122, + "step": 54370 + }, + { + "epoch": 0.78, + "grad_norm": 0.51953125, + "learning_rate": 2.8070580586992356e-05, + "loss": 0.8866, + "step": 54375 + }, + { + "epoch": 0.78, + "grad_norm": 0.62890625, + "learning_rate": 2.8053190052155343e-05, + "loss": 1.0144, + "step": 54380 + }, + { + "epoch": 0.78, + "grad_norm": 0.55859375, + "learning_rate": 2.803580402702628e-05, + "loss": 1.0065, + "step": 54385 + }, + { + "epoch": 0.78, + "grad_norm": 0.5703125, + "learning_rate": 2.8018422512694985e-05, + "loss": 0.976, + "step": 54390 + }, + { + "epoch": 0.78, + "grad_norm": 0.64453125, + "learning_rate": 2.800104551025098e-05, + "loss": 1.0068, + "step": 54395 + }, + { + "epoch": 0.78, + "grad_norm": 0.58984375, + "learning_rate": 2.798367302078344e-05, + "loss": 0.9593, + "step": 54400 + }, + { + "epoch": 0.78, + "grad_norm": 0.53515625, + "learning_rate": 2.7966305045381268e-05, + "loss": 1.1082, + "step": 54405 + }, + { + "epoch": 0.78, + "grad_norm": 0.51171875, + "learning_rate": 2.7948941585133126e-05, + "loss": 0.9987, + "step": 54410 + }, + { + "epoch": 0.78, + "grad_norm": 0.53515625, + "learning_rate": 2.793158264112743e-05, + "loss": 0.8221, + "step": 54415 + }, + { + "epoch": 0.78, + "grad_norm": 0.5, + "learning_rate": 2.7914228214452165e-05, + "loss": 0.7934, + "step": 54420 + }, + { + "epoch": 0.78, + "grad_norm": 0.57421875, + "learning_rate": 2.7896878306195184e-05, + "loss": 0.9969, + "step": 54425 + }, + { + "epoch": 0.78, + "grad_norm": 0.486328125, + "learning_rate": 2.787953291744402e-05, + "loss": 0.9567, + "step": 54430 + }, + { + "epoch": 0.78, + "grad_norm": 0.53515625, + "learning_rate": 2.786219204928584e-05, + "loss": 0.923, + "step": 54435 + }, + { + "epoch": 0.78, + "grad_norm": 0.6015625, + "learning_rate": 2.7844855702807616e-05, + "loss": 1.0718, + "step": 54440 + }, + { + "epoch": 0.78, + "grad_norm": 0.5625, + "learning_rate": 2.7827523879096052e-05, + "loss": 0.9112, + "step": 54445 + }, + { + "epoch": 0.78, + "grad_norm": 0.546875, + "learning_rate": 2.781019657923748e-05, + "loss": 0.9836, + "step": 54450 + }, + { + "epoch": 0.78, + "grad_norm": 0.546875, + "learning_rate": 2.7792873804317976e-05, + "loss": 0.8287, + "step": 54455 + }, + { + "epoch": 0.78, + "grad_norm": 0.53515625, + "learning_rate": 2.7775555555423416e-05, + "loss": 1.008, + "step": 54460 + }, + { + "epoch": 0.78, + "grad_norm": 0.54296875, + "learning_rate": 2.7758241833639252e-05, + "loss": 0.9603, + "step": 54465 + }, + { + "epoch": 0.78, + "grad_norm": 0.56640625, + "learning_rate": 2.7740932640050775e-05, + "loss": 0.88, + "step": 54470 + }, + { + "epoch": 0.78, + "grad_norm": 0.5078125, + "learning_rate": 2.7723627975742962e-05, + "loss": 1.0548, + "step": 54475 + }, + { + "epoch": 0.78, + "grad_norm": 0.54296875, + "learning_rate": 2.7706327841800428e-05, + "loss": 1.0476, + "step": 54480 + }, + { + "epoch": 0.78, + "grad_norm": 0.640625, + "learning_rate": 2.768903223930761e-05, + "loss": 0.9371, + "step": 54485 + }, + { + "epoch": 0.78, + "grad_norm": 0.5546875, + "learning_rate": 2.7671741169348632e-05, + "loss": 0.9731, + "step": 54490 + }, + { + "epoch": 0.78, + "grad_norm": 0.65625, + "learning_rate": 2.7654454633007266e-05, + "loss": 1.1529, + "step": 54495 + }, + { + "epoch": 0.78, + "grad_norm": 0.52734375, + "learning_rate": 2.7637172631367104e-05, + "loss": 0.9571, + "step": 54500 + }, + { + "epoch": 0.78, + "grad_norm": 0.6015625, + "learning_rate": 2.7619895165511345e-05, + "loss": 0.9497, + "step": 54505 + }, + { + "epoch": 0.78, + "grad_norm": 0.50390625, + "learning_rate": 2.7602622236523023e-05, + "loss": 0.8806, + "step": 54510 + }, + { + "epoch": 0.78, + "grad_norm": 0.57421875, + "learning_rate": 2.7585353845484764e-05, + "loss": 0.9026, + "step": 54515 + }, + { + "epoch": 0.78, + "grad_norm": 0.59375, + "learning_rate": 2.7568089993478995e-05, + "loss": 0.8992, + "step": 54520 + }, + { + "epoch": 0.78, + "grad_norm": 0.6015625, + "learning_rate": 2.755083068158787e-05, + "loss": 1.0083, + "step": 54525 + }, + { + "epoch": 0.78, + "grad_norm": 0.546875, + "learning_rate": 2.7533575910893162e-05, + "loss": 1.0233, + "step": 54530 + }, + { + "epoch": 0.78, + "grad_norm": 0.58984375, + "learning_rate": 2.7516325682476453e-05, + "loss": 0.9418, + "step": 54535 + }, + { + "epoch": 0.78, + "grad_norm": 0.5703125, + "learning_rate": 2.749907999741902e-05, + "loss": 1.0514, + "step": 54540 + }, + { + "epoch": 0.78, + "grad_norm": 0.546875, + "learning_rate": 2.74818388568018e-05, + "loss": 0.9402, + "step": 54545 + }, + { + "epoch": 0.78, + "grad_norm": 0.5859375, + "learning_rate": 2.746460226170554e-05, + "loss": 1.0042, + "step": 54550 + }, + { + "epoch": 0.78, + "grad_norm": 0.58984375, + "learning_rate": 2.74473702132106e-05, + "loss": 0.9899, + "step": 54555 + }, + { + "epoch": 0.78, + "grad_norm": 0.59765625, + "learning_rate": 2.7430142712397157e-05, + "loss": 0.8437, + "step": 54560 + }, + { + "epoch": 0.78, + "grad_norm": 0.5234375, + "learning_rate": 2.7412919760344978e-05, + "loss": 0.9307, + "step": 54565 + }, + { + "epoch": 0.78, + "grad_norm": 0.462890625, + "learning_rate": 2.7395701358133664e-05, + "loss": 0.8752, + "step": 54570 + }, + { + "epoch": 0.78, + "grad_norm": 0.58984375, + "learning_rate": 2.73784875068425e-05, + "loss": 0.8699, + "step": 54575 + }, + { + "epoch": 0.78, + "grad_norm": 0.52734375, + "learning_rate": 2.7361278207550423e-05, + "loss": 0.9109, + "step": 54580 + }, + { + "epoch": 0.78, + "grad_norm": 0.52734375, + "learning_rate": 2.734407346133616e-05, + "loss": 1.0958, + "step": 54585 + }, + { + "epoch": 0.78, + "grad_norm": 0.5078125, + "learning_rate": 2.7326873269278154e-05, + "loss": 0.8842, + "step": 54590 + }, + { + "epoch": 0.78, + "grad_norm": 0.53515625, + "learning_rate": 2.7309677632454454e-05, + "loss": 0.9275, + "step": 54595 + }, + { + "epoch": 0.78, + "grad_norm": 0.50390625, + "learning_rate": 2.7292486551942987e-05, + "loss": 0.8883, + "step": 54600 + }, + { + "epoch": 0.78, + "grad_norm": 0.65625, + "learning_rate": 2.727530002882127e-05, + "loss": 0.96, + "step": 54605 + }, + { + "epoch": 0.78, + "grad_norm": 0.5625, + "learning_rate": 2.725811806416655e-05, + "loss": 0.9566, + "step": 54610 + }, + { + "epoch": 0.78, + "grad_norm": 0.498046875, + "learning_rate": 2.7240940659055826e-05, + "loss": 0.8742, + "step": 54615 + }, + { + "epoch": 0.78, + "grad_norm": 0.55078125, + "learning_rate": 2.7223767814565837e-05, + "loss": 0.8872, + "step": 54620 + }, + { + "epoch": 0.78, + "grad_norm": 0.5390625, + "learning_rate": 2.7206599531772948e-05, + "loss": 0.8923, + "step": 54625 + }, + { + "epoch": 0.78, + "grad_norm": 0.62890625, + "learning_rate": 2.7189435811753293e-05, + "loss": 0.899, + "step": 54630 + }, + { + "epoch": 0.78, + "grad_norm": 0.5234375, + "learning_rate": 2.717227665558276e-05, + "loss": 0.8567, + "step": 54635 + }, + { + "epoch": 0.78, + "grad_norm": 0.53125, + "learning_rate": 2.7155122064336835e-05, + "loss": 0.9793, + "step": 54640 + }, + { + "epoch": 0.78, + "grad_norm": 0.65234375, + "learning_rate": 2.713797203909083e-05, + "loss": 0.938, + "step": 54645 + }, + { + "epoch": 0.78, + "grad_norm": 0.5859375, + "learning_rate": 2.7120826580919746e-05, + "loss": 0.9868, + "step": 54650 + }, + { + "epoch": 0.78, + "grad_norm": 0.466796875, + "learning_rate": 2.710368569089826e-05, + "loss": 0.8248, + "step": 54655 + }, + { + "epoch": 0.78, + "grad_norm": 0.6484375, + "learning_rate": 2.7086549370100746e-05, + "loss": 0.8802, + "step": 54660 + }, + { + "epoch": 0.78, + "grad_norm": 0.6796875, + "learning_rate": 2.7069417619601343e-05, + "loss": 0.855, + "step": 54665 + }, + { + "epoch": 0.78, + "grad_norm": 0.546875, + "learning_rate": 2.705229044047396e-05, + "loss": 1.0715, + "step": 54670 + }, + { + "epoch": 0.78, + "grad_norm": 0.6640625, + "learning_rate": 2.703516783379204e-05, + "loss": 0.8297, + "step": 54675 + }, + { + "epoch": 0.78, + "grad_norm": 0.51953125, + "learning_rate": 2.701804980062891e-05, + "loss": 0.9352, + "step": 54680 + }, + { + "epoch": 0.78, + "grad_norm": 0.55078125, + "learning_rate": 2.700093634205757e-05, + "loss": 0.9531, + "step": 54685 + }, + { + "epoch": 0.78, + "grad_norm": 0.52734375, + "learning_rate": 2.6983827459150646e-05, + "loss": 0.9047, + "step": 54690 + }, + { + "epoch": 0.78, + "grad_norm": 0.486328125, + "learning_rate": 2.6966723152980578e-05, + "loss": 0.8579, + "step": 54695 + }, + { + "epoch": 0.78, + "grad_norm": 0.53125, + "learning_rate": 2.6949623424619507e-05, + "loss": 1.0137, + "step": 54700 + }, + { + "epoch": 0.78, + "grad_norm": 0.51953125, + "learning_rate": 2.6932528275139212e-05, + "loss": 1.0013, + "step": 54705 + }, + { + "epoch": 0.78, + "grad_norm": 0.546875, + "learning_rate": 2.691543770561129e-05, + "loss": 0.9706, + "step": 54710 + }, + { + "epoch": 0.78, + "grad_norm": 0.703125, + "learning_rate": 2.6898351717106962e-05, + "loss": 1.0458, + "step": 54715 + }, + { + "epoch": 0.78, + "grad_norm": 0.5546875, + "learning_rate": 2.688127031069718e-05, + "loss": 1.0411, + "step": 54720 + }, + { + "epoch": 0.79, + "grad_norm": 0.59765625, + "learning_rate": 2.686419348745265e-05, + "loss": 0.9726, + "step": 54725 + }, + { + "epoch": 0.79, + "grad_norm": 0.59375, + "learning_rate": 2.6847121248443796e-05, + "loss": 1.1117, + "step": 54730 + }, + { + "epoch": 0.79, + "grad_norm": 0.49609375, + "learning_rate": 2.6830053594740668e-05, + "loss": 0.9083, + "step": 54735 + }, + { + "epoch": 0.79, + "grad_norm": 0.56640625, + "learning_rate": 2.6812990527413116e-05, + "loss": 0.9351, + "step": 54740 + }, + { + "epoch": 0.79, + "grad_norm": 0.578125, + "learning_rate": 2.6795932047530705e-05, + "loss": 1.0094, + "step": 54745 + }, + { + "epoch": 0.79, + "grad_norm": 0.625, + "learning_rate": 2.67788781561626e-05, + "loss": 0.935, + "step": 54750 + }, + { + "epoch": 0.79, + "grad_norm": 0.51953125, + "learning_rate": 2.6761828854377812e-05, + "loss": 0.8739, + "step": 54755 + }, + { + "epoch": 0.79, + "grad_norm": 0.515625, + "learning_rate": 2.674478414324504e-05, + "loss": 0.9497, + "step": 54760 + }, + { + "epoch": 0.79, + "grad_norm": 0.5625, + "learning_rate": 2.6727744023832622e-05, + "loss": 1.0655, + "step": 54765 + }, + { + "epoch": 0.79, + "grad_norm": 0.58984375, + "learning_rate": 2.6710708497208625e-05, + "loss": 1.0258, + "step": 54770 + }, + { + "epoch": 0.79, + "grad_norm": 0.66015625, + "learning_rate": 2.66936775644409e-05, + "loss": 1.1597, + "step": 54775 + }, + { + "epoch": 0.79, + "grad_norm": 0.474609375, + "learning_rate": 2.667665122659697e-05, + "loss": 0.8485, + "step": 54780 + }, + { + "epoch": 0.79, + "grad_norm": 0.453125, + "learning_rate": 2.665962948474403e-05, + "loss": 0.8748, + "step": 54785 + }, + { + "epoch": 0.79, + "grad_norm": 0.5390625, + "learning_rate": 2.6642612339949037e-05, + "loss": 1.0183, + "step": 54790 + }, + { + "epoch": 0.79, + "grad_norm": 0.58203125, + "learning_rate": 2.6625599793278676e-05, + "loss": 0.9696, + "step": 54795 + }, + { + "epoch": 0.79, + "grad_norm": 0.62109375, + "learning_rate": 2.6608591845799268e-05, + "loss": 0.9695, + "step": 54800 + }, + { + "epoch": 0.79, + "grad_norm": 0.5234375, + "learning_rate": 2.6591588498576903e-05, + "loss": 0.9984, + "step": 54805 + }, + { + "epoch": 0.79, + "grad_norm": 0.7890625, + "learning_rate": 2.6574589752677405e-05, + "loss": 0.927, + "step": 54810 + }, + { + "epoch": 0.79, + "grad_norm": 0.546875, + "learning_rate": 2.6557595609166242e-05, + "loss": 0.9817, + "step": 54815 + }, + { + "epoch": 0.79, + "grad_norm": 0.49609375, + "learning_rate": 2.65406060691086e-05, + "loss": 0.7142, + "step": 54820 + }, + { + "epoch": 0.79, + "grad_norm": 0.52734375, + "learning_rate": 2.6523621133569464e-05, + "loss": 0.8814, + "step": 54825 + }, + { + "epoch": 0.79, + "grad_norm": 0.51953125, + "learning_rate": 2.6506640803613412e-05, + "loss": 1.0073, + "step": 54830 + }, + { + "epoch": 0.79, + "grad_norm": 0.55078125, + "learning_rate": 2.6489665080304814e-05, + "loss": 0.9157, + "step": 54835 + }, + { + "epoch": 0.79, + "grad_norm": 0.59375, + "learning_rate": 2.647269396470776e-05, + "loss": 0.9138, + "step": 54840 + }, + { + "epoch": 0.79, + "grad_norm": 0.5390625, + "learning_rate": 2.645572745788597e-05, + "loss": 0.8308, + "step": 54845 + }, + { + "epoch": 0.79, + "grad_norm": 0.59375, + "learning_rate": 2.6438765560902933e-05, + "loss": 0.9832, + "step": 54850 + }, + { + "epoch": 0.79, + "grad_norm": 0.47265625, + "learning_rate": 2.642180827482188e-05, + "loss": 0.8653, + "step": 54855 + }, + { + "epoch": 0.79, + "grad_norm": 0.5, + "learning_rate": 2.6404855600705648e-05, + "loss": 0.8943, + "step": 54860 + }, + { + "epoch": 0.79, + "grad_norm": 0.56640625, + "learning_rate": 2.638790753961692e-05, + "loss": 0.835, + "step": 54865 + }, + { + "epoch": 0.79, + "grad_norm": 0.6484375, + "learning_rate": 2.6370964092617957e-05, + "loss": 0.9696, + "step": 54870 + }, + { + "epoch": 0.79, + "grad_norm": 0.62890625, + "learning_rate": 2.6354025260770843e-05, + "loss": 0.9426, + "step": 54875 + }, + { + "epoch": 0.79, + "grad_norm": 0.462890625, + "learning_rate": 2.6337091045137275e-05, + "loss": 0.9313, + "step": 54880 + }, + { + "epoch": 0.79, + "grad_norm": 0.56640625, + "learning_rate": 2.6320161446778735e-05, + "loss": 1.0948, + "step": 54885 + }, + { + "epoch": 0.79, + "grad_norm": 0.6015625, + "learning_rate": 2.630323646675642e-05, + "loss": 1.0268, + "step": 54890 + }, + { + "epoch": 0.79, + "grad_norm": 0.5390625, + "learning_rate": 2.6286316106131148e-05, + "loss": 0.7687, + "step": 54895 + }, + { + "epoch": 0.79, + "grad_norm": 0.578125, + "learning_rate": 2.6269400365963536e-05, + "loss": 0.9385, + "step": 54900 + }, + { + "epoch": 0.79, + "grad_norm": 0.55859375, + "learning_rate": 2.6252489247313915e-05, + "loss": 0.915, + "step": 54905 + }, + { + "epoch": 0.79, + "grad_norm": 0.53515625, + "learning_rate": 2.6235582751242226e-05, + "loss": 0.8307, + "step": 54910 + }, + { + "epoch": 0.79, + "grad_norm": 0.5703125, + "learning_rate": 2.621868087880822e-05, + "loss": 0.8782, + "step": 54915 + }, + { + "epoch": 0.79, + "grad_norm": 0.62890625, + "learning_rate": 2.6201783631071385e-05, + "loss": 1.0213, + "step": 54920 + }, + { + "epoch": 0.79, + "grad_norm": 0.48046875, + "learning_rate": 2.618489100909074e-05, + "loss": 0.9641, + "step": 54925 + }, + { + "epoch": 0.79, + "grad_norm": 0.59765625, + "learning_rate": 2.61680030139252e-05, + "loss": 0.8366, + "step": 54930 + }, + { + "epoch": 0.79, + "grad_norm": 0.57421875, + "learning_rate": 2.6151119646633347e-05, + "loss": 1.034, + "step": 54935 + }, + { + "epoch": 0.79, + "grad_norm": 0.5390625, + "learning_rate": 2.613424090827339e-05, + "loss": 0.9292, + "step": 54940 + }, + { + "epoch": 0.79, + "grad_norm": 0.59765625, + "learning_rate": 2.611736679990334e-05, + "loss": 0.8975, + "step": 54945 + }, + { + "epoch": 0.79, + "grad_norm": 0.55859375, + "learning_rate": 2.6100497322580908e-05, + "loss": 0.9267, + "step": 54950 + }, + { + "epoch": 0.79, + "grad_norm": 0.578125, + "learning_rate": 2.6083632477363452e-05, + "loss": 1.0463, + "step": 54955 + }, + { + "epoch": 0.79, + "grad_norm": 0.53125, + "learning_rate": 2.6066772265308083e-05, + "loss": 0.9473, + "step": 54960 + }, + { + "epoch": 0.79, + "grad_norm": 0.5703125, + "learning_rate": 2.6049916687471666e-05, + "loss": 0.8829, + "step": 54965 + }, + { + "epoch": 0.79, + "grad_norm": 0.625, + "learning_rate": 2.603306574491069e-05, + "loss": 1.0004, + "step": 54970 + }, + { + "epoch": 0.79, + "grad_norm": 0.58984375, + "learning_rate": 2.6016219438681367e-05, + "loss": 0.9185, + "step": 54975 + }, + { + "epoch": 0.79, + "grad_norm": 0.5625, + "learning_rate": 2.599937776983967e-05, + "loss": 1.0157, + "step": 54980 + }, + { + "epoch": 0.79, + "grad_norm": 0.578125, + "learning_rate": 2.5982540739441284e-05, + "loss": 1.0383, + "step": 54985 + }, + { + "epoch": 0.79, + "grad_norm": 0.578125, + "learning_rate": 2.5965708348541518e-05, + "loss": 1.072, + "step": 54990 + }, + { + "epoch": 0.79, + "grad_norm": 0.52734375, + "learning_rate": 2.5948880598195467e-05, + "loss": 1.0174, + "step": 54995 + }, + { + "epoch": 0.79, + "grad_norm": 0.5078125, + "learning_rate": 2.593205748945795e-05, + "loss": 1.1266, + "step": 55000 + }, + { + "epoch": 0.79, + "grad_norm": 0.62890625, + "learning_rate": 2.5915239023383387e-05, + "loss": 0.9785, + "step": 55005 + }, + { + "epoch": 0.79, + "grad_norm": 0.515625, + "learning_rate": 2.5898425201026022e-05, + "loss": 0.8967, + "step": 55010 + }, + { + "epoch": 0.79, + "grad_norm": 0.53515625, + "learning_rate": 2.588161602343979e-05, + "loss": 0.9996, + "step": 55015 + }, + { + "epoch": 0.79, + "grad_norm": 0.5234375, + "learning_rate": 2.5864811491678255e-05, + "loss": 0.9827, + "step": 55020 + }, + { + "epoch": 0.79, + "grad_norm": 0.6484375, + "learning_rate": 2.5848011606794797e-05, + "loss": 0.9682, + "step": 55025 + }, + { + "epoch": 0.79, + "grad_norm": 0.55859375, + "learning_rate": 2.5831216369842416e-05, + "loss": 0.803, + "step": 55030 + }, + { + "epoch": 0.79, + "grad_norm": 0.5703125, + "learning_rate": 2.581442578187384e-05, + "loss": 1.03, + "step": 55035 + }, + { + "epoch": 0.79, + "grad_norm": 0.57421875, + "learning_rate": 2.5797639843941547e-05, + "loss": 1.0129, + "step": 55040 + }, + { + "epoch": 0.79, + "grad_norm": 0.5078125, + "learning_rate": 2.578085855709773e-05, + "loss": 0.8681, + "step": 55045 + }, + { + "epoch": 0.79, + "grad_norm": 0.54296875, + "learning_rate": 2.576408192239419e-05, + "loss": 0.8666, + "step": 55050 + }, + { + "epoch": 0.79, + "grad_norm": 0.56640625, + "learning_rate": 2.5747309940882546e-05, + "loss": 0.995, + "step": 55055 + }, + { + "epoch": 0.79, + "grad_norm": 0.5, + "learning_rate": 2.5730542613614118e-05, + "loss": 0.904, + "step": 55060 + }, + { + "epoch": 0.79, + "grad_norm": 0.58203125, + "learning_rate": 2.5713779941639826e-05, + "loss": 0.9581, + "step": 55065 + }, + { + "epoch": 0.79, + "grad_norm": 0.625, + "learning_rate": 2.5697021926010413e-05, + "loss": 0.8722, + "step": 55070 + }, + { + "epoch": 0.79, + "grad_norm": 0.52734375, + "learning_rate": 2.5680268567776323e-05, + "loss": 1.021, + "step": 55075 + }, + { + "epoch": 0.79, + "grad_norm": 0.62890625, + "learning_rate": 2.566351986798764e-05, + "loss": 0.8893, + "step": 55080 + }, + { + "epoch": 0.79, + "grad_norm": 0.65234375, + "learning_rate": 2.564677582769416e-05, + "loss": 1.1019, + "step": 55085 + }, + { + "epoch": 0.79, + "grad_norm": 0.65234375, + "learning_rate": 2.563003644794546e-05, + "loss": 1.16, + "step": 55090 + }, + { + "epoch": 0.79, + "grad_norm": 0.59765625, + "learning_rate": 2.5613301729790794e-05, + "loss": 1.0489, + "step": 55095 + }, + { + "epoch": 0.79, + "grad_norm": 0.7109375, + "learning_rate": 2.559657167427908e-05, + "loss": 0.954, + "step": 55100 + }, + { + "epoch": 0.79, + "grad_norm": 0.50390625, + "learning_rate": 2.5579846282458987e-05, + "loss": 0.9067, + "step": 55105 + }, + { + "epoch": 0.79, + "grad_norm": 0.5390625, + "learning_rate": 2.5563125555378908e-05, + "loss": 0.8201, + "step": 55110 + }, + { + "epoch": 0.79, + "grad_norm": 0.62890625, + "learning_rate": 2.5546409494086876e-05, + "loss": 0.9355, + "step": 55115 + }, + { + "epoch": 0.79, + "grad_norm": 0.51953125, + "learning_rate": 2.5529698099630694e-05, + "loss": 0.951, + "step": 55120 + }, + { + "epoch": 0.79, + "grad_norm": 0.59765625, + "learning_rate": 2.551299137305787e-05, + "loss": 0.9268, + "step": 55125 + }, + { + "epoch": 0.79, + "grad_norm": 0.5546875, + "learning_rate": 2.549628931541559e-05, + "loss": 0.9385, + "step": 55130 + }, + { + "epoch": 0.79, + "grad_norm": 0.59375, + "learning_rate": 2.5479591927750713e-05, + "loss": 0.9118, + "step": 55135 + }, + { + "epoch": 0.79, + "grad_norm": 0.58203125, + "learning_rate": 2.5462899211109915e-05, + "loss": 0.9354, + "step": 55140 + }, + { + "epoch": 0.79, + "grad_norm": 0.55078125, + "learning_rate": 2.5446211166539448e-05, + "loss": 0.993, + "step": 55145 + }, + { + "epoch": 0.79, + "grad_norm": 0.57421875, + "learning_rate": 2.5429527795085384e-05, + "loss": 1.1194, + "step": 55150 + }, + { + "epoch": 0.79, + "grad_norm": 0.58203125, + "learning_rate": 2.541284909779348e-05, + "loss": 0.9855, + "step": 55155 + }, + { + "epoch": 0.79, + "grad_norm": 0.6015625, + "learning_rate": 2.5396175075709107e-05, + "loss": 1.1419, + "step": 55160 + }, + { + "epoch": 0.79, + "grad_norm": 0.56640625, + "learning_rate": 2.537950572987744e-05, + "loss": 1.0015, + "step": 55165 + }, + { + "epoch": 0.79, + "grad_norm": 0.515625, + "learning_rate": 2.536284106134338e-05, + "loss": 0.985, + "step": 55170 + }, + { + "epoch": 0.79, + "grad_norm": 0.52734375, + "learning_rate": 2.534618107115141e-05, + "loss": 0.9552, + "step": 55175 + }, + { + "epoch": 0.79, + "grad_norm": 0.6015625, + "learning_rate": 2.5329525760345875e-05, + "loss": 0.9298, + "step": 55180 + }, + { + "epoch": 0.79, + "grad_norm": 0.56640625, + "learning_rate": 2.531287512997067e-05, + "loss": 0.8846, + "step": 55185 + }, + { + "epoch": 0.79, + "grad_norm": 0.66015625, + "learning_rate": 2.529622918106954e-05, + "loss": 0.9645, + "step": 55190 + }, + { + "epoch": 0.79, + "grad_norm": 0.58203125, + "learning_rate": 2.527958791468582e-05, + "loss": 0.9765, + "step": 55195 + }, + { + "epoch": 0.79, + "grad_norm": 0.63671875, + "learning_rate": 2.5262951331862628e-05, + "loss": 1.0036, + "step": 55200 + }, + { + "epoch": 0.79, + "grad_norm": 0.55859375, + "learning_rate": 2.5246319433642794e-05, + "loss": 1.0886, + "step": 55205 + }, + { + "epoch": 0.79, + "grad_norm": 0.6328125, + "learning_rate": 2.5229692221068767e-05, + "loss": 0.9325, + "step": 55210 + }, + { + "epoch": 0.79, + "grad_norm": 0.5234375, + "learning_rate": 2.521306969518279e-05, + "loss": 1.0222, + "step": 55215 + }, + { + "epoch": 0.79, + "grad_norm": 0.5546875, + "learning_rate": 2.5196451857026805e-05, + "loss": 1.0714, + "step": 55220 + }, + { + "epoch": 0.79, + "grad_norm": 0.578125, + "learning_rate": 2.517983870764238e-05, + "loss": 0.9616, + "step": 55225 + }, + { + "epoch": 0.79, + "grad_norm": 0.59375, + "learning_rate": 2.516323024807088e-05, + "loss": 0.9596, + "step": 55230 + }, + { + "epoch": 0.79, + "grad_norm": 0.5546875, + "learning_rate": 2.5146626479353375e-05, + "loss": 0.8699, + "step": 55235 + }, + { + "epoch": 0.79, + "grad_norm": 0.578125, + "learning_rate": 2.5130027402530564e-05, + "loss": 0.8693, + "step": 55240 + }, + { + "epoch": 0.79, + "grad_norm": 0.55859375, + "learning_rate": 2.5113433018642883e-05, + "loss": 0.921, + "step": 55245 + }, + { + "epoch": 0.79, + "grad_norm": 0.66015625, + "learning_rate": 2.5096843328730503e-05, + "loss": 1.1023, + "step": 55250 + }, + { + "epoch": 0.79, + "grad_norm": 0.52734375, + "learning_rate": 2.508025833383333e-05, + "loss": 0.9598, + "step": 55255 + }, + { + "epoch": 0.79, + "grad_norm": 0.51953125, + "learning_rate": 2.5063678034990855e-05, + "loss": 0.9467, + "step": 55260 + }, + { + "epoch": 0.79, + "grad_norm": 0.57421875, + "learning_rate": 2.5047102433242385e-05, + "loss": 0.8873, + "step": 55265 + }, + { + "epoch": 0.79, + "grad_norm": 0.60546875, + "learning_rate": 2.5030531529626932e-05, + "loss": 0.9983, + "step": 55270 + }, + { + "epoch": 0.79, + "grad_norm": 0.52734375, + "learning_rate": 2.501396532518311e-05, + "loss": 0.9313, + "step": 55275 + }, + { + "epoch": 0.79, + "grad_norm": 0.53515625, + "learning_rate": 2.499740382094937e-05, + "loss": 1.0263, + "step": 55280 + }, + { + "epoch": 0.79, + "grad_norm": 0.52734375, + "learning_rate": 2.4980847017963792e-05, + "loss": 1.1104, + "step": 55285 + }, + { + "epoch": 0.79, + "grad_norm": 0.458984375, + "learning_rate": 2.496429491726413e-05, + "loss": 1.0691, + "step": 55290 + }, + { + "epoch": 0.79, + "grad_norm": 0.55078125, + "learning_rate": 2.4947747519887922e-05, + "loss": 1.0045, + "step": 55295 + }, + { + "epoch": 0.79, + "grad_norm": 0.54296875, + "learning_rate": 2.49312048268724e-05, + "loss": 0.7354, + "step": 55300 + }, + { + "epoch": 0.79, + "grad_norm": 0.6015625, + "learning_rate": 2.491466683925443e-05, + "loss": 0.9239, + "step": 55305 + }, + { + "epoch": 0.79, + "grad_norm": 0.51953125, + "learning_rate": 2.4898133558070647e-05, + "loss": 1.1663, + "step": 55310 + }, + { + "epoch": 0.79, + "grad_norm": 0.5859375, + "learning_rate": 2.4881604984357432e-05, + "loss": 0.8713, + "step": 55315 + }, + { + "epoch": 0.79, + "grad_norm": 0.53125, + "learning_rate": 2.4865081119150734e-05, + "loss": 0.8766, + "step": 55320 + }, + { + "epoch": 0.79, + "grad_norm": 0.49609375, + "learning_rate": 2.4848561963486318e-05, + "loss": 0.9103, + "step": 55325 + }, + { + "epoch": 0.79, + "grad_norm": 0.5703125, + "learning_rate": 2.483204751839966e-05, + "loss": 0.9963, + "step": 55330 + }, + { + "epoch": 0.79, + "grad_norm": 0.53515625, + "learning_rate": 2.4815537784925846e-05, + "loss": 0.9386, + "step": 55335 + }, + { + "epoch": 0.79, + "grad_norm": 0.54296875, + "learning_rate": 2.4799032764099784e-05, + "loss": 0.9169, + "step": 55340 + }, + { + "epoch": 0.79, + "grad_norm": 0.59375, + "learning_rate": 2.4782532456955955e-05, + "loss": 0.9657, + "step": 55345 + }, + { + "epoch": 0.79, + "grad_norm": 0.64453125, + "learning_rate": 2.4766036864528696e-05, + "loss": 0.8921, + "step": 55350 + }, + { + "epoch": 0.79, + "grad_norm": 0.484375, + "learning_rate": 2.4749545987851897e-05, + "loss": 0.8551, + "step": 55355 + }, + { + "epoch": 0.79, + "grad_norm": 0.5859375, + "learning_rate": 2.473305982795926e-05, + "loss": 0.9152, + "step": 55360 + }, + { + "epoch": 0.79, + "grad_norm": 0.6015625, + "learning_rate": 2.4716578385884182e-05, + "loss": 1.1017, + "step": 55365 + }, + { + "epoch": 0.79, + "grad_norm": 0.51953125, + "learning_rate": 2.4700101662659692e-05, + "loss": 1.047, + "step": 55370 + }, + { + "epoch": 0.79, + "grad_norm": 0.56640625, + "learning_rate": 2.4683629659318575e-05, + "loss": 0.8259, + "step": 55375 + }, + { + "epoch": 0.79, + "grad_norm": 0.53125, + "learning_rate": 2.466716237689337e-05, + "loss": 0.8812, + "step": 55380 + }, + { + "epoch": 0.79, + "grad_norm": 0.5546875, + "learning_rate": 2.465069981641619e-05, + "loss": 0.9734, + "step": 55385 + }, + { + "epoch": 0.79, + "grad_norm": 0.56640625, + "learning_rate": 2.463424197891898e-05, + "loss": 0.9448, + "step": 55390 + }, + { + "epoch": 0.79, + "grad_norm": 0.609375, + "learning_rate": 2.4617788865433332e-05, + "loss": 1.0034, + "step": 55395 + }, + { + "epoch": 0.79, + "grad_norm": 0.546875, + "learning_rate": 2.4601340476990498e-05, + "loss": 0.9685, + "step": 55400 + }, + { + "epoch": 0.79, + "grad_norm": 0.52734375, + "learning_rate": 2.4584896814621518e-05, + "loss": 0.979, + "step": 55405 + }, + { + "epoch": 0.79, + "grad_norm": 0.609375, + "learning_rate": 2.456845787935712e-05, + "loss": 0.9138, + "step": 55410 + }, + { + "epoch": 0.79, + "grad_norm": 0.5234375, + "learning_rate": 2.4552023672227666e-05, + "loss": 1.0635, + "step": 55415 + }, + { + "epoch": 0.79, + "grad_norm": 0.55078125, + "learning_rate": 2.453559419426329e-05, + "loss": 0.9942, + "step": 55420 + }, + { + "epoch": 0.8, + "grad_norm": 0.48828125, + "learning_rate": 2.451916944649385e-05, + "loss": 0.8659, + "step": 55425 + }, + { + "epoch": 0.8, + "grad_norm": 0.5390625, + "learning_rate": 2.45027494299488e-05, + "loss": 0.8897, + "step": 55430 + }, + { + "epoch": 0.8, + "grad_norm": 0.51171875, + "learning_rate": 2.4486334145657408e-05, + "loss": 1.0097, + "step": 55435 + }, + { + "epoch": 0.8, + "grad_norm": 0.60546875, + "learning_rate": 2.4469923594648626e-05, + "loss": 0.8444, + "step": 55440 + }, + { + "epoch": 0.8, + "grad_norm": 0.69140625, + "learning_rate": 2.445351777795104e-05, + "loss": 0.9892, + "step": 55445 + }, + { + "epoch": 0.8, + "grad_norm": 0.59765625, + "learning_rate": 2.4437116696592988e-05, + "loss": 0.8611, + "step": 55450 + }, + { + "epoch": 0.8, + "grad_norm": 0.62109375, + "learning_rate": 2.4420720351602512e-05, + "loss": 0.912, + "step": 55455 + }, + { + "epoch": 0.8, + "grad_norm": 0.51171875, + "learning_rate": 2.4404328744007387e-05, + "loss": 0.8703, + "step": 55460 + }, + { + "epoch": 0.8, + "grad_norm": 0.578125, + "learning_rate": 2.4387941874835008e-05, + "loss": 0.9272, + "step": 55465 + }, + { + "epoch": 0.8, + "grad_norm": 0.5546875, + "learning_rate": 2.437155974511255e-05, + "loss": 0.8864, + "step": 55470 + }, + { + "epoch": 0.8, + "grad_norm": 0.52734375, + "learning_rate": 2.435518235586688e-05, + "loss": 0.9701, + "step": 55475 + }, + { + "epoch": 0.8, + "grad_norm": 0.484375, + "learning_rate": 2.433880970812451e-05, + "loss": 0.9989, + "step": 55480 + }, + { + "epoch": 0.8, + "grad_norm": 0.578125, + "learning_rate": 2.432244180291172e-05, + "loss": 0.9397, + "step": 55485 + }, + { + "epoch": 0.8, + "grad_norm": 0.6015625, + "learning_rate": 2.4306078641254493e-05, + "loss": 0.9598, + "step": 55490 + }, + { + "epoch": 0.8, + "grad_norm": 0.5625, + "learning_rate": 2.428972022417847e-05, + "loss": 1.0931, + "step": 55495 + }, + { + "epoch": 0.8, + "grad_norm": 0.67578125, + "learning_rate": 2.4273366552708975e-05, + "loss": 1.1147, + "step": 55500 + }, + { + "epoch": 0.8, + "grad_norm": 0.609375, + "learning_rate": 2.425701762787115e-05, + "loss": 0.9353, + "step": 55505 + }, + { + "epoch": 0.8, + "grad_norm": 0.57421875, + "learning_rate": 2.4240673450689676e-05, + "loss": 0.9107, + "step": 55510 + }, + { + "epoch": 0.8, + "grad_norm": 0.53125, + "learning_rate": 2.422433402218909e-05, + "loss": 0.9921, + "step": 55515 + }, + { + "epoch": 0.8, + "grad_norm": 0.56640625, + "learning_rate": 2.4207999343393574e-05, + "loss": 0.9116, + "step": 55520 + }, + { + "epoch": 0.8, + "grad_norm": 0.54296875, + "learning_rate": 2.4191669415326945e-05, + "loss": 0.9435, + "step": 55525 + }, + { + "epoch": 0.8, + "grad_norm": 0.58203125, + "learning_rate": 2.4175344239012822e-05, + "loss": 0.9898, + "step": 55530 + }, + { + "epoch": 0.8, + "grad_norm": 0.671875, + "learning_rate": 2.4159023815474502e-05, + "loss": 0.932, + "step": 55535 + }, + { + "epoch": 0.8, + "grad_norm": 0.5078125, + "learning_rate": 2.4142708145734916e-05, + "loss": 0.8182, + "step": 55540 + }, + { + "epoch": 0.8, + "grad_norm": 0.546875, + "learning_rate": 2.4126397230816778e-05, + "loss": 0.8407, + "step": 55545 + }, + { + "epoch": 0.8, + "grad_norm": 0.5546875, + "learning_rate": 2.41100910717425e-05, + "loss": 1.0069, + "step": 55550 + }, + { + "epoch": 0.8, + "grad_norm": 0.515625, + "learning_rate": 2.409378966953415e-05, + "loss": 0.9041, + "step": 55555 + }, + { + "epoch": 0.8, + "grad_norm": 0.58984375, + "learning_rate": 2.4077493025213483e-05, + "loss": 0.893, + "step": 55560 + }, + { + "epoch": 0.8, + "grad_norm": 0.578125, + "learning_rate": 2.4061201139802024e-05, + "loss": 1.0359, + "step": 55565 + }, + { + "epoch": 0.8, + "grad_norm": 0.48046875, + "learning_rate": 2.4044914014320995e-05, + "loss": 0.8965, + "step": 55570 + }, + { + "epoch": 0.8, + "grad_norm": 0.51953125, + "learning_rate": 2.4028631649791233e-05, + "loss": 0.8205, + "step": 55575 + }, + { + "epoch": 0.8, + "grad_norm": 0.63671875, + "learning_rate": 2.4012354047233354e-05, + "loss": 0.8566, + "step": 55580 + }, + { + "epoch": 0.8, + "grad_norm": 0.5859375, + "learning_rate": 2.3996081207667698e-05, + "loss": 0.8672, + "step": 55585 + }, + { + "epoch": 0.8, + "grad_norm": 0.57421875, + "learning_rate": 2.3979813132114215e-05, + "loss": 0.9229, + "step": 55590 + }, + { + "epoch": 0.8, + "grad_norm": 0.6015625, + "learning_rate": 2.3963549821592613e-05, + "loss": 1.1278, + "step": 55595 + }, + { + "epoch": 0.8, + "grad_norm": 0.69921875, + "learning_rate": 2.3947291277122362e-05, + "loss": 1.1218, + "step": 55600 + }, + { + "epoch": 0.8, + "grad_norm": 0.62109375, + "learning_rate": 2.3931037499722466e-05, + "loss": 1.045, + "step": 55605 + }, + { + "epoch": 0.8, + "grad_norm": 0.4609375, + "learning_rate": 2.3914788490411765e-05, + "loss": 0.9761, + "step": 55610 + }, + { + "epoch": 0.8, + "grad_norm": 0.625, + "learning_rate": 2.3898544250208808e-05, + "loss": 0.9598, + "step": 55615 + }, + { + "epoch": 0.8, + "grad_norm": 0.61328125, + "learning_rate": 2.3882304780131738e-05, + "loss": 0.9488, + "step": 55620 + }, + { + "epoch": 0.8, + "grad_norm": 0.58203125, + "learning_rate": 2.38660700811985e-05, + "loss": 0.888, + "step": 55625 + }, + { + "epoch": 0.8, + "grad_norm": 0.625, + "learning_rate": 2.3849840154426716e-05, + "loss": 1.1086, + "step": 55630 + }, + { + "epoch": 0.8, + "grad_norm": 0.55859375, + "learning_rate": 2.3833615000833666e-05, + "loss": 0.9319, + "step": 55635 + }, + { + "epoch": 0.8, + "grad_norm": 0.5859375, + "learning_rate": 2.381739462143636e-05, + "loss": 0.9932, + "step": 55640 + }, + { + "epoch": 0.8, + "grad_norm": 0.5546875, + "learning_rate": 2.380117901725156e-05, + "loss": 0.7757, + "step": 55645 + }, + { + "epoch": 0.8, + "grad_norm": 0.57421875, + "learning_rate": 2.378496818929561e-05, + "loss": 1.0451, + "step": 55650 + }, + { + "epoch": 0.8, + "grad_norm": 0.6171875, + "learning_rate": 2.3768762138584688e-05, + "loss": 0.9445, + "step": 55655 + }, + { + "epoch": 0.8, + "grad_norm": 0.51953125, + "learning_rate": 2.3752560866134544e-05, + "loss": 0.8609, + "step": 55660 + }, + { + "epoch": 0.8, + "grad_norm": 0.53515625, + "learning_rate": 2.3736364372960763e-05, + "loss": 0.9798, + "step": 55665 + }, + { + "epoch": 0.8, + "grad_norm": 0.765625, + "learning_rate": 2.3720172660078478e-05, + "loss": 1.0855, + "step": 55670 + }, + { + "epoch": 0.8, + "grad_norm": 0.63671875, + "learning_rate": 2.370398572850264e-05, + "loss": 1.0424, + "step": 55675 + }, + { + "epoch": 0.8, + "grad_norm": 0.474609375, + "learning_rate": 2.3687803579247915e-05, + "loss": 0.9406, + "step": 55680 + }, + { + "epoch": 0.8, + "grad_norm": 0.5078125, + "learning_rate": 2.3671626213328534e-05, + "loss": 1.0556, + "step": 55685 + }, + { + "epoch": 0.8, + "grad_norm": 0.5390625, + "learning_rate": 2.365545363175856e-05, + "loss": 1.0199, + "step": 55690 + }, + { + "epoch": 0.8, + "grad_norm": 0.6484375, + "learning_rate": 2.363928583555173e-05, + "loss": 0.9645, + "step": 55695 + }, + { + "epoch": 0.8, + "grad_norm": 0.6015625, + "learning_rate": 2.36231228257214e-05, + "loss": 0.9919, + "step": 55700 + }, + { + "epoch": 0.8, + "grad_norm": 0.55859375, + "learning_rate": 2.3606964603280746e-05, + "loss": 0.9985, + "step": 55705 + }, + { + "epoch": 0.8, + "grad_norm": 0.6484375, + "learning_rate": 2.3590811169242554e-05, + "loss": 0.9128, + "step": 55710 + }, + { + "epoch": 0.8, + "grad_norm": 0.51953125, + "learning_rate": 2.3574662524619318e-05, + "loss": 1.0251, + "step": 55715 + }, + { + "epoch": 0.8, + "grad_norm": 0.546875, + "learning_rate": 2.3558518670423268e-05, + "loss": 0.9368, + "step": 55720 + }, + { + "epoch": 0.8, + "grad_norm": 0.5234375, + "learning_rate": 2.3542379607666365e-05, + "loss": 1.0604, + "step": 55725 + }, + { + "epoch": 0.8, + "grad_norm": 0.4921875, + "learning_rate": 2.3526245337360153e-05, + "loss": 1.0838, + "step": 55730 + }, + { + "epoch": 0.8, + "grad_norm": 0.56640625, + "learning_rate": 2.351011586051598e-05, + "loss": 0.9904, + "step": 55735 + }, + { + "epoch": 0.8, + "grad_norm": 0.546875, + "learning_rate": 2.349399117814488e-05, + "loss": 1.0388, + "step": 55740 + }, + { + "epoch": 0.8, + "grad_norm": 0.60546875, + "learning_rate": 2.3477871291257525e-05, + "loss": 1.089, + "step": 55745 + }, + { + "epoch": 0.8, + "grad_norm": 0.51171875, + "learning_rate": 2.346175620086435e-05, + "loss": 0.7987, + "step": 55750 + }, + { + "epoch": 0.8, + "grad_norm": 0.6015625, + "learning_rate": 2.3445645907975488e-05, + "loss": 1.0289, + "step": 55755 + }, + { + "epoch": 0.8, + "grad_norm": 0.625, + "learning_rate": 2.3429540413600736e-05, + "loss": 0.9907, + "step": 55760 + }, + { + "epoch": 0.8, + "grad_norm": 0.57421875, + "learning_rate": 2.3413439718749562e-05, + "loss": 0.9355, + "step": 55765 + }, + { + "epoch": 0.8, + "grad_norm": 0.5546875, + "learning_rate": 2.3397343824431216e-05, + "loss": 0.9169, + "step": 55770 + }, + { + "epoch": 0.8, + "grad_norm": 0.53515625, + "learning_rate": 2.3381252731654633e-05, + "loss": 0.97, + "step": 55775 + }, + { + "epoch": 0.8, + "grad_norm": 0.474609375, + "learning_rate": 2.3365166441428366e-05, + "loss": 0.9656, + "step": 55780 + }, + { + "epoch": 0.8, + "grad_norm": 0.546875, + "learning_rate": 2.3349084954760735e-05, + "loss": 1.0667, + "step": 55785 + }, + { + "epoch": 0.8, + "grad_norm": 0.5390625, + "learning_rate": 2.3333008272659784e-05, + "loss": 0.8738, + "step": 55790 + }, + { + "epoch": 0.8, + "grad_norm": 0.54296875, + "learning_rate": 2.331693639613317e-05, + "loss": 1.0558, + "step": 55795 + }, + { + "epoch": 0.8, + "grad_norm": 0.59375, + "learning_rate": 2.3300869326188313e-05, + "loss": 0.9356, + "step": 55800 + }, + { + "epoch": 0.8, + "grad_norm": 0.53125, + "learning_rate": 2.328480706383236e-05, + "loss": 0.8565, + "step": 55805 + }, + { + "epoch": 0.8, + "grad_norm": 0.52734375, + "learning_rate": 2.3268749610072062e-05, + "loss": 0.884, + "step": 55810 + }, + { + "epoch": 0.8, + "grad_norm": 0.5625, + "learning_rate": 2.325269696591389e-05, + "loss": 0.8756, + "step": 55815 + }, + { + "epoch": 0.8, + "grad_norm": 0.48046875, + "learning_rate": 2.323664913236412e-05, + "loss": 0.7658, + "step": 55820 + }, + { + "epoch": 0.8, + "grad_norm": 0.5703125, + "learning_rate": 2.322060611042858e-05, + "loss": 0.882, + "step": 55825 + }, + { + "epoch": 0.8, + "grad_norm": 0.50390625, + "learning_rate": 2.3204567901112895e-05, + "loss": 0.9241, + "step": 55830 + }, + { + "epoch": 0.8, + "grad_norm": 0.65234375, + "learning_rate": 2.3188534505422377e-05, + "loss": 1.0328, + "step": 55835 + }, + { + "epoch": 0.8, + "grad_norm": 0.54296875, + "learning_rate": 2.3172505924361976e-05, + "loss": 0.8019, + "step": 55840 + }, + { + "epoch": 0.8, + "grad_norm": 0.4921875, + "learning_rate": 2.31564821589364e-05, + "loss": 0.9576, + "step": 55845 + }, + { + "epoch": 0.8, + "grad_norm": 0.54296875, + "learning_rate": 2.3140463210150065e-05, + "loss": 1.0144, + "step": 55850 + }, + { + "epoch": 0.8, + "grad_norm": 0.55859375, + "learning_rate": 2.3124449079007004e-05, + "loss": 0.8785, + "step": 55855 + }, + { + "epoch": 0.8, + "grad_norm": 0.59765625, + "learning_rate": 2.3108439766511038e-05, + "loss": 1.0206, + "step": 55860 + }, + { + "epoch": 0.8, + "grad_norm": 0.54296875, + "learning_rate": 2.3092435273665657e-05, + "loss": 0.9757, + "step": 55865 + }, + { + "epoch": 0.8, + "grad_norm": 0.51953125, + "learning_rate": 2.3076435601474024e-05, + "loss": 0.9286, + "step": 55870 + }, + { + "epoch": 0.8, + "grad_norm": 0.49609375, + "learning_rate": 2.3060440750938994e-05, + "loss": 0.9818, + "step": 55875 + }, + { + "epoch": 0.8, + "grad_norm": 0.61328125, + "learning_rate": 2.304445072306316e-05, + "loss": 0.8368, + "step": 55880 + }, + { + "epoch": 0.8, + "grad_norm": 0.54296875, + "learning_rate": 2.3028465518848828e-05, + "loss": 0.9012, + "step": 55885 + }, + { + "epoch": 0.8, + "grad_norm": 0.54296875, + "learning_rate": 2.301248513929791e-05, + "loss": 1.0057, + "step": 55890 + }, + { + "epoch": 0.8, + "grad_norm": 0.5859375, + "learning_rate": 2.2996509585412117e-05, + "loss": 0.8797, + "step": 55895 + }, + { + "epoch": 0.8, + "grad_norm": 0.484375, + "learning_rate": 2.2980538858192825e-05, + "loss": 0.9028, + "step": 55900 + }, + { + "epoch": 0.8, + "grad_norm": 0.56640625, + "learning_rate": 2.296457295864104e-05, + "loss": 0.9417, + "step": 55905 + }, + { + "epoch": 0.8, + "grad_norm": 0.58984375, + "learning_rate": 2.2948611887757566e-05, + "loss": 0.9962, + "step": 55910 + }, + { + "epoch": 0.8, + "grad_norm": 0.5625, + "learning_rate": 2.2932655646542876e-05, + "loss": 0.9032, + "step": 55915 + }, + { + "epoch": 0.8, + "grad_norm": 0.6484375, + "learning_rate": 2.29167042359971e-05, + "loss": 1.0349, + "step": 55920 + }, + { + "epoch": 0.8, + "grad_norm": 0.5, + "learning_rate": 2.2900757657120075e-05, + "loss": 0.9714, + "step": 55925 + }, + { + "epoch": 0.8, + "grad_norm": 0.6953125, + "learning_rate": 2.288481591091136e-05, + "loss": 1.1025, + "step": 55930 + }, + { + "epoch": 0.8, + "grad_norm": 0.5078125, + "learning_rate": 2.2868878998370247e-05, + "loss": 0.916, + "step": 55935 + }, + { + "epoch": 0.8, + "grad_norm": 0.578125, + "learning_rate": 2.2852946920495612e-05, + "loss": 0.92, + "step": 55940 + }, + { + "epoch": 0.8, + "grad_norm": 0.6015625, + "learning_rate": 2.2837019678286143e-05, + "loss": 0.9217, + "step": 55945 + }, + { + "epoch": 0.8, + "grad_norm": 0.63671875, + "learning_rate": 2.2821097272740143e-05, + "loss": 0.9212, + "step": 55950 + }, + { + "epoch": 0.8, + "grad_norm": 0.55078125, + "learning_rate": 2.2805179704855674e-05, + "loss": 0.8215, + "step": 55955 + }, + { + "epoch": 0.8, + "grad_norm": 0.5546875, + "learning_rate": 2.2789266975630474e-05, + "loss": 0.967, + "step": 55960 + }, + { + "epoch": 0.8, + "grad_norm": 0.62890625, + "learning_rate": 2.2773359086061928e-05, + "loss": 0.8872, + "step": 55965 + }, + { + "epoch": 0.8, + "grad_norm": 0.57421875, + "learning_rate": 2.275745603714723e-05, + "loss": 0.9856, + "step": 55970 + }, + { + "epoch": 0.8, + "grad_norm": 0.52734375, + "learning_rate": 2.274155782988312e-05, + "loss": 0.8701, + "step": 55975 + }, + { + "epoch": 0.8, + "grad_norm": 0.54296875, + "learning_rate": 2.2725664465266182e-05, + "loss": 0.9997, + "step": 55980 + }, + { + "epoch": 0.8, + "grad_norm": 0.55859375, + "learning_rate": 2.2709775944292576e-05, + "loss": 1.0167, + "step": 55985 + }, + { + "epoch": 0.8, + "grad_norm": 0.640625, + "learning_rate": 2.269389226795825e-05, + "loss": 0.924, + "step": 55990 + }, + { + "epoch": 0.8, + "grad_norm": 0.515625, + "learning_rate": 2.2678013437258815e-05, + "loss": 1.0758, + "step": 55995 + }, + { + "epoch": 0.8, + "grad_norm": 0.55859375, + "learning_rate": 2.2662139453189547e-05, + "loss": 0.9348, + "step": 56000 + }, + { + "epoch": 0.8, + "grad_norm": 0.55078125, + "learning_rate": 2.2646270316745445e-05, + "loss": 0.8797, + "step": 56005 + }, + { + "epoch": 0.8, + "grad_norm": 0.53515625, + "learning_rate": 2.2630406028921258e-05, + "loss": 0.9258, + "step": 56010 + }, + { + "epoch": 0.8, + "grad_norm": 0.90625, + "learning_rate": 2.2614546590711295e-05, + "loss": 1.0569, + "step": 56015 + }, + { + "epoch": 0.8, + "grad_norm": 0.54296875, + "learning_rate": 2.259869200310972e-05, + "loss": 1.0872, + "step": 56020 + }, + { + "epoch": 0.8, + "grad_norm": 0.53125, + "learning_rate": 2.258284226711026e-05, + "loss": 0.9533, + "step": 56025 + }, + { + "epoch": 0.8, + "grad_norm": 0.58203125, + "learning_rate": 2.2566997383706445e-05, + "loss": 0.9205, + "step": 56030 + }, + { + "epoch": 0.8, + "grad_norm": 0.51953125, + "learning_rate": 2.2551157353891393e-05, + "loss": 1.0329, + "step": 56035 + }, + { + "epoch": 0.8, + "grad_norm": 0.58203125, + "learning_rate": 2.2535322178658003e-05, + "loss": 0.7919, + "step": 56040 + }, + { + "epoch": 0.8, + "grad_norm": 0.52734375, + "learning_rate": 2.2519491858998875e-05, + "loss": 0.9221, + "step": 56045 + }, + { + "epoch": 0.8, + "grad_norm": 0.53515625, + "learning_rate": 2.2503666395906208e-05, + "loss": 0.9291, + "step": 56050 + }, + { + "epoch": 0.8, + "grad_norm": 0.52734375, + "learning_rate": 2.2487845790371998e-05, + "loss": 0.9941, + "step": 56055 + }, + { + "epoch": 0.8, + "grad_norm": 0.59765625, + "learning_rate": 2.2472030043387914e-05, + "loss": 0.9139, + "step": 56060 + }, + { + "epoch": 0.8, + "grad_norm": 0.490234375, + "learning_rate": 2.2456219155945256e-05, + "loss": 0.9235, + "step": 56065 + }, + { + "epoch": 0.8, + "grad_norm": 0.6015625, + "learning_rate": 2.244041312903511e-05, + "loss": 0.9313, + "step": 56070 + }, + { + "epoch": 0.8, + "grad_norm": 0.59765625, + "learning_rate": 2.2424611963648244e-05, + "loss": 0.9421, + "step": 56075 + }, + { + "epoch": 0.8, + "grad_norm": 0.5625, + "learning_rate": 2.2408815660774995e-05, + "loss": 0.9214, + "step": 56080 + }, + { + "epoch": 0.8, + "grad_norm": 0.52734375, + "learning_rate": 2.2393024221405555e-05, + "loss": 0.999, + "step": 56085 + }, + { + "epoch": 0.8, + "grad_norm": 0.5546875, + "learning_rate": 2.237723764652977e-05, + "loss": 0.8612, + "step": 56090 + }, + { + "epoch": 0.8, + "grad_norm": 0.56640625, + "learning_rate": 2.236145593713711e-05, + "loss": 0.9955, + "step": 56095 + }, + { + "epoch": 0.8, + "grad_norm": 0.455078125, + "learning_rate": 2.234567909421681e-05, + "loss": 0.8391, + "step": 56100 + }, + { + "epoch": 0.8, + "grad_norm": 0.546875, + "learning_rate": 2.2329907118757807e-05, + "loss": 0.9794, + "step": 56105 + }, + { + "epoch": 0.8, + "grad_norm": 0.55859375, + "learning_rate": 2.2314140011748662e-05, + "loss": 1.0065, + "step": 56110 + }, + { + "epoch": 0.8, + "grad_norm": 0.59375, + "learning_rate": 2.2298377774177702e-05, + "loss": 1.0236, + "step": 56115 + }, + { + "epoch": 0.81, + "grad_norm": 0.486328125, + "learning_rate": 2.228262040703294e-05, + "loss": 1.0006, + "step": 56120 + }, + { + "epoch": 0.81, + "grad_norm": 0.546875, + "learning_rate": 2.2266867911302048e-05, + "loss": 0.7434, + "step": 56125 + }, + { + "epoch": 0.81, + "grad_norm": 0.5234375, + "learning_rate": 2.2251120287972384e-05, + "loss": 0.8261, + "step": 56130 + }, + { + "epoch": 0.81, + "grad_norm": 0.5546875, + "learning_rate": 2.2235377538031033e-05, + "loss": 0.8794, + "step": 56135 + }, + { + "epoch": 0.81, + "grad_norm": 0.59375, + "learning_rate": 2.221963966246482e-05, + "loss": 0.9186, + "step": 56140 + }, + { + "epoch": 0.81, + "grad_norm": 0.58203125, + "learning_rate": 2.2203906662260156e-05, + "loss": 0.6777, + "step": 56145 + }, + { + "epoch": 0.81, + "grad_norm": 0.52734375, + "learning_rate": 2.2188178538403213e-05, + "loss": 0.9449, + "step": 56150 + }, + { + "epoch": 0.81, + "grad_norm": 0.52734375, + "learning_rate": 2.21724552918799e-05, + "loss": 0.9514, + "step": 56155 + }, + { + "epoch": 0.81, + "grad_norm": 0.53515625, + "learning_rate": 2.2156736923675693e-05, + "loss": 0.8412, + "step": 56160 + }, + { + "epoch": 0.81, + "grad_norm": 0.48828125, + "learning_rate": 2.2141023434775866e-05, + "loss": 0.9219, + "step": 56165 + }, + { + "epoch": 0.81, + "grad_norm": 0.5390625, + "learning_rate": 2.21253148261654e-05, + "loss": 0.8796, + "step": 56170 + }, + { + "epoch": 0.81, + "grad_norm": 0.498046875, + "learning_rate": 2.2109611098828863e-05, + "loss": 0.9103, + "step": 56175 + }, + { + "epoch": 0.81, + "grad_norm": 0.48828125, + "learning_rate": 2.209391225375064e-05, + "loss": 0.8296, + "step": 56180 + }, + { + "epoch": 0.81, + "grad_norm": 0.609375, + "learning_rate": 2.207821829191472e-05, + "loss": 1.0703, + "step": 56185 + }, + { + "epoch": 0.81, + "grad_norm": 0.5390625, + "learning_rate": 2.2062529214304804e-05, + "loss": 1.0274, + "step": 56190 + }, + { + "epoch": 0.81, + "grad_norm": 0.5546875, + "learning_rate": 2.2046845021904316e-05, + "loss": 1.0084, + "step": 56195 + }, + { + "epoch": 0.81, + "grad_norm": 0.478515625, + "learning_rate": 2.203116571569639e-05, + "loss": 0.9722, + "step": 56200 + }, + { + "epoch": 0.81, + "grad_norm": 0.5546875, + "learning_rate": 2.201549129666377e-05, + "loss": 0.8741, + "step": 56205 + }, + { + "epoch": 0.81, + "grad_norm": 0.5078125, + "learning_rate": 2.1999821765788965e-05, + "loss": 1.1016, + "step": 56210 + }, + { + "epoch": 0.81, + "grad_norm": 0.55078125, + "learning_rate": 2.1984157124054207e-05, + "loss": 0.8361, + "step": 56215 + }, + { + "epoch": 0.81, + "grad_norm": 0.5703125, + "learning_rate": 2.1968497372441288e-05, + "loss": 0.8822, + "step": 56220 + }, + { + "epoch": 0.81, + "grad_norm": 0.4765625, + "learning_rate": 2.195284251193184e-05, + "loss": 0.9323, + "step": 56225 + }, + { + "epoch": 0.81, + "grad_norm": 0.59765625, + "learning_rate": 2.1937192543507136e-05, + "loss": 0.9843, + "step": 56230 + }, + { + "epoch": 0.81, + "grad_norm": 0.5078125, + "learning_rate": 2.1921547468148105e-05, + "loss": 0.8911, + "step": 56235 + }, + { + "epoch": 0.81, + "grad_norm": 0.6328125, + "learning_rate": 2.190590728683537e-05, + "loss": 1.0064, + "step": 56240 + }, + { + "epoch": 0.81, + "grad_norm": 0.57421875, + "learning_rate": 2.1890272000549317e-05, + "loss": 1.0378, + "step": 56245 + }, + { + "epoch": 0.81, + "grad_norm": 0.5859375, + "learning_rate": 2.1874641610270008e-05, + "loss": 0.9608, + "step": 56250 + }, + { + "epoch": 0.81, + "grad_norm": 0.484375, + "learning_rate": 2.1859016116977106e-05, + "loss": 0.8337, + "step": 56255 + }, + { + "epoch": 0.81, + "grad_norm": 0.515625, + "learning_rate": 2.1843395521650056e-05, + "loss": 0.9049, + "step": 56260 + }, + { + "epoch": 0.81, + "grad_norm": 0.546875, + "learning_rate": 2.1827779825268036e-05, + "loss": 0.9015, + "step": 56265 + }, + { + "epoch": 0.81, + "grad_norm": 0.53515625, + "learning_rate": 2.181216902880977e-05, + "loss": 0.9828, + "step": 56270 + }, + { + "epoch": 0.81, + "grad_norm": 0.6171875, + "learning_rate": 2.179656313325379e-05, + "loss": 0.9996, + "step": 56275 + }, + { + "epoch": 0.81, + "grad_norm": 0.51953125, + "learning_rate": 2.178096213957834e-05, + "loss": 1.0021, + "step": 56280 + }, + { + "epoch": 0.81, + "grad_norm": 0.5703125, + "learning_rate": 2.176536604876126e-05, + "loss": 1.0434, + "step": 56285 + }, + { + "epoch": 0.81, + "grad_norm": 0.47265625, + "learning_rate": 2.1749774861780115e-05, + "loss": 1.0075, + "step": 56290 + }, + { + "epoch": 0.81, + "grad_norm": 0.515625, + "learning_rate": 2.1734188579612225e-05, + "loss": 0.8832, + "step": 56295 + }, + { + "epoch": 0.81, + "grad_norm": 0.4765625, + "learning_rate": 2.171860720323451e-05, + "loss": 0.857, + "step": 56300 + }, + { + "epoch": 0.81, + "grad_norm": 0.515625, + "learning_rate": 2.1703030733623642e-05, + "loss": 0.8859, + "step": 56305 + }, + { + "epoch": 0.81, + "grad_norm": 0.515625, + "learning_rate": 2.1687459171756008e-05, + "loss": 0.8828, + "step": 56310 + }, + { + "epoch": 0.81, + "grad_norm": 0.53125, + "learning_rate": 2.1671892518607607e-05, + "loss": 0.9719, + "step": 56315 + }, + { + "epoch": 0.81, + "grad_norm": 0.55859375, + "learning_rate": 2.1656330775154175e-05, + "loss": 0.9589, + "step": 56320 + }, + { + "epoch": 0.81, + "grad_norm": 0.625, + "learning_rate": 2.1640773942371195e-05, + "loss": 1.0611, + "step": 56325 + }, + { + "epoch": 0.81, + "grad_norm": 0.53515625, + "learning_rate": 2.1625222021233714e-05, + "loss": 0.9626, + "step": 56330 + }, + { + "epoch": 0.81, + "grad_norm": 0.5703125, + "learning_rate": 2.1609675012716613e-05, + "loss": 0.9591, + "step": 56335 + }, + { + "epoch": 0.81, + "grad_norm": 0.53125, + "learning_rate": 2.159413291779433e-05, + "loss": 0.918, + "step": 56340 + }, + { + "epoch": 0.81, + "grad_norm": 0.6484375, + "learning_rate": 2.157859573744112e-05, + "loss": 1.0512, + "step": 56345 + }, + { + "epoch": 0.81, + "grad_norm": 0.609375, + "learning_rate": 2.1563063472630818e-05, + "loss": 1.0652, + "step": 56350 + }, + { + "epoch": 0.81, + "grad_norm": 0.5, + "learning_rate": 2.1547536124337032e-05, + "loss": 0.9323, + "step": 56355 + }, + { + "epoch": 0.81, + "grad_norm": 0.5546875, + "learning_rate": 2.153201369353306e-05, + "loss": 0.9307, + "step": 56360 + }, + { + "epoch": 0.81, + "grad_norm": 0.53125, + "learning_rate": 2.15164961811918e-05, + "loss": 0.9425, + "step": 56365 + }, + { + "epoch": 0.81, + "grad_norm": 0.58203125, + "learning_rate": 2.150098358828595e-05, + "loss": 0.9132, + "step": 56370 + }, + { + "epoch": 0.81, + "grad_norm": 0.58203125, + "learning_rate": 2.148547591578788e-05, + "loss": 0.8788, + "step": 56375 + }, + { + "epoch": 0.81, + "grad_norm": 0.52734375, + "learning_rate": 2.1469973164669567e-05, + "loss": 0.8095, + "step": 56380 + }, + { + "epoch": 0.81, + "grad_norm": 0.54296875, + "learning_rate": 2.1454475335902778e-05, + "loss": 1.05, + "step": 56385 + }, + { + "epoch": 0.81, + "grad_norm": 0.6015625, + "learning_rate": 2.1438982430458986e-05, + "loss": 0.9308, + "step": 56390 + }, + { + "epoch": 0.81, + "grad_norm": 0.58984375, + "learning_rate": 2.14234944493092e-05, + "loss": 1.1513, + "step": 56395 + }, + { + "epoch": 0.81, + "grad_norm": 0.703125, + "learning_rate": 2.1408011393424265e-05, + "loss": 1.1842, + "step": 56400 + }, + { + "epoch": 0.81, + "grad_norm": 0.494140625, + "learning_rate": 2.1392533263774716e-05, + "loss": 1.0473, + "step": 56405 + }, + { + "epoch": 0.81, + "grad_norm": 0.6328125, + "learning_rate": 2.1377060061330677e-05, + "loss": 1.0914, + "step": 56410 + }, + { + "epoch": 0.81, + "grad_norm": 0.53515625, + "learning_rate": 2.1361591787062064e-05, + "loss": 0.9997, + "step": 56415 + }, + { + "epoch": 0.81, + "grad_norm": 0.5, + "learning_rate": 2.1346128441938463e-05, + "loss": 0.8518, + "step": 56420 + }, + { + "epoch": 0.81, + "grad_norm": 0.62890625, + "learning_rate": 2.133067002692908e-05, + "loss": 0.9078, + "step": 56425 + }, + { + "epoch": 0.81, + "grad_norm": 0.6328125, + "learning_rate": 2.13152165430029e-05, + "loss": 1.0808, + "step": 56430 + }, + { + "epoch": 0.81, + "grad_norm": 0.56640625, + "learning_rate": 2.129976799112858e-05, + "loss": 0.9562, + "step": 56435 + }, + { + "epoch": 0.81, + "grad_norm": 0.68359375, + "learning_rate": 2.1284324372274454e-05, + "loss": 1.0333, + "step": 56440 + }, + { + "epoch": 0.81, + "grad_norm": 0.5546875, + "learning_rate": 2.1268885687408478e-05, + "loss": 0.9312, + "step": 56445 + }, + { + "epoch": 0.81, + "grad_norm": 0.4765625, + "learning_rate": 2.1253451937498426e-05, + "loss": 0.8903, + "step": 56450 + }, + { + "epoch": 0.81, + "grad_norm": 0.51171875, + "learning_rate": 2.123802312351172e-05, + "loss": 1.0886, + "step": 56455 + }, + { + "epoch": 0.81, + "grad_norm": 0.57421875, + "learning_rate": 2.12225992464154e-05, + "loss": 0.9818, + "step": 56460 + }, + { + "epoch": 0.81, + "grad_norm": 0.51171875, + "learning_rate": 2.1207180307176266e-05, + "loss": 0.9473, + "step": 56465 + }, + { + "epoch": 0.81, + "grad_norm": 0.56640625, + "learning_rate": 2.1191766306760852e-05, + "loss": 0.9522, + "step": 56470 + }, + { + "epoch": 0.81, + "grad_norm": 0.5390625, + "learning_rate": 2.1176357246135247e-05, + "loss": 0.819, + "step": 56475 + }, + { + "epoch": 0.81, + "grad_norm": 0.5234375, + "learning_rate": 2.1160953126265336e-05, + "loss": 1.0077, + "step": 56480 + }, + { + "epoch": 0.81, + "grad_norm": 0.5, + "learning_rate": 2.11455539481167e-05, + "loss": 0.9595, + "step": 56485 + }, + { + "epoch": 0.81, + "grad_norm": 0.55859375, + "learning_rate": 2.113015971265454e-05, + "loss": 0.9976, + "step": 56490 + }, + { + "epoch": 0.81, + "grad_norm": 0.66015625, + "learning_rate": 2.111477042084381e-05, + "loss": 1.0494, + "step": 56495 + }, + { + "epoch": 0.81, + "grad_norm": 0.48828125, + "learning_rate": 2.1099386073649106e-05, + "loss": 0.9497, + "step": 56500 + }, + { + "epoch": 0.81, + "grad_norm": 0.578125, + "learning_rate": 2.1084006672034727e-05, + "loss": 1.1865, + "step": 56505 + }, + { + "epoch": 0.81, + "grad_norm": 0.51953125, + "learning_rate": 2.106863221696468e-05, + "loss": 0.9653, + "step": 56510 + }, + { + "epoch": 0.81, + "grad_norm": 0.51953125, + "learning_rate": 2.10532627094027e-05, + "loss": 0.9337, + "step": 56515 + }, + { + "epoch": 0.81, + "grad_norm": 0.6328125, + "learning_rate": 2.1037898150312087e-05, + "loss": 0.8808, + "step": 56520 + }, + { + "epoch": 0.81, + "grad_norm": 0.478515625, + "learning_rate": 2.1022538540655955e-05, + "loss": 0.9955, + "step": 56525 + }, + { + "epoch": 0.81, + "grad_norm": 0.56640625, + "learning_rate": 2.1007183881397075e-05, + "loss": 1.0113, + "step": 56530 + }, + { + "epoch": 0.81, + "grad_norm": 0.51953125, + "learning_rate": 2.0991834173497848e-05, + "loss": 0.9069, + "step": 56535 + }, + { + "epoch": 0.81, + "grad_norm": 0.51171875, + "learning_rate": 2.0976489417920443e-05, + "loss": 0.8756, + "step": 56540 + }, + { + "epoch": 0.81, + "grad_norm": 0.64453125, + "learning_rate": 2.0961149615626706e-05, + "loss": 0.9911, + "step": 56545 + }, + { + "epoch": 0.81, + "grad_norm": 0.6171875, + "learning_rate": 2.094581476757813e-05, + "loss": 0.896, + "step": 56550 + }, + { + "epoch": 0.81, + "grad_norm": 0.49609375, + "learning_rate": 2.09304848747359e-05, + "loss": 0.87, + "step": 56555 + }, + { + "epoch": 0.81, + "grad_norm": 0.48828125, + "learning_rate": 2.0915159938060926e-05, + "loss": 0.9616, + "step": 56560 + }, + { + "epoch": 0.81, + "grad_norm": 0.58984375, + "learning_rate": 2.0899839958513812e-05, + "loss": 0.8914, + "step": 56565 + }, + { + "epoch": 0.81, + "grad_norm": 0.6015625, + "learning_rate": 2.0884524937054805e-05, + "loss": 0.8992, + "step": 56570 + }, + { + "epoch": 0.81, + "grad_norm": 0.55078125, + "learning_rate": 2.086921487464387e-05, + "loss": 0.9412, + "step": 56575 + }, + { + "epoch": 0.81, + "grad_norm": 0.578125, + "learning_rate": 2.08539097722407e-05, + "loss": 1.0388, + "step": 56580 + }, + { + "epoch": 0.81, + "grad_norm": 0.5546875, + "learning_rate": 2.0838609630804584e-05, + "loss": 1.0003, + "step": 56585 + }, + { + "epoch": 0.81, + "grad_norm": 1.046875, + "learning_rate": 2.0823314451294563e-05, + "loss": 0.969, + "step": 56590 + }, + { + "epoch": 0.81, + "grad_norm": 0.50390625, + "learning_rate": 2.0808024234669398e-05, + "loss": 0.9585, + "step": 56595 + }, + { + "epoch": 0.81, + "grad_norm": 0.6640625, + "learning_rate": 2.0792738981887473e-05, + "loss": 0.9976, + "step": 56600 + }, + { + "epoch": 0.81, + "grad_norm": 0.53515625, + "learning_rate": 2.0777458693906837e-05, + "loss": 0.8886, + "step": 56605 + }, + { + "epoch": 0.81, + "grad_norm": 0.6640625, + "learning_rate": 2.0762183371685328e-05, + "loss": 1.0712, + "step": 56610 + }, + { + "epoch": 0.81, + "grad_norm": 0.55859375, + "learning_rate": 2.0746913016180435e-05, + "loss": 0.9209, + "step": 56615 + }, + { + "epoch": 0.81, + "grad_norm": 0.5390625, + "learning_rate": 2.0731647628349273e-05, + "loss": 0.9952, + "step": 56620 + }, + { + "epoch": 0.81, + "grad_norm": 0.62890625, + "learning_rate": 2.0716387209148737e-05, + "loss": 0.9298, + "step": 56625 + }, + { + "epoch": 0.81, + "grad_norm": 0.53515625, + "learning_rate": 2.070113175953532e-05, + "loss": 0.9394, + "step": 56630 + }, + { + "epoch": 0.81, + "grad_norm": 0.58984375, + "learning_rate": 2.068588128046528e-05, + "loss": 1.0072, + "step": 56635 + }, + { + "epoch": 0.81, + "grad_norm": 0.5234375, + "learning_rate": 2.0670635772894553e-05, + "loss": 0.9542, + "step": 56640 + }, + { + "epoch": 0.81, + "grad_norm": 0.546875, + "learning_rate": 2.0655395237778708e-05, + "loss": 0.9364, + "step": 56645 + }, + { + "epoch": 0.81, + "grad_norm": 0.53125, + "learning_rate": 2.064015967607308e-05, + "loss": 0.9768, + "step": 56650 + }, + { + "epoch": 0.81, + "grad_norm": 0.6015625, + "learning_rate": 2.0624929088732592e-05, + "loss": 1.021, + "step": 56655 + }, + { + "epoch": 0.81, + "grad_norm": 0.7265625, + "learning_rate": 2.0609703476711984e-05, + "loss": 1.0188, + "step": 56660 + }, + { + "epoch": 0.81, + "grad_norm": 0.53125, + "learning_rate": 2.0594482840965547e-05, + "loss": 0.791, + "step": 56665 + }, + { + "epoch": 0.81, + "grad_norm": 0.58984375, + "learning_rate": 2.057926718244737e-05, + "loss": 1.0041, + "step": 56670 + }, + { + "epoch": 0.81, + "grad_norm": 0.53515625, + "learning_rate": 2.0564056502111195e-05, + "loss": 1.1466, + "step": 56675 + }, + { + "epoch": 0.81, + "grad_norm": 0.68359375, + "learning_rate": 2.0548850800910413e-05, + "loss": 1.0553, + "step": 56680 + }, + { + "epoch": 0.81, + "grad_norm": 0.59375, + "learning_rate": 2.053365007979814e-05, + "loss": 0.9049, + "step": 56685 + }, + { + "epoch": 0.81, + "grad_norm": 0.50390625, + "learning_rate": 2.051845433972721e-05, + "loss": 0.8279, + "step": 56690 + }, + { + "epoch": 0.81, + "grad_norm": 0.63671875, + "learning_rate": 2.0503263581650067e-05, + "loss": 1.0301, + "step": 56695 + }, + { + "epoch": 0.81, + "grad_norm": 0.57421875, + "learning_rate": 2.0488077806518902e-05, + "loss": 1.1201, + "step": 56700 + }, + { + "epoch": 0.81, + "grad_norm": 0.54296875, + "learning_rate": 2.0472897015285597e-05, + "loss": 0.9476, + "step": 56705 + }, + { + "epoch": 0.81, + "grad_norm": 0.5078125, + "learning_rate": 2.0457721208901682e-05, + "loss": 0.9026, + "step": 56710 + }, + { + "epoch": 0.81, + "grad_norm": 0.5234375, + "learning_rate": 2.044255038831837e-05, + "loss": 0.8703, + "step": 56715 + }, + { + "epoch": 0.81, + "grad_norm": 0.515625, + "learning_rate": 2.0427384554486595e-05, + "loss": 1.0137, + "step": 56720 + }, + { + "epoch": 0.81, + "grad_norm": 0.5390625, + "learning_rate": 2.0412223708357025e-05, + "loss": 0.9533, + "step": 56725 + }, + { + "epoch": 0.81, + "grad_norm": 0.58984375, + "learning_rate": 2.039706785087988e-05, + "loss": 0.9821, + "step": 56730 + }, + { + "epoch": 0.81, + "grad_norm": 0.546875, + "learning_rate": 2.038191698300519e-05, + "loss": 0.9189, + "step": 56735 + }, + { + "epoch": 0.81, + "grad_norm": 0.62109375, + "learning_rate": 2.0366771105682637e-05, + "loss": 1.097, + "step": 56740 + }, + { + "epoch": 0.81, + "grad_norm": 0.53515625, + "learning_rate": 2.035163021986154e-05, + "loss": 0.8491, + "step": 56745 + }, + { + "epoch": 0.81, + "grad_norm": 0.5078125, + "learning_rate": 2.0336494326490985e-05, + "loss": 0.8807, + "step": 56750 + }, + { + "epoch": 0.81, + "grad_norm": 0.54296875, + "learning_rate": 2.0321363426519734e-05, + "loss": 0.9252, + "step": 56755 + }, + { + "epoch": 0.81, + "grad_norm": 0.54296875, + "learning_rate": 2.030623752089612e-05, + "loss": 0.9657, + "step": 56760 + }, + { + "epoch": 0.81, + "grad_norm": 0.53515625, + "learning_rate": 2.0291116610568304e-05, + "loss": 0.9351, + "step": 56765 + }, + { + "epoch": 0.81, + "grad_norm": 0.65625, + "learning_rate": 2.027600069648411e-05, + "loss": 1.0192, + "step": 56770 + }, + { + "epoch": 0.81, + "grad_norm": 0.55859375, + "learning_rate": 2.0260889779590962e-05, + "loss": 0.8669, + "step": 56775 + }, + { + "epoch": 0.81, + "grad_norm": 0.54296875, + "learning_rate": 2.0245783860836053e-05, + "loss": 0.7708, + "step": 56780 + }, + { + "epoch": 0.81, + "grad_norm": 0.6015625, + "learning_rate": 2.023068294116628e-05, + "loss": 0.9595, + "step": 56785 + }, + { + "epoch": 0.81, + "grad_norm": 0.5234375, + "learning_rate": 2.0215587021528116e-05, + "loss": 0.996, + "step": 56790 + }, + { + "epoch": 0.81, + "grad_norm": 0.53515625, + "learning_rate": 2.0200496102867818e-05, + "loss": 0.9928, + "step": 56795 + }, + { + "epoch": 0.81, + "grad_norm": 0.58984375, + "learning_rate": 2.018541018613135e-05, + "loss": 0.956, + "step": 56800 + }, + { + "epoch": 0.81, + "grad_norm": 0.53515625, + "learning_rate": 2.017032927226423e-05, + "loss": 0.994, + "step": 56805 + }, + { + "epoch": 0.81, + "grad_norm": 0.5234375, + "learning_rate": 2.0155253362211822e-05, + "loss": 1.0376, + "step": 56810 + }, + { + "epoch": 0.81, + "grad_norm": 0.53125, + "learning_rate": 2.0140182456919053e-05, + "loss": 1.0575, + "step": 56815 + }, + { + "epoch": 0.82, + "grad_norm": 0.59765625, + "learning_rate": 2.0125116557330615e-05, + "loss": 0.8795, + "step": 56820 + }, + { + "epoch": 0.82, + "grad_norm": 0.478515625, + "learning_rate": 2.0110055664390813e-05, + "loss": 0.876, + "step": 56825 + }, + { + "epoch": 0.82, + "grad_norm": 0.55078125, + "learning_rate": 2.009499977904372e-05, + "loss": 1.074, + "step": 56830 + }, + { + "epoch": 0.82, + "grad_norm": 0.61328125, + "learning_rate": 2.007994890223306e-05, + "loss": 1.0949, + "step": 56835 + }, + { + "epoch": 0.82, + "grad_norm": 0.58203125, + "learning_rate": 2.0064903034902206e-05, + "loss": 0.8713, + "step": 56840 + }, + { + "epoch": 0.82, + "grad_norm": 0.671875, + "learning_rate": 2.0049862177994262e-05, + "loss": 0.8938, + "step": 56845 + }, + { + "epoch": 0.82, + "grad_norm": 0.59765625, + "learning_rate": 2.003482633245205e-05, + "loss": 0.9963, + "step": 56850 + }, + { + "epoch": 0.82, + "grad_norm": 0.65234375, + "learning_rate": 2.0019795499217953e-05, + "loss": 0.9014, + "step": 56855 + }, + { + "epoch": 0.82, + "grad_norm": 0.546875, + "learning_rate": 2.0004769679234203e-05, + "loss": 0.8635, + "step": 56860 + }, + { + "epoch": 0.82, + "grad_norm": 0.55078125, + "learning_rate": 1.998974887344259e-05, + "loss": 1.0966, + "step": 56865 + }, + { + "epoch": 0.82, + "grad_norm": 0.54296875, + "learning_rate": 1.9974733082784623e-05, + "loss": 0.8467, + "step": 56870 + }, + { + "epoch": 0.82, + "grad_norm": 0.55078125, + "learning_rate": 1.995972230820152e-05, + "loss": 0.8615, + "step": 56875 + }, + { + "epoch": 0.82, + "grad_norm": 0.5625, + "learning_rate": 1.9944716550634214e-05, + "loss": 0.8607, + "step": 56880 + }, + { + "epoch": 0.82, + "grad_norm": 0.6640625, + "learning_rate": 1.992971581102322e-05, + "loss": 1.0601, + "step": 56885 + }, + { + "epoch": 0.82, + "grad_norm": 0.60546875, + "learning_rate": 1.9914720090308826e-05, + "loss": 0.981, + "step": 56890 + }, + { + "epoch": 0.82, + "grad_norm": 0.63671875, + "learning_rate": 1.9899729389431022e-05, + "loss": 0.9211, + "step": 56895 + }, + { + "epoch": 0.82, + "grad_norm": 0.59375, + "learning_rate": 1.988474370932937e-05, + "loss": 0.9536, + "step": 56900 + }, + { + "epoch": 0.82, + "grad_norm": 0.63671875, + "learning_rate": 1.9869763050943234e-05, + "loss": 1.1827, + "step": 56905 + }, + { + "epoch": 0.82, + "grad_norm": 0.5234375, + "learning_rate": 1.985478741521163e-05, + "loss": 1.0555, + "step": 56910 + }, + { + "epoch": 0.82, + "grad_norm": 0.494140625, + "learning_rate": 1.983981680307323e-05, + "loss": 1.0235, + "step": 56915 + }, + { + "epoch": 0.82, + "grad_norm": 0.56640625, + "learning_rate": 1.9824851215466388e-05, + "loss": 0.9067, + "step": 56920 + }, + { + "epoch": 0.82, + "grad_norm": 0.51953125, + "learning_rate": 1.9809890653329178e-05, + "loss": 0.8933, + "step": 56925 + }, + { + "epoch": 0.82, + "grad_norm": 0.5703125, + "learning_rate": 1.979493511759938e-05, + "loss": 0.9839, + "step": 56930 + }, + { + "epoch": 0.82, + "grad_norm": 0.56640625, + "learning_rate": 1.9779984609214363e-05, + "loss": 0.934, + "step": 56935 + }, + { + "epoch": 0.82, + "grad_norm": 0.57421875, + "learning_rate": 1.9765039129111285e-05, + "loss": 1.0783, + "step": 56940 + }, + { + "epoch": 0.82, + "grad_norm": 0.50390625, + "learning_rate": 1.975009867822695e-05, + "loss": 0.9056, + "step": 56945 + }, + { + "epoch": 0.82, + "grad_norm": 0.5390625, + "learning_rate": 1.97351632574978e-05, + "loss": 0.9473, + "step": 56950 + }, + { + "epoch": 0.82, + "grad_norm": 0.5546875, + "learning_rate": 1.9720232867860033e-05, + "loss": 1.1167, + "step": 56955 + }, + { + "epoch": 0.82, + "grad_norm": 0.52734375, + "learning_rate": 1.970530751024954e-05, + "loss": 1.1607, + "step": 56960 + }, + { + "epoch": 0.82, + "grad_norm": 0.5546875, + "learning_rate": 1.96903871856018e-05, + "loss": 0.8377, + "step": 56965 + }, + { + "epoch": 0.82, + "grad_norm": 0.5546875, + "learning_rate": 1.967547189485204e-05, + "loss": 0.9362, + "step": 56970 + }, + { + "epoch": 0.82, + "grad_norm": 0.56640625, + "learning_rate": 1.9660561638935217e-05, + "loss": 0.9683, + "step": 56975 + }, + { + "epoch": 0.82, + "grad_norm": 0.58203125, + "learning_rate": 1.9645656418785852e-05, + "loss": 0.8118, + "step": 56980 + }, + { + "epoch": 0.82, + "grad_norm": 0.578125, + "learning_rate": 1.9630756235338253e-05, + "loss": 0.824, + "step": 56985 + }, + { + "epoch": 0.82, + "grad_norm": 0.61328125, + "learning_rate": 1.9615861089526422e-05, + "loss": 0.8573, + "step": 56990 + }, + { + "epoch": 0.82, + "grad_norm": 0.56640625, + "learning_rate": 1.9600970982283952e-05, + "loss": 0.9875, + "step": 56995 + }, + { + "epoch": 0.82, + "grad_norm": 0.67578125, + "learning_rate": 1.958608591454417e-05, + "loss": 0.9674, + "step": 57000 + }, + { + "epoch": 0.82, + "grad_norm": 0.52734375, + "learning_rate": 1.957120588724014e-05, + "loss": 0.9661, + "step": 57005 + }, + { + "epoch": 0.82, + "grad_norm": 0.48828125, + "learning_rate": 1.95563309013045e-05, + "loss": 0.8551, + "step": 57010 + }, + { + "epoch": 0.82, + "grad_norm": 0.55078125, + "learning_rate": 1.9541460957669656e-05, + "loss": 0.8241, + "step": 57015 + }, + { + "epoch": 0.82, + "grad_norm": 0.66015625, + "learning_rate": 1.9526596057267697e-05, + "loss": 1.1419, + "step": 57020 + }, + { + "epoch": 0.82, + "grad_norm": 0.546875, + "learning_rate": 1.951173620103035e-05, + "loss": 0.9015, + "step": 57025 + }, + { + "epoch": 0.82, + "grad_norm": 0.55859375, + "learning_rate": 1.9496881389889023e-05, + "loss": 0.8603, + "step": 57030 + }, + { + "epoch": 0.82, + "grad_norm": 0.5078125, + "learning_rate": 1.9482031624774855e-05, + "loss": 0.9216, + "step": 57035 + }, + { + "epoch": 0.82, + "grad_norm": 0.6015625, + "learning_rate": 1.9467186906618673e-05, + "loss": 0.9155, + "step": 57040 + }, + { + "epoch": 0.82, + "grad_norm": 0.625, + "learning_rate": 1.94523472363509e-05, + "loss": 0.9163, + "step": 57045 + }, + { + "epoch": 0.82, + "grad_norm": 0.55859375, + "learning_rate": 1.9437512614901753e-05, + "loss": 0.95, + "step": 57050 + }, + { + "epoch": 0.82, + "grad_norm": 0.546875, + "learning_rate": 1.9422683043201086e-05, + "loss": 0.854, + "step": 57055 + }, + { + "epoch": 0.82, + "grad_norm": 0.515625, + "learning_rate": 1.940785852217839e-05, + "loss": 1.1324, + "step": 57060 + }, + { + "epoch": 0.82, + "grad_norm": 0.546875, + "learning_rate": 1.9393039052762908e-05, + "loss": 0.9357, + "step": 57065 + }, + { + "epoch": 0.82, + "grad_norm": 0.8125, + "learning_rate": 1.93782246358836e-05, + "loss": 1.1118, + "step": 57070 + }, + { + "epoch": 0.82, + "grad_norm": 0.6171875, + "learning_rate": 1.9363415272468933e-05, + "loss": 1.1094, + "step": 57075 + }, + { + "epoch": 0.82, + "grad_norm": 0.5390625, + "learning_rate": 1.9348610963447235e-05, + "loss": 0.9506, + "step": 57080 + }, + { + "epoch": 0.82, + "grad_norm": 0.5546875, + "learning_rate": 1.9333811709746485e-05, + "loss": 0.9978, + "step": 57085 + }, + { + "epoch": 0.82, + "grad_norm": 0.53125, + "learning_rate": 1.9319017512294257e-05, + "loss": 0.9095, + "step": 57090 + }, + { + "epoch": 0.82, + "grad_norm": 0.640625, + "learning_rate": 1.9304228372017908e-05, + "loss": 1.0865, + "step": 57095 + }, + { + "epoch": 0.82, + "grad_norm": 0.546875, + "learning_rate": 1.9289444289844448e-05, + "loss": 0.9566, + "step": 57100 + }, + { + "epoch": 0.82, + "grad_norm": 0.5703125, + "learning_rate": 1.927466526670052e-05, + "loss": 0.9192, + "step": 57105 + }, + { + "epoch": 0.82, + "grad_norm": 0.6484375, + "learning_rate": 1.9259891303512512e-05, + "loss": 1.0266, + "step": 57110 + }, + { + "epoch": 0.82, + "grad_norm": 0.5234375, + "learning_rate": 1.9245122401206493e-05, + "loss": 0.923, + "step": 57115 + }, + { + "epoch": 0.82, + "grad_norm": 0.61328125, + "learning_rate": 1.923035856070815e-05, + "loss": 0.9628, + "step": 57120 + }, + { + "epoch": 0.82, + "grad_norm": 0.6328125, + "learning_rate": 1.921559978294295e-05, + "loss": 1.0032, + "step": 57125 + }, + { + "epoch": 0.82, + "grad_norm": 0.58203125, + "learning_rate": 1.920084606883593e-05, + "loss": 1.112, + "step": 57130 + }, + { + "epoch": 0.82, + "grad_norm": 0.7578125, + "learning_rate": 1.9186097419311932e-05, + "loss": 0.9425, + "step": 57135 + }, + { + "epoch": 0.82, + "grad_norm": 0.69140625, + "learning_rate": 1.917135383529537e-05, + "loss": 1.1636, + "step": 57140 + }, + { + "epoch": 0.82, + "grad_norm": 0.515625, + "learning_rate": 1.915661531771039e-05, + "loss": 0.9695, + "step": 57145 + }, + { + "epoch": 0.82, + "grad_norm": 0.5390625, + "learning_rate": 1.914188186748087e-05, + "loss": 0.8936, + "step": 57150 + }, + { + "epoch": 0.82, + "grad_norm": 0.5625, + "learning_rate": 1.9127153485530246e-05, + "loss": 1.08, + "step": 57155 + }, + { + "epoch": 0.82, + "grad_norm": 0.52734375, + "learning_rate": 1.911243017278176e-05, + "loss": 0.9949, + "step": 57160 + }, + { + "epoch": 0.82, + "grad_norm": 0.5546875, + "learning_rate": 1.9097711930158303e-05, + "loss": 0.9987, + "step": 57165 + }, + { + "epoch": 0.82, + "grad_norm": 0.53125, + "learning_rate": 1.908299875858237e-05, + "loss": 0.8954, + "step": 57170 + }, + { + "epoch": 0.82, + "grad_norm": 0.62890625, + "learning_rate": 1.9068290658976252e-05, + "loss": 1.0304, + "step": 57175 + }, + { + "epoch": 0.82, + "grad_norm": 0.57421875, + "learning_rate": 1.905358763226186e-05, + "loss": 0.8348, + "step": 57180 + }, + { + "epoch": 0.82, + "grad_norm": 0.6015625, + "learning_rate": 1.903888967936075e-05, + "loss": 0.9374, + "step": 57185 + }, + { + "epoch": 0.82, + "grad_norm": 0.578125, + "learning_rate": 1.902419680119425e-05, + "loss": 0.998, + "step": 57190 + }, + { + "epoch": 0.82, + "grad_norm": 0.51953125, + "learning_rate": 1.9009508998683334e-05, + "loss": 0.9541, + "step": 57195 + }, + { + "epoch": 0.82, + "grad_norm": 0.609375, + "learning_rate": 1.899482627274861e-05, + "loss": 0.9615, + "step": 57200 + }, + { + "epoch": 0.82, + "grad_norm": 0.57421875, + "learning_rate": 1.8980148624310444e-05, + "loss": 0.9093, + "step": 57205 + }, + { + "epoch": 0.82, + "grad_norm": 0.58984375, + "learning_rate": 1.8965476054288857e-05, + "loss": 0.8685, + "step": 57210 + }, + { + "epoch": 0.82, + "grad_norm": 0.59375, + "learning_rate": 1.8950808563603485e-05, + "loss": 0.963, + "step": 57215 + }, + { + "epoch": 0.82, + "grad_norm": 0.54296875, + "learning_rate": 1.893614615317375e-05, + "loss": 0.8768, + "step": 57220 + }, + { + "epoch": 0.82, + "grad_norm": 0.5703125, + "learning_rate": 1.892148882391872e-05, + "loss": 1.043, + "step": 57225 + }, + { + "epoch": 0.82, + "grad_norm": 0.5234375, + "learning_rate": 1.8906836576757116e-05, + "loss": 0.9636, + "step": 57230 + }, + { + "epoch": 0.82, + "grad_norm": 0.52734375, + "learning_rate": 1.889218941260732e-05, + "loss": 1.0872, + "step": 57235 + }, + { + "epoch": 0.82, + "grad_norm": 0.6171875, + "learning_rate": 1.8877547332387467e-05, + "loss": 1.0531, + "step": 57240 + }, + { + "epoch": 0.82, + "grad_norm": 0.453125, + "learning_rate": 1.886291033701537e-05, + "loss": 0.8688, + "step": 57245 + }, + { + "epoch": 0.82, + "grad_norm": 0.6328125, + "learning_rate": 1.8848278427408438e-05, + "loss": 1.0525, + "step": 57250 + }, + { + "epoch": 0.82, + "grad_norm": 0.546875, + "learning_rate": 1.8833651604483828e-05, + "loss": 0.8974, + "step": 57255 + }, + { + "epoch": 0.82, + "grad_norm": 0.5703125, + "learning_rate": 1.88190298691584e-05, + "loss": 0.9064, + "step": 57260 + }, + { + "epoch": 0.82, + "grad_norm": 0.578125, + "learning_rate": 1.8804413222348617e-05, + "loss": 1.0215, + "step": 57265 + }, + { + "epoch": 0.82, + "grad_norm": 0.5625, + "learning_rate": 1.8789801664970686e-05, + "loss": 0.9896, + "step": 57270 + }, + { + "epoch": 0.82, + "grad_norm": 0.57421875, + "learning_rate": 1.877519519794051e-05, + "loss": 0.9436, + "step": 57275 + }, + { + "epoch": 0.82, + "grad_norm": 0.62890625, + "learning_rate": 1.87605938221736e-05, + "loss": 1.0005, + "step": 57280 + }, + { + "epoch": 0.82, + "grad_norm": 0.52734375, + "learning_rate": 1.874599753858517e-05, + "loss": 0.8156, + "step": 57285 + }, + { + "epoch": 0.82, + "grad_norm": 0.58984375, + "learning_rate": 1.8731406348090153e-05, + "loss": 1.0979, + "step": 57290 + }, + { + "epoch": 0.82, + "grad_norm": 0.53125, + "learning_rate": 1.8716820251603163e-05, + "loss": 0.9682, + "step": 57295 + }, + { + "epoch": 0.82, + "grad_norm": 0.5625, + "learning_rate": 1.8702239250038433e-05, + "loss": 0.917, + "step": 57300 + }, + { + "epoch": 0.82, + "grad_norm": 0.51171875, + "learning_rate": 1.8687663344309958e-05, + "loss": 1.0256, + "step": 57305 + }, + { + "epoch": 0.82, + "grad_norm": 0.5234375, + "learning_rate": 1.867309253533134e-05, + "loss": 0.8435, + "step": 57310 + }, + { + "epoch": 0.82, + "grad_norm": 0.51171875, + "learning_rate": 1.8658526824015886e-05, + "loss": 0.9523, + "step": 57315 + }, + { + "epoch": 0.82, + "grad_norm": 0.5703125, + "learning_rate": 1.8643966211276655e-05, + "loss": 1.0054, + "step": 57320 + }, + { + "epoch": 0.82, + "grad_norm": 0.609375, + "learning_rate": 1.862941069802624e-05, + "loss": 1.081, + "step": 57325 + }, + { + "epoch": 0.82, + "grad_norm": 0.578125, + "learning_rate": 1.8614860285177038e-05, + "loss": 0.965, + "step": 57330 + }, + { + "epoch": 0.82, + "grad_norm": 0.58203125, + "learning_rate": 1.860031497364112e-05, + "loss": 0.9633, + "step": 57335 + }, + { + "epoch": 0.82, + "grad_norm": 0.5234375, + "learning_rate": 1.8585774764330154e-05, + "loss": 0.9439, + "step": 57340 + }, + { + "epoch": 0.82, + "grad_norm": 0.66015625, + "learning_rate": 1.8571239658155526e-05, + "loss": 1.0464, + "step": 57345 + }, + { + "epoch": 0.82, + "grad_norm": 0.58984375, + "learning_rate": 1.855670965602834e-05, + "loss": 0.9419, + "step": 57350 + }, + { + "epoch": 0.82, + "grad_norm": 0.53515625, + "learning_rate": 1.8542184758859372e-05, + "loss": 0.9403, + "step": 57355 + }, + { + "epoch": 0.82, + "grad_norm": 0.44921875, + "learning_rate": 1.8527664967559e-05, + "loss": 0.8075, + "step": 57360 + }, + { + "epoch": 0.82, + "grad_norm": 0.62109375, + "learning_rate": 1.8513150283037395e-05, + "loss": 1.0399, + "step": 57365 + }, + { + "epoch": 0.82, + "grad_norm": 0.65234375, + "learning_rate": 1.849864070620435e-05, + "loss": 0.9282, + "step": 57370 + }, + { + "epoch": 0.82, + "grad_norm": 0.478515625, + "learning_rate": 1.848413623796931e-05, + "loss": 0.8879, + "step": 57375 + }, + { + "epoch": 0.82, + "grad_norm": 0.66015625, + "learning_rate": 1.8469636879241438e-05, + "loss": 1.0933, + "step": 57380 + }, + { + "epoch": 0.82, + "grad_norm": 0.61328125, + "learning_rate": 1.845514263092961e-05, + "loss": 0.9499, + "step": 57385 + }, + { + "epoch": 0.82, + "grad_norm": 0.61328125, + "learning_rate": 1.8440653493942316e-05, + "loss": 0.9202, + "step": 57390 + }, + { + "epoch": 0.82, + "grad_norm": 0.58203125, + "learning_rate": 1.8426169469187726e-05, + "loss": 0.9033, + "step": 57395 + }, + { + "epoch": 0.82, + "grad_norm": 0.51953125, + "learning_rate": 1.8411690557573747e-05, + "loss": 1.0041, + "step": 57400 + }, + { + "epoch": 0.82, + "grad_norm": 0.5859375, + "learning_rate": 1.839721676000794e-05, + "loss": 0.9258, + "step": 57405 + }, + { + "epoch": 0.82, + "grad_norm": 0.59375, + "learning_rate": 1.8382748077397494e-05, + "loss": 0.9638, + "step": 57410 + }, + { + "epoch": 0.82, + "grad_norm": 0.5078125, + "learning_rate": 1.8368284510649358e-05, + "loss": 0.8681, + "step": 57415 + }, + { + "epoch": 0.82, + "grad_norm": 0.55078125, + "learning_rate": 1.8353826060670153e-05, + "loss": 1.0304, + "step": 57420 + }, + { + "epoch": 0.82, + "grad_norm": 0.55078125, + "learning_rate": 1.8339372728366077e-05, + "loss": 0.7835, + "step": 57425 + }, + { + "epoch": 0.82, + "grad_norm": 0.6640625, + "learning_rate": 1.8324924514643138e-05, + "loss": 0.8806, + "step": 57430 + }, + { + "epoch": 0.82, + "grad_norm": 0.5703125, + "learning_rate": 1.8310481420406967e-05, + "loss": 0.9688, + "step": 57435 + }, + { + "epoch": 0.82, + "grad_norm": 0.5234375, + "learning_rate": 1.829604344656286e-05, + "loss": 1.1028, + "step": 57440 + }, + { + "epoch": 0.82, + "grad_norm": 0.515625, + "learning_rate": 1.8281610594015775e-05, + "loss": 0.8831, + "step": 57445 + }, + { + "epoch": 0.82, + "grad_norm": 0.60546875, + "learning_rate": 1.826718286367043e-05, + "loss": 1.0611, + "step": 57450 + }, + { + "epoch": 0.82, + "grad_norm": 0.5078125, + "learning_rate": 1.8252760256431123e-05, + "loss": 0.9923, + "step": 57455 + }, + { + "epoch": 0.82, + "grad_norm": 0.54296875, + "learning_rate": 1.823834277320191e-05, + "loss": 0.8988, + "step": 57460 + }, + { + "epoch": 0.82, + "grad_norm": 0.6015625, + "learning_rate": 1.8223930414886515e-05, + "loss": 1.0085, + "step": 57465 + }, + { + "epoch": 0.82, + "grad_norm": 0.65625, + "learning_rate": 1.8209523182388276e-05, + "loss": 0.9161, + "step": 57470 + }, + { + "epoch": 0.82, + "grad_norm": 0.6796875, + "learning_rate": 1.8195121076610266e-05, + "loss": 1.0192, + "step": 57475 + }, + { + "epoch": 0.82, + "grad_norm": 0.6171875, + "learning_rate": 1.818072409845527e-05, + "loss": 0.8755, + "step": 57480 + }, + { + "epoch": 0.82, + "grad_norm": 0.48828125, + "learning_rate": 1.8166332248825645e-05, + "loss": 0.8561, + "step": 57485 + }, + { + "epoch": 0.82, + "grad_norm": 0.5859375, + "learning_rate": 1.8151945528623536e-05, + "loss": 1.0005, + "step": 57490 + }, + { + "epoch": 0.82, + "grad_norm": 0.51953125, + "learning_rate": 1.8137563938750667e-05, + "loss": 0.9342, + "step": 57495 + }, + { + "epoch": 0.82, + "grad_norm": 0.54296875, + "learning_rate": 1.812318748010856e-05, + "loss": 0.9835, + "step": 57500 + }, + { + "epoch": 0.82, + "grad_norm": 0.56640625, + "learning_rate": 1.8108816153598284e-05, + "loss": 1.023, + "step": 57505 + }, + { + "epoch": 0.82, + "grad_norm": 0.61328125, + "learning_rate": 1.8094449960120673e-05, + "loss": 0.9161, + "step": 57510 + }, + { + "epoch": 0.83, + "grad_norm": 0.57421875, + "learning_rate": 1.808008890057625e-05, + "loss": 0.8931, + "step": 57515 + }, + { + "epoch": 0.83, + "grad_norm": 0.466796875, + "learning_rate": 1.8065732975865134e-05, + "loss": 0.8917, + "step": 57520 + }, + { + "epoch": 0.83, + "grad_norm": 0.59765625, + "learning_rate": 1.805138218688719e-05, + "loss": 1.1648, + "step": 57525 + }, + { + "epoch": 0.83, + "grad_norm": 0.5234375, + "learning_rate": 1.8037036534541963e-05, + "loss": 0.8815, + "step": 57530 + }, + { + "epoch": 0.83, + "grad_norm": 0.66015625, + "learning_rate": 1.8022696019728602e-05, + "loss": 1.0268, + "step": 57535 + }, + { + "epoch": 0.83, + "grad_norm": 0.52734375, + "learning_rate": 1.8008360643346022e-05, + "loss": 0.901, + "step": 57540 + }, + { + "epoch": 0.83, + "grad_norm": 0.5546875, + "learning_rate": 1.7994030406292837e-05, + "loss": 1.0224, + "step": 57545 + }, + { + "epoch": 0.83, + "grad_norm": 0.64453125, + "learning_rate": 1.7979705309467164e-05, + "loss": 1.0273, + "step": 57550 + }, + { + "epoch": 0.83, + "grad_norm": 0.5234375, + "learning_rate": 1.7965385353766985e-05, + "loss": 0.8829, + "step": 57555 + }, + { + "epoch": 0.83, + "grad_norm": 0.53515625, + "learning_rate": 1.7951070540089898e-05, + "loss": 1.1264, + "step": 57560 + }, + { + "epoch": 0.83, + "grad_norm": 0.52734375, + "learning_rate": 1.7936760869333137e-05, + "loss": 0.9873, + "step": 57565 + }, + { + "epoch": 0.83, + "grad_norm": 0.515625, + "learning_rate": 1.7922456342393658e-05, + "loss": 0.9088, + "step": 57570 + }, + { + "epoch": 0.83, + "grad_norm": 0.5546875, + "learning_rate": 1.790815696016812e-05, + "loss": 0.9321, + "step": 57575 + }, + { + "epoch": 0.83, + "grad_norm": 0.52734375, + "learning_rate": 1.789386272355278e-05, + "loss": 1.0373, + "step": 57580 + }, + { + "epoch": 0.83, + "grad_norm": 0.6015625, + "learning_rate": 1.7879573633443625e-05, + "loss": 1.0655, + "step": 57585 + }, + { + "epoch": 0.83, + "grad_norm": 0.53125, + "learning_rate": 1.7865289690736364e-05, + "loss": 0.8181, + "step": 57590 + }, + { + "epoch": 0.83, + "grad_norm": 0.61328125, + "learning_rate": 1.785101089632627e-05, + "loss": 0.8864, + "step": 57595 + }, + { + "epoch": 0.83, + "grad_norm": 0.59375, + "learning_rate": 1.783673725110836e-05, + "loss": 0.9482, + "step": 57600 + }, + { + "epoch": 0.83, + "grad_norm": 0.5625, + "learning_rate": 1.782246875597733e-05, + "loss": 0.834, + "step": 57605 + }, + { + "epoch": 0.83, + "grad_norm": 0.58203125, + "learning_rate": 1.7808205411827582e-05, + "loss": 0.8415, + "step": 57610 + }, + { + "epoch": 0.83, + "grad_norm": 0.65625, + "learning_rate": 1.77939472195531e-05, + "loss": 0.8928, + "step": 57615 + }, + { + "epoch": 0.83, + "grad_norm": 0.54296875, + "learning_rate": 1.7779694180047623e-05, + "loss": 0.9688, + "step": 57620 + }, + { + "epoch": 0.83, + "grad_norm": 0.5, + "learning_rate": 1.7765446294204592e-05, + "loss": 1.0195, + "step": 57625 + }, + { + "epoch": 0.83, + "grad_norm": 0.6640625, + "learning_rate": 1.7751203562917018e-05, + "loss": 0.9816, + "step": 57630 + }, + { + "epoch": 0.83, + "grad_norm": 0.5, + "learning_rate": 1.773696598707767e-05, + "loss": 0.9331, + "step": 57635 + }, + { + "epoch": 0.83, + "grad_norm": 0.609375, + "learning_rate": 1.772273356757902e-05, + "loss": 0.8634, + "step": 57640 + }, + { + "epoch": 0.83, + "grad_norm": 0.64453125, + "learning_rate": 1.77085063053131e-05, + "loss": 0.9065, + "step": 57645 + }, + { + "epoch": 0.83, + "grad_norm": 0.83984375, + "learning_rate": 1.7694284201171752e-05, + "loss": 0.9567, + "step": 57650 + }, + { + "epoch": 0.83, + "grad_norm": 0.59375, + "learning_rate": 1.768006725604642e-05, + "loss": 1.1036, + "step": 57655 + }, + { + "epoch": 0.83, + "grad_norm": 0.54296875, + "learning_rate": 1.7665855470828197e-05, + "loss": 1.0417, + "step": 57660 + }, + { + "epoch": 0.83, + "grad_norm": 0.625, + "learning_rate": 1.765164884640792e-05, + "loss": 0.9475, + "step": 57665 + }, + { + "epoch": 0.83, + "grad_norm": 0.51953125, + "learning_rate": 1.763744738367611e-05, + "loss": 0.8683, + "step": 57670 + }, + { + "epoch": 0.83, + "grad_norm": 0.55078125, + "learning_rate": 1.7623251083522863e-05, + "loss": 1.0274, + "step": 57675 + }, + { + "epoch": 0.83, + "grad_norm": 0.6171875, + "learning_rate": 1.760905994683807e-05, + "loss": 0.9602, + "step": 57680 + }, + { + "epoch": 0.83, + "grad_norm": 0.64453125, + "learning_rate": 1.7594873974511263e-05, + "loss": 0.9219, + "step": 57685 + }, + { + "epoch": 0.83, + "grad_norm": 0.6328125, + "learning_rate": 1.7580693167431573e-05, + "loss": 0.9376, + "step": 57690 + }, + { + "epoch": 0.83, + "grad_norm": 0.52734375, + "learning_rate": 1.7566517526487903e-05, + "loss": 1.0304, + "step": 57695 + }, + { + "epoch": 0.83, + "grad_norm": 0.5390625, + "learning_rate": 1.755234705256883e-05, + "loss": 0.9291, + "step": 57700 + }, + { + "epoch": 0.83, + "grad_norm": 0.59375, + "learning_rate": 1.7538181746562543e-05, + "loss": 1.069, + "step": 57705 + }, + { + "epoch": 0.83, + "grad_norm": 0.4609375, + "learning_rate": 1.752402160935691e-05, + "loss": 1.0669, + "step": 57710 + }, + { + "epoch": 0.83, + "grad_norm": 0.5859375, + "learning_rate": 1.7509866641839534e-05, + "loss": 1.0158, + "step": 57715 + }, + { + "epoch": 0.83, + "grad_norm": 0.5234375, + "learning_rate": 1.7495716844897692e-05, + "loss": 0.8566, + "step": 57720 + }, + { + "epoch": 0.83, + "grad_norm": 0.53125, + "learning_rate": 1.7481572219418263e-05, + "loss": 0.9092, + "step": 57725 + }, + { + "epoch": 0.83, + "grad_norm": 0.5703125, + "learning_rate": 1.746743276628786e-05, + "loss": 0.9913, + "step": 57730 + }, + { + "epoch": 0.83, + "grad_norm": 0.52734375, + "learning_rate": 1.74532984863928e-05, + "loss": 0.8428, + "step": 57735 + }, + { + "epoch": 0.83, + "grad_norm": 0.48828125, + "learning_rate": 1.743916938061898e-05, + "loss": 0.8582, + "step": 57740 + }, + { + "epoch": 0.83, + "grad_norm": 0.5703125, + "learning_rate": 1.7425045449852053e-05, + "loss": 1.0616, + "step": 57745 + }, + { + "epoch": 0.83, + "grad_norm": 0.6015625, + "learning_rate": 1.7410926694977337e-05, + "loss": 0.8835, + "step": 57750 + }, + { + "epoch": 0.83, + "grad_norm": 0.56640625, + "learning_rate": 1.7396813116879794e-05, + "loss": 1.0282, + "step": 57755 + }, + { + "epoch": 0.83, + "grad_norm": 0.51171875, + "learning_rate": 1.7382704716444075e-05, + "loss": 0.9069, + "step": 57760 + }, + { + "epoch": 0.83, + "grad_norm": 0.62890625, + "learning_rate": 1.7368601494554526e-05, + "loss": 0.9493, + "step": 57765 + }, + { + "epoch": 0.83, + "grad_norm": 0.57421875, + "learning_rate": 1.7354503452095128e-05, + "loss": 0.9655, + "step": 57770 + }, + { + "epoch": 0.83, + "grad_norm": 0.5234375, + "learning_rate": 1.7340410589949572e-05, + "loss": 0.9934, + "step": 57775 + }, + { + "epoch": 0.83, + "grad_norm": 0.498046875, + "learning_rate": 1.7326322909001258e-05, + "loss": 0.9102, + "step": 57780 + }, + { + "epoch": 0.83, + "grad_norm": 0.59375, + "learning_rate": 1.7312240410133153e-05, + "loss": 1.1948, + "step": 57785 + }, + { + "epoch": 0.83, + "grad_norm": 0.546875, + "learning_rate": 1.7298163094227982e-05, + "loss": 0.8848, + "step": 57790 + }, + { + "epoch": 0.83, + "grad_norm": 0.59375, + "learning_rate": 1.7284090962168176e-05, + "loss": 0.8271, + "step": 57795 + }, + { + "epoch": 0.83, + "grad_norm": 0.53125, + "learning_rate": 1.7270024014835716e-05, + "loss": 1.039, + "step": 57800 + }, + { + "epoch": 0.83, + "grad_norm": 0.478515625, + "learning_rate": 1.7255962253112413e-05, + "loss": 0.8683, + "step": 57805 + }, + { + "epoch": 0.83, + "grad_norm": 0.53125, + "learning_rate": 1.72419056778796e-05, + "loss": 1.0126, + "step": 57810 + }, + { + "epoch": 0.83, + "grad_norm": 0.59765625, + "learning_rate": 1.722785429001842e-05, + "loss": 0.933, + "step": 57815 + }, + { + "epoch": 0.83, + "grad_norm": 0.6171875, + "learning_rate": 1.7213808090409566e-05, + "loss": 0.963, + "step": 57820 + }, + { + "epoch": 0.83, + "grad_norm": 0.59375, + "learning_rate": 1.7199767079933527e-05, + "loss": 0.8474, + "step": 57825 + }, + { + "epoch": 0.83, + "grad_norm": 0.65234375, + "learning_rate": 1.7185731259470395e-05, + "loss": 1.1649, + "step": 57830 + }, + { + "epoch": 0.83, + "grad_norm": 0.4921875, + "learning_rate": 1.7171700629899934e-05, + "loss": 0.9088, + "step": 57835 + }, + { + "epoch": 0.83, + "grad_norm": 0.59765625, + "learning_rate": 1.715767519210161e-05, + "loss": 1.0726, + "step": 57840 + }, + { + "epoch": 0.83, + "grad_norm": 0.546875, + "learning_rate": 1.714365494695457e-05, + "loss": 1.0465, + "step": 57845 + }, + { + "epoch": 0.83, + "grad_norm": 0.5546875, + "learning_rate": 1.712963989533758e-05, + "loss": 0.9767, + "step": 57850 + }, + { + "epoch": 0.83, + "grad_norm": 0.484375, + "learning_rate": 1.711563003812915e-05, + "loss": 0.9515, + "step": 57855 + }, + { + "epoch": 0.83, + "grad_norm": 0.546875, + "learning_rate": 1.7101625376207465e-05, + "loss": 0.9817, + "step": 57860 + }, + { + "epoch": 0.83, + "grad_norm": 0.498046875, + "learning_rate": 1.7087625910450277e-05, + "loss": 1.0111, + "step": 57865 + }, + { + "epoch": 0.83, + "grad_norm": 0.53515625, + "learning_rate": 1.7073631641735122e-05, + "loss": 0.9127, + "step": 57870 + }, + { + "epoch": 0.83, + "grad_norm": 0.431640625, + "learning_rate": 1.7059642570939204e-05, + "loss": 0.89, + "step": 57875 + }, + { + "epoch": 0.83, + "grad_norm": 0.87109375, + "learning_rate": 1.7045658698939327e-05, + "loss": 0.9156, + "step": 57880 + }, + { + "epoch": 0.83, + "grad_norm": 0.66796875, + "learning_rate": 1.703168002661204e-05, + "loss": 0.839, + "step": 57885 + }, + { + "epoch": 0.83, + "grad_norm": 0.50390625, + "learning_rate": 1.7017706554833568e-05, + "loss": 0.8972, + "step": 57890 + }, + { + "epoch": 0.83, + "grad_norm": 0.58984375, + "learning_rate": 1.700373828447973e-05, + "loss": 0.909, + "step": 57895 + }, + { + "epoch": 0.83, + "grad_norm": 0.56640625, + "learning_rate": 1.6989775216426106e-05, + "loss": 1.077, + "step": 57900 + }, + { + "epoch": 0.83, + "grad_norm": 0.67578125, + "learning_rate": 1.697581735154793e-05, + "loss": 1.0366, + "step": 57905 + }, + { + "epoch": 0.83, + "grad_norm": 0.58203125, + "learning_rate": 1.6961864690720087e-05, + "loss": 1.0014, + "step": 57910 + }, + { + "epoch": 0.83, + "grad_norm": 0.57421875, + "learning_rate": 1.6947917234817114e-05, + "loss": 1.0623, + "step": 57915 + }, + { + "epoch": 0.83, + "grad_norm": 0.48046875, + "learning_rate": 1.6933974984713263e-05, + "loss": 0.9165, + "step": 57920 + }, + { + "epoch": 0.83, + "grad_norm": 0.51171875, + "learning_rate": 1.692003794128251e-05, + "loss": 0.8876, + "step": 57925 + }, + { + "epoch": 0.83, + "grad_norm": 0.6171875, + "learning_rate": 1.6906106105398356e-05, + "loss": 0.8691, + "step": 57930 + }, + { + "epoch": 0.83, + "grad_norm": 0.55859375, + "learning_rate": 1.6892179477934112e-05, + "loss": 0.9261, + "step": 57935 + }, + { + "epoch": 0.83, + "grad_norm": 0.5703125, + "learning_rate": 1.687825805976274e-05, + "loss": 0.9677, + "step": 57940 + }, + { + "epoch": 0.83, + "grad_norm": 0.5703125, + "learning_rate": 1.686434185175679e-05, + "loss": 0.9245, + "step": 57945 + }, + { + "epoch": 0.83, + "grad_norm": 0.515625, + "learning_rate": 1.685043085478858e-05, + "loss": 0.9576, + "step": 57950 + }, + { + "epoch": 0.83, + "grad_norm": 0.5625, + "learning_rate": 1.683652506973008e-05, + "loss": 0.9034, + "step": 57955 + }, + { + "epoch": 0.83, + "grad_norm": 0.6171875, + "learning_rate": 1.6822624497452888e-05, + "loss": 0.967, + "step": 57960 + }, + { + "epoch": 0.83, + "grad_norm": 0.609375, + "learning_rate": 1.680872913882835e-05, + "loss": 0.9726, + "step": 57965 + }, + { + "epoch": 0.83, + "grad_norm": 0.44921875, + "learning_rate": 1.679483899472739e-05, + "loss": 1.0131, + "step": 57970 + }, + { + "epoch": 0.83, + "grad_norm": 0.56640625, + "learning_rate": 1.6780954066020704e-05, + "loss": 1.1013, + "step": 57975 + }, + { + "epoch": 0.83, + "grad_norm": 0.53125, + "learning_rate": 1.6767074353578572e-05, + "loss": 0.9656, + "step": 57980 + }, + { + "epoch": 0.83, + "grad_norm": 0.5078125, + "learning_rate": 1.675319985827104e-05, + "loss": 0.8297, + "step": 57985 + }, + { + "epoch": 0.83, + "grad_norm": 0.51953125, + "learning_rate": 1.6739330580967728e-05, + "loss": 0.969, + "step": 57990 + }, + { + "epoch": 0.83, + "grad_norm": 0.671875, + "learning_rate": 1.6725466522538e-05, + "loss": 0.8789, + "step": 57995 + }, + { + "epoch": 0.83, + "grad_norm": 0.55859375, + "learning_rate": 1.6711607683850905e-05, + "loss": 0.8702, + "step": 58000 + }, + { + "epoch": 0.83, + "grad_norm": 0.6875, + "learning_rate": 1.669775406577506e-05, + "loss": 0.9958, + "step": 58005 + }, + { + "epoch": 0.83, + "grad_norm": 0.640625, + "learning_rate": 1.6683905669178866e-05, + "loss": 1.0337, + "step": 58010 + }, + { + "epoch": 0.83, + "grad_norm": 0.53125, + "learning_rate": 1.667006249493038e-05, + "loss": 0.9877, + "step": 58015 + }, + { + "epoch": 0.83, + "grad_norm": 0.5078125, + "learning_rate": 1.665622454389729e-05, + "loss": 1.002, + "step": 58020 + }, + { + "epoch": 0.83, + "grad_norm": 0.50390625, + "learning_rate": 1.6642391816946934e-05, + "loss": 0.9392, + "step": 58025 + }, + { + "epoch": 0.83, + "grad_norm": 0.4453125, + "learning_rate": 1.6628564314946393e-05, + "loss": 0.8782, + "step": 58030 + }, + { + "epoch": 0.83, + "grad_norm": 0.53515625, + "learning_rate": 1.661474203876242e-05, + "loss": 0.9426, + "step": 58035 + }, + { + "epoch": 0.83, + "grad_norm": 0.55859375, + "learning_rate": 1.6600924989261368e-05, + "loss": 1.0012, + "step": 58040 + }, + { + "epoch": 0.83, + "grad_norm": 0.56640625, + "learning_rate": 1.658711316730931e-05, + "loss": 1.0189, + "step": 58045 + }, + { + "epoch": 0.83, + "grad_norm": 0.56640625, + "learning_rate": 1.657330657377202e-05, + "loss": 0.9001, + "step": 58050 + }, + { + "epoch": 0.83, + "grad_norm": 0.5703125, + "learning_rate": 1.655950520951487e-05, + "loss": 0.9475, + "step": 58055 + }, + { + "epoch": 0.83, + "grad_norm": 0.54296875, + "learning_rate": 1.6545709075402972e-05, + "loss": 0.8767, + "step": 58060 + }, + { + "epoch": 0.83, + "grad_norm": 0.50390625, + "learning_rate": 1.6531918172301087e-05, + "loss": 0.8723, + "step": 58065 + }, + { + "epoch": 0.83, + "grad_norm": 0.51953125, + "learning_rate": 1.6518132501073634e-05, + "loss": 1.0125, + "step": 58070 + }, + { + "epoch": 0.83, + "grad_norm": 0.8203125, + "learning_rate": 1.6504352062584692e-05, + "loss": 0.9769, + "step": 58075 + }, + { + "epoch": 0.83, + "grad_norm": 0.64453125, + "learning_rate": 1.649057685769806e-05, + "loss": 0.991, + "step": 58080 + }, + { + "epoch": 0.83, + "grad_norm": 0.5859375, + "learning_rate": 1.6476806887277208e-05, + "loss": 0.9772, + "step": 58085 + }, + { + "epoch": 0.83, + "grad_norm": 0.5859375, + "learning_rate": 1.6463042152185193e-05, + "loss": 1.0976, + "step": 58090 + }, + { + "epoch": 0.83, + "grad_norm": 0.6953125, + "learning_rate": 1.6449282653284836e-05, + "loss": 1.1015, + "step": 58095 + }, + { + "epoch": 0.83, + "grad_norm": 0.546875, + "learning_rate": 1.6435528391438626e-05, + "loss": 0.9803, + "step": 58100 + }, + { + "epoch": 0.83, + "grad_norm": 0.59375, + "learning_rate": 1.642177936750865e-05, + "loss": 1.1817, + "step": 58105 + }, + { + "epoch": 0.83, + "grad_norm": 0.52734375, + "learning_rate": 1.640803558235672e-05, + "loss": 0.9719, + "step": 58110 + }, + { + "epoch": 0.83, + "grad_norm": 0.6484375, + "learning_rate": 1.6394297036844353e-05, + "loss": 1.0251, + "step": 58115 + }, + { + "epoch": 0.83, + "grad_norm": 0.5, + "learning_rate": 1.6380563731832664e-05, + "loss": 1.0194, + "step": 58120 + }, + { + "epoch": 0.83, + "grad_norm": 0.486328125, + "learning_rate": 1.6366835668182455e-05, + "loss": 1.0195, + "step": 58125 + }, + { + "epoch": 0.83, + "grad_norm": 0.5703125, + "learning_rate": 1.6353112846754247e-05, + "loss": 0.8529, + "step": 58130 + }, + { + "epoch": 0.83, + "grad_norm": 0.58203125, + "learning_rate": 1.6339395268408188e-05, + "loss": 0.9441, + "step": 58135 + }, + { + "epoch": 0.83, + "grad_norm": 0.5546875, + "learning_rate": 1.6325682934004104e-05, + "loss": 0.9558, + "step": 58140 + }, + { + "epoch": 0.83, + "grad_norm": 0.58984375, + "learning_rate": 1.6311975844401528e-05, + "loss": 0.9949, + "step": 58145 + }, + { + "epoch": 0.83, + "grad_norm": 0.55078125, + "learning_rate": 1.6298274000459612e-05, + "loss": 0.9409, + "step": 58150 + }, + { + "epoch": 0.83, + "grad_norm": 0.443359375, + "learning_rate": 1.6284577403037193e-05, + "loss": 0.9242, + "step": 58155 + }, + { + "epoch": 0.83, + "grad_norm": 0.62109375, + "learning_rate": 1.627088605299284e-05, + "loss": 1.1643, + "step": 58160 + }, + { + "epoch": 0.83, + "grad_norm": 0.5703125, + "learning_rate": 1.6257199951184686e-05, + "loss": 1.0059, + "step": 58165 + }, + { + "epoch": 0.83, + "grad_norm": 0.55078125, + "learning_rate": 1.6243519098470606e-05, + "loss": 0.9065, + "step": 58170 + }, + { + "epoch": 0.83, + "grad_norm": 0.53125, + "learning_rate": 1.6229843495708163e-05, + "loss": 0.9551, + "step": 58175 + }, + { + "epoch": 0.83, + "grad_norm": 0.54296875, + "learning_rate": 1.6216173143754544e-05, + "loss": 1.0303, + "step": 58180 + }, + { + "epoch": 0.83, + "grad_norm": 0.443359375, + "learning_rate": 1.620250804346659e-05, + "loss": 0.7746, + "step": 58185 + }, + { + "epoch": 0.83, + "grad_norm": 0.54296875, + "learning_rate": 1.618884819570087e-05, + "loss": 0.8097, + "step": 58190 + }, + { + "epoch": 0.83, + "grad_norm": 0.4921875, + "learning_rate": 1.617519360131361e-05, + "loss": 1.0209, + "step": 58195 + }, + { + "epoch": 0.83, + "grad_norm": 0.53125, + "learning_rate": 1.6161544261160676e-05, + "loss": 0.8574, + "step": 58200 + }, + { + "epoch": 0.83, + "grad_norm": 0.5, + "learning_rate": 1.614790017609762e-05, + "loss": 0.9302, + "step": 58205 + }, + { + "epoch": 0.83, + "grad_norm": 0.6171875, + "learning_rate": 1.6134261346979707e-05, + "loss": 1.0741, + "step": 58210 + }, + { + "epoch": 0.84, + "grad_norm": 0.51953125, + "learning_rate": 1.6120627774661788e-05, + "loss": 0.9656, + "step": 58215 + }, + { + "epoch": 0.84, + "grad_norm": 0.53125, + "learning_rate": 1.610699945999844e-05, + "loss": 0.8366, + "step": 58220 + }, + { + "epoch": 0.84, + "grad_norm": 0.55078125, + "learning_rate": 1.6093376403843973e-05, + "loss": 0.9303, + "step": 58225 + }, + { + "epoch": 0.84, + "grad_norm": 0.765625, + "learning_rate": 1.6079758607052176e-05, + "loss": 0.9636, + "step": 58230 + }, + { + "epoch": 0.84, + "grad_norm": 0.5625, + "learning_rate": 1.6066146070476696e-05, + "loss": 0.9858, + "step": 58235 + }, + { + "epoch": 0.84, + "grad_norm": 0.451171875, + "learning_rate": 1.6052538794970795e-05, + "loss": 0.8403, + "step": 58240 + }, + { + "epoch": 0.84, + "grad_norm": 0.5546875, + "learning_rate": 1.6038936781387337e-05, + "loss": 0.9222, + "step": 58245 + }, + { + "epoch": 0.84, + "grad_norm": 0.50390625, + "learning_rate": 1.602534003057895e-05, + "loss": 0.7898, + "step": 58250 + }, + { + "epoch": 0.84, + "grad_norm": 0.54296875, + "learning_rate": 1.601174854339792e-05, + "loss": 1.1416, + "step": 58255 + }, + { + "epoch": 0.84, + "grad_norm": 0.55859375, + "learning_rate": 1.5998162320696118e-05, + "loss": 0.9055, + "step": 58260 + }, + { + "epoch": 0.84, + "grad_norm": 0.6171875, + "learning_rate": 1.5984581363325168e-05, + "loss": 0.9214, + "step": 58265 + }, + { + "epoch": 0.84, + "grad_norm": 0.54296875, + "learning_rate": 1.5971005672136375e-05, + "loss": 0.9064, + "step": 58270 + }, + { + "epoch": 0.84, + "grad_norm": 0.5390625, + "learning_rate": 1.595743524798061e-05, + "loss": 1.0478, + "step": 58275 + }, + { + "epoch": 0.84, + "grad_norm": 0.578125, + "learning_rate": 1.5943870091708558e-05, + "loss": 1.0135, + "step": 58280 + }, + { + "epoch": 0.84, + "grad_norm": 0.70703125, + "learning_rate": 1.5930310204170427e-05, + "loss": 0.9129, + "step": 58285 + }, + { + "epoch": 0.84, + "grad_norm": 0.50390625, + "learning_rate": 1.5916755586216236e-05, + "loss": 0.782, + "step": 58290 + }, + { + "epoch": 0.84, + "grad_norm": 0.5078125, + "learning_rate": 1.5903206238695535e-05, + "loss": 0.8294, + "step": 58295 + }, + { + "epoch": 0.84, + "grad_norm": 0.5703125, + "learning_rate": 1.588966216245764e-05, + "loss": 0.9915, + "step": 58300 + }, + { + "epoch": 0.84, + "grad_norm": 0.65234375, + "learning_rate": 1.5876123358351547e-05, + "loss": 0.9133, + "step": 58305 + }, + { + "epoch": 0.84, + "grad_norm": 0.59375, + "learning_rate": 1.5862589827225827e-05, + "loss": 1.0046, + "step": 58310 + }, + { + "epoch": 0.84, + "grad_norm": 0.52734375, + "learning_rate": 1.5849061569928802e-05, + "loss": 1.0208, + "step": 58315 + }, + { + "epoch": 0.84, + "grad_norm": 0.455078125, + "learning_rate": 1.5835538587308453e-05, + "loss": 0.836, + "step": 58320 + }, + { + "epoch": 0.84, + "grad_norm": 0.578125, + "learning_rate": 1.5822020880212383e-05, + "loss": 0.9109, + "step": 58325 + }, + { + "epoch": 0.84, + "grad_norm": 0.56640625, + "learning_rate": 1.5808508449487935e-05, + "loss": 0.8614, + "step": 58330 + }, + { + "epoch": 0.84, + "grad_norm": 0.5390625, + "learning_rate": 1.579500129598207e-05, + "loss": 0.9432, + "step": 58335 + }, + { + "epoch": 0.84, + "grad_norm": 0.515625, + "learning_rate": 1.57814994205414e-05, + "loss": 0.956, + "step": 58340 + }, + { + "epoch": 0.84, + "grad_norm": 0.55078125, + "learning_rate": 1.5768002824012263e-05, + "loss": 1.0127, + "step": 58345 + }, + { + "epoch": 0.84, + "grad_norm": 0.546875, + "learning_rate": 1.5754511507240666e-05, + "loss": 0.9024, + "step": 58350 + }, + { + "epoch": 0.84, + "grad_norm": 0.58984375, + "learning_rate": 1.5741025471072202e-05, + "loss": 1.0308, + "step": 58355 + }, + { + "epoch": 0.84, + "grad_norm": 0.546875, + "learning_rate": 1.5727544716352228e-05, + "loss": 1.0369, + "step": 58360 + }, + { + "epoch": 0.84, + "grad_norm": 0.5546875, + "learning_rate": 1.5714069243925754e-05, + "loss": 0.8761, + "step": 58365 + }, + { + "epoch": 0.84, + "grad_norm": 0.52734375, + "learning_rate": 1.57005990546374e-05, + "loss": 0.9829, + "step": 58370 + }, + { + "epoch": 0.84, + "grad_norm": 0.5703125, + "learning_rate": 1.5687134149331496e-05, + "loss": 0.9662, + "step": 58375 + }, + { + "epoch": 0.84, + "grad_norm": 0.54296875, + "learning_rate": 1.5673674528852065e-05, + "loss": 1.0646, + "step": 58380 + }, + { + "epoch": 0.84, + "grad_norm": 0.5078125, + "learning_rate": 1.5660220194042774e-05, + "loss": 0.855, + "step": 58385 + }, + { + "epoch": 0.84, + "grad_norm": 0.55859375, + "learning_rate": 1.5646771145746897e-05, + "loss": 0.9553, + "step": 58390 + }, + { + "epoch": 0.84, + "grad_norm": 0.5546875, + "learning_rate": 1.5633327384807473e-05, + "loss": 0.9051, + "step": 58395 + }, + { + "epoch": 0.84, + "grad_norm": 0.56640625, + "learning_rate": 1.5619888912067205e-05, + "loss": 1.0056, + "step": 58400 + }, + { + "epoch": 0.84, + "grad_norm": 0.58203125, + "learning_rate": 1.5606455728368375e-05, + "loss": 0.9086, + "step": 58405 + }, + { + "epoch": 0.84, + "grad_norm": 0.68359375, + "learning_rate": 1.559302783455302e-05, + "loss": 0.9692, + "step": 58410 + }, + { + "epoch": 0.84, + "grad_norm": 0.54296875, + "learning_rate": 1.5579605231462835e-05, + "loss": 1.0364, + "step": 58415 + }, + { + "epoch": 0.84, + "grad_norm": 0.5390625, + "learning_rate": 1.5566187919939114e-05, + "loss": 0.8547, + "step": 58420 + }, + { + "epoch": 0.84, + "grad_norm": 0.51953125, + "learning_rate": 1.555277590082289e-05, + "loss": 0.9497, + "step": 58425 + }, + { + "epoch": 0.84, + "grad_norm": 0.6171875, + "learning_rate": 1.5539369174954887e-05, + "loss": 0.955, + "step": 58430 + }, + { + "epoch": 0.84, + "grad_norm": 0.53515625, + "learning_rate": 1.552596774317541e-05, + "loss": 0.9568, + "step": 58435 + }, + { + "epoch": 0.84, + "grad_norm": 0.5625, + "learning_rate": 1.5512571606324454e-05, + "loss": 0.8585, + "step": 58440 + }, + { + "epoch": 0.84, + "grad_norm": 0.486328125, + "learning_rate": 1.549918076524176e-05, + "loss": 0.9049, + "step": 58445 + }, + { + "epoch": 0.84, + "grad_norm": 0.46484375, + "learning_rate": 1.5485795220766642e-05, + "loss": 0.9563, + "step": 58450 + }, + { + "epoch": 0.84, + "grad_norm": 0.58203125, + "learning_rate": 1.5472414973738115e-05, + "loss": 0.9304, + "step": 58455 + }, + { + "epoch": 0.84, + "grad_norm": 0.63671875, + "learning_rate": 1.545904002499492e-05, + "loss": 0.9151, + "step": 58460 + }, + { + "epoch": 0.84, + "grad_norm": 0.53515625, + "learning_rate": 1.5445670375375354e-05, + "loss": 0.8703, + "step": 58465 + }, + { + "epoch": 0.84, + "grad_norm": 0.55078125, + "learning_rate": 1.543230602571747e-05, + "loss": 0.9275, + "step": 58470 + }, + { + "epoch": 0.84, + "grad_norm": 0.59375, + "learning_rate": 1.5418946976858983e-05, + "loss": 0.8973, + "step": 58475 + }, + { + "epoch": 0.84, + "grad_norm": 0.63671875, + "learning_rate": 1.5405593229637206e-05, + "loss": 0.9844, + "step": 58480 + }, + { + "epoch": 0.84, + "grad_norm": 0.734375, + "learning_rate": 1.5392244784889188e-05, + "loss": 0.8972, + "step": 58485 + }, + { + "epoch": 0.84, + "grad_norm": 0.55859375, + "learning_rate": 1.537890164345165e-05, + "loss": 0.8179, + "step": 58490 + }, + { + "epoch": 0.84, + "grad_norm": 0.546875, + "learning_rate": 1.536556380616093e-05, + "loss": 0.9699, + "step": 58495 + }, + { + "epoch": 0.84, + "grad_norm": 0.48046875, + "learning_rate": 1.5352231273853046e-05, + "loss": 0.9691, + "step": 58500 + }, + { + "epoch": 0.84, + "grad_norm": 0.56640625, + "learning_rate": 1.53389040473637e-05, + "loss": 0.9526, + "step": 58505 + }, + { + "epoch": 0.84, + "grad_norm": 0.53125, + "learning_rate": 1.5325582127528303e-05, + "loss": 0.9293, + "step": 58510 + }, + { + "epoch": 0.84, + "grad_norm": 0.67578125, + "learning_rate": 1.5312265515181824e-05, + "loss": 0.9441, + "step": 58515 + }, + { + "epoch": 0.84, + "grad_norm": 0.57421875, + "learning_rate": 1.5298954211159e-05, + "loss": 0.9324, + "step": 58520 + }, + { + "epoch": 0.84, + "grad_norm": 0.515625, + "learning_rate": 1.5285648216294213e-05, + "loss": 0.8834, + "step": 58525 + }, + { + "epoch": 0.84, + "grad_norm": 0.51171875, + "learning_rate": 1.527234753142145e-05, + "loss": 0.9087, + "step": 58530 + }, + { + "epoch": 0.84, + "grad_norm": 0.4921875, + "learning_rate": 1.5259052157374442e-05, + "loss": 0.9629, + "step": 58535 + }, + { + "epoch": 0.84, + "grad_norm": 0.6015625, + "learning_rate": 1.5245762094986581e-05, + "loss": 0.9918, + "step": 58540 + }, + { + "epoch": 0.84, + "grad_norm": 0.5703125, + "learning_rate": 1.523247734509089e-05, + "loss": 1.027, + "step": 58545 + }, + { + "epoch": 0.84, + "grad_norm": 0.474609375, + "learning_rate": 1.5219197908520023e-05, + "loss": 1.0379, + "step": 58550 + }, + { + "epoch": 0.84, + "grad_norm": 0.50390625, + "learning_rate": 1.5205923786106414e-05, + "loss": 1.0219, + "step": 58555 + }, + { + "epoch": 0.84, + "grad_norm": 0.55078125, + "learning_rate": 1.5192654978682052e-05, + "loss": 1.1855, + "step": 58560 + }, + { + "epoch": 0.84, + "grad_norm": 0.51953125, + "learning_rate": 1.5179391487078664e-05, + "loss": 0.8573, + "step": 58565 + }, + { + "epoch": 0.84, + "grad_norm": 0.58203125, + "learning_rate": 1.516613331212765e-05, + "loss": 1.0287, + "step": 58570 + }, + { + "epoch": 0.84, + "grad_norm": 0.51953125, + "learning_rate": 1.515288045465999e-05, + "loss": 0.9057, + "step": 58575 + }, + { + "epoch": 0.84, + "grad_norm": 0.5859375, + "learning_rate": 1.5139632915506407e-05, + "loss": 0.9238, + "step": 58580 + }, + { + "epoch": 0.84, + "grad_norm": 0.546875, + "learning_rate": 1.5126390695497317e-05, + "loss": 0.8945, + "step": 58585 + }, + { + "epoch": 0.84, + "grad_norm": 0.5859375, + "learning_rate": 1.5113153795462687e-05, + "loss": 0.9769, + "step": 58590 + }, + { + "epoch": 0.84, + "grad_norm": 0.494140625, + "learning_rate": 1.5099922216232288e-05, + "loss": 0.8387, + "step": 58595 + }, + { + "epoch": 0.84, + "grad_norm": 0.52734375, + "learning_rate": 1.5086695958635432e-05, + "loss": 0.8441, + "step": 58600 + }, + { + "epoch": 0.84, + "grad_norm": 0.6328125, + "learning_rate": 1.5073475023501204e-05, + "loss": 1.0256, + "step": 58605 + }, + { + "epoch": 0.84, + "grad_norm": 0.61328125, + "learning_rate": 1.5060259411658261e-05, + "loss": 1.1083, + "step": 58610 + }, + { + "epoch": 0.84, + "grad_norm": 0.5234375, + "learning_rate": 1.5047049123934987e-05, + "loss": 1.0134, + "step": 58615 + }, + { + "epoch": 0.84, + "grad_norm": 0.5859375, + "learning_rate": 1.503384416115946e-05, + "loss": 0.8517, + "step": 58620 + }, + { + "epoch": 0.84, + "grad_norm": 0.5625, + "learning_rate": 1.5020644524159333e-05, + "loss": 0.8647, + "step": 58625 + }, + { + "epoch": 0.84, + "grad_norm": 0.5703125, + "learning_rate": 1.5007450213761976e-05, + "loss": 0.953, + "step": 58630 + }, + { + "epoch": 0.84, + "grad_norm": 0.546875, + "learning_rate": 1.499426123079447e-05, + "loss": 1.1321, + "step": 58635 + }, + { + "epoch": 0.84, + "grad_norm": 0.53125, + "learning_rate": 1.4981077576083457e-05, + "loss": 1.0327, + "step": 58640 + }, + { + "epoch": 0.84, + "grad_norm": 0.5, + "learning_rate": 1.496789925045533e-05, + "loss": 1.0104, + "step": 58645 + }, + { + "epoch": 0.84, + "grad_norm": 0.53515625, + "learning_rate": 1.4954726254736174e-05, + "loss": 0.9281, + "step": 58650 + }, + { + "epoch": 0.84, + "grad_norm": 0.462890625, + "learning_rate": 1.4941558589751581e-05, + "loss": 0.9496, + "step": 58655 + }, + { + "epoch": 0.84, + "grad_norm": 0.609375, + "learning_rate": 1.4928396256326971e-05, + "loss": 0.9333, + "step": 58660 + }, + { + "epoch": 0.84, + "grad_norm": 0.53125, + "learning_rate": 1.4915239255287395e-05, + "loss": 0.9894, + "step": 58665 + }, + { + "epoch": 0.84, + "grad_norm": 0.52734375, + "learning_rate": 1.4902087587457502e-05, + "loss": 0.9979, + "step": 58670 + }, + { + "epoch": 0.84, + "grad_norm": 0.58203125, + "learning_rate": 1.4888941253661681e-05, + "loss": 0.957, + "step": 58675 + }, + { + "epoch": 0.84, + "grad_norm": 0.6015625, + "learning_rate": 1.4875800254723982e-05, + "loss": 0.9186, + "step": 58680 + }, + { + "epoch": 0.84, + "grad_norm": 0.546875, + "learning_rate": 1.4862664591468034e-05, + "loss": 0.7844, + "step": 58685 + }, + { + "epoch": 0.84, + "grad_norm": 0.58984375, + "learning_rate": 1.4849534264717246e-05, + "loss": 0.8935, + "step": 58690 + }, + { + "epoch": 0.84, + "grad_norm": 0.5546875, + "learning_rate": 1.483640927529465e-05, + "loss": 1.0858, + "step": 58695 + }, + { + "epoch": 0.84, + "grad_norm": 0.609375, + "learning_rate": 1.4823289624022907e-05, + "loss": 1.0458, + "step": 58700 + }, + { + "epoch": 0.84, + "grad_norm": 0.59765625, + "learning_rate": 1.4810175311724361e-05, + "loss": 1.0108, + "step": 58705 + }, + { + "epoch": 0.84, + "grad_norm": 0.609375, + "learning_rate": 1.4797066339221044e-05, + "loss": 0.938, + "step": 58710 + }, + { + "epoch": 0.84, + "grad_norm": 0.6171875, + "learning_rate": 1.4783962707334675e-05, + "loss": 1.0258, + "step": 58715 + }, + { + "epoch": 0.84, + "grad_norm": 0.54296875, + "learning_rate": 1.4770864416886555e-05, + "loss": 0.9121, + "step": 58720 + }, + { + "epoch": 0.84, + "grad_norm": 0.5, + "learning_rate": 1.4757771468697713e-05, + "loss": 1.0701, + "step": 58725 + }, + { + "epoch": 0.84, + "grad_norm": 0.51171875, + "learning_rate": 1.474468386358887e-05, + "loss": 0.9293, + "step": 58730 + }, + { + "epoch": 0.84, + "grad_norm": 0.52734375, + "learning_rate": 1.4731601602380307e-05, + "loss": 0.9501, + "step": 58735 + }, + { + "epoch": 0.84, + "grad_norm": 0.5703125, + "learning_rate": 1.4718524685892077e-05, + "loss": 0.9337, + "step": 58740 + }, + { + "epoch": 0.84, + "grad_norm": 0.7890625, + "learning_rate": 1.4705453114943868e-05, + "loss": 0.9834, + "step": 58745 + }, + { + "epoch": 0.84, + "grad_norm": 0.52734375, + "learning_rate": 1.4692386890354981e-05, + "loss": 0.9413, + "step": 58750 + }, + { + "epoch": 0.84, + "grad_norm": 0.6015625, + "learning_rate": 1.467932601294446e-05, + "loss": 1.1198, + "step": 58755 + }, + { + "epoch": 0.84, + "grad_norm": 0.52734375, + "learning_rate": 1.466627048353093e-05, + "loss": 0.9578, + "step": 58760 + }, + { + "epoch": 0.84, + "grad_norm": 0.55859375, + "learning_rate": 1.465322030293278e-05, + "loss": 0.9431, + "step": 58765 + }, + { + "epoch": 0.84, + "grad_norm": 0.53515625, + "learning_rate": 1.4640175471967965e-05, + "loss": 0.964, + "step": 58770 + }, + { + "epoch": 0.84, + "grad_norm": 0.62109375, + "learning_rate": 1.4627135991454155e-05, + "loss": 0.9486, + "step": 58775 + }, + { + "epoch": 0.84, + "grad_norm": 0.57421875, + "learning_rate": 1.4614101862208729e-05, + "loss": 1.0664, + "step": 58780 + }, + { + "epoch": 0.84, + "grad_norm": 0.546875, + "learning_rate": 1.460107308504861e-05, + "loss": 0.9023, + "step": 58785 + }, + { + "epoch": 0.84, + "grad_norm": 0.62109375, + "learning_rate": 1.4588049660790527e-05, + "loss": 1.0412, + "step": 58790 + }, + { + "epoch": 0.84, + "grad_norm": 0.49609375, + "learning_rate": 1.4575031590250732e-05, + "loss": 0.9061, + "step": 58795 + }, + { + "epoch": 0.84, + "grad_norm": 0.49609375, + "learning_rate": 1.4562018874245254e-05, + "loss": 0.939, + "step": 58800 + }, + { + "epoch": 0.84, + "grad_norm": 0.5546875, + "learning_rate": 1.4549011513589762e-05, + "loss": 0.8717, + "step": 58805 + }, + { + "epoch": 0.84, + "grad_norm": 0.5546875, + "learning_rate": 1.4536009509099547e-05, + "loss": 0.9185, + "step": 58810 + }, + { + "epoch": 0.84, + "grad_norm": 0.55859375, + "learning_rate": 1.4523012861589568e-05, + "loss": 0.9918, + "step": 58815 + }, + { + "epoch": 0.84, + "grad_norm": 0.54296875, + "learning_rate": 1.4510021571874498e-05, + "loss": 0.9828, + "step": 58820 + }, + { + "epoch": 0.84, + "grad_norm": 0.6171875, + "learning_rate": 1.4497035640768664e-05, + "loss": 0.8807, + "step": 58825 + }, + { + "epoch": 0.84, + "grad_norm": 0.55078125, + "learning_rate": 1.4484055069085989e-05, + "loss": 0.9587, + "step": 58830 + }, + { + "epoch": 0.84, + "grad_norm": 0.51171875, + "learning_rate": 1.4471079857640134e-05, + "loss": 0.9532, + "step": 58835 + }, + { + "epoch": 0.84, + "grad_norm": 0.5234375, + "learning_rate": 1.445811000724443e-05, + "loss": 0.8421, + "step": 58840 + }, + { + "epoch": 0.84, + "grad_norm": 0.64453125, + "learning_rate": 1.4445145518711789e-05, + "loss": 0.9329, + "step": 58845 + }, + { + "epoch": 0.84, + "grad_norm": 0.49609375, + "learning_rate": 1.4432186392854862e-05, + "loss": 0.9504, + "step": 58850 + }, + { + "epoch": 0.84, + "grad_norm": 0.65234375, + "learning_rate": 1.441923263048598e-05, + "loss": 0.8915, + "step": 58855 + }, + { + "epoch": 0.84, + "grad_norm": 0.58203125, + "learning_rate": 1.4406284232417056e-05, + "loss": 1.0894, + "step": 58860 + }, + { + "epoch": 0.84, + "grad_norm": 0.68359375, + "learning_rate": 1.4393341199459698e-05, + "loss": 0.9599, + "step": 58865 + }, + { + "epoch": 0.84, + "grad_norm": 0.57421875, + "learning_rate": 1.4380403532425212e-05, + "loss": 0.8874, + "step": 58870 + }, + { + "epoch": 0.84, + "grad_norm": 0.5703125, + "learning_rate": 1.4367471232124575e-05, + "loss": 1.1097, + "step": 58875 + }, + { + "epoch": 0.84, + "grad_norm": 0.6640625, + "learning_rate": 1.4354544299368333e-05, + "loss": 0.9817, + "step": 58880 + }, + { + "epoch": 0.84, + "grad_norm": 0.494140625, + "learning_rate": 1.4341622734966797e-05, + "loss": 0.9076, + "step": 58885 + }, + { + "epoch": 0.84, + "grad_norm": 0.49609375, + "learning_rate": 1.4328706539729941e-05, + "loss": 0.8973, + "step": 58890 + }, + { + "epoch": 0.84, + "grad_norm": 0.5859375, + "learning_rate": 1.4315795714467294e-05, + "loss": 1.0058, + "step": 58895 + }, + { + "epoch": 0.84, + "grad_norm": 0.56640625, + "learning_rate": 1.4302890259988167e-05, + "loss": 0.8475, + "step": 58900 + }, + { + "epoch": 0.84, + "grad_norm": 0.515625, + "learning_rate": 1.4289990177101497e-05, + "loss": 1.0302, + "step": 58905 + }, + { + "epoch": 0.85, + "grad_norm": 0.431640625, + "learning_rate": 1.4277095466615864e-05, + "loss": 0.9015, + "step": 58910 + }, + { + "epoch": 0.85, + "grad_norm": 0.5546875, + "learning_rate": 1.4264206129339486e-05, + "loss": 0.984, + "step": 58915 + }, + { + "epoch": 0.85, + "grad_norm": 0.50390625, + "learning_rate": 1.4251322166080339e-05, + "loss": 1.0828, + "step": 58920 + }, + { + "epoch": 0.85, + "grad_norm": 0.59765625, + "learning_rate": 1.4238443577645955e-05, + "loss": 0.8563, + "step": 58925 + }, + { + "epoch": 0.85, + "grad_norm": 0.60546875, + "learning_rate": 1.4225570364843599e-05, + "loss": 1.044, + "step": 58930 + }, + { + "epoch": 0.85, + "grad_norm": 0.66796875, + "learning_rate": 1.4212702528480214e-05, + "loss": 1.1616, + "step": 58935 + }, + { + "epoch": 0.85, + "grad_norm": 0.5703125, + "learning_rate": 1.4199840069362302e-05, + "loss": 0.9844, + "step": 58940 + }, + { + "epoch": 0.85, + "grad_norm": 0.59765625, + "learning_rate": 1.418698298829615e-05, + "loss": 0.9174, + "step": 58945 + }, + { + "epoch": 0.85, + "grad_norm": 0.478515625, + "learning_rate": 1.4174131286087645e-05, + "loss": 1.0788, + "step": 58950 + }, + { + "epoch": 0.85, + "grad_norm": 0.6171875, + "learning_rate": 1.4161284963542321e-05, + "loss": 1.0316, + "step": 58955 + }, + { + "epoch": 0.85, + "grad_norm": 0.59375, + "learning_rate": 1.4148444021465413e-05, + "loss": 0.859, + "step": 58960 + }, + { + "epoch": 0.85, + "grad_norm": 0.625, + "learning_rate": 1.4135608460661842e-05, + "loss": 0.9537, + "step": 58965 + }, + { + "epoch": 0.85, + "grad_norm": 0.5, + "learning_rate": 1.412277828193611e-05, + "loss": 0.9823, + "step": 58970 + }, + { + "epoch": 0.85, + "grad_norm": 0.5390625, + "learning_rate": 1.410995348609243e-05, + "loss": 0.8578, + "step": 58975 + }, + { + "epoch": 0.85, + "grad_norm": 0.51953125, + "learning_rate": 1.4097134073934693e-05, + "loss": 1.0997, + "step": 58980 + }, + { + "epoch": 0.85, + "grad_norm": 0.5625, + "learning_rate": 1.408432004626643e-05, + "loss": 1.0181, + "step": 58985 + }, + { + "epoch": 0.85, + "grad_norm": 0.5390625, + "learning_rate": 1.4071511403890824e-05, + "loss": 1.0344, + "step": 58990 + }, + { + "epoch": 0.85, + "grad_norm": 0.6171875, + "learning_rate": 1.4058708147610755e-05, + "loss": 0.9378, + "step": 58995 + }, + { + "epoch": 0.85, + "grad_norm": 0.45703125, + "learning_rate": 1.4045910278228757e-05, + "loss": 0.7372, + "step": 59000 + }, + { + "epoch": 0.85, + "grad_norm": 0.5078125, + "learning_rate": 1.4033117796546969e-05, + "loss": 0.9252, + "step": 59005 + }, + { + "epoch": 0.85, + "grad_norm": 0.65625, + "learning_rate": 1.4020330703367268e-05, + "loss": 0.9874, + "step": 59010 + }, + { + "epoch": 0.85, + "grad_norm": 0.53515625, + "learning_rate": 1.4007548999491204e-05, + "loss": 0.9918, + "step": 59015 + }, + { + "epoch": 0.85, + "grad_norm": 0.5234375, + "learning_rate": 1.399477268571986e-05, + "loss": 0.9145, + "step": 59020 + }, + { + "epoch": 0.85, + "grad_norm": 0.5625, + "learning_rate": 1.3982001762854114e-05, + "loss": 0.9774, + "step": 59025 + }, + { + "epoch": 0.85, + "grad_norm": 0.5078125, + "learning_rate": 1.3969236231694494e-05, + "loss": 0.9343, + "step": 59030 + }, + { + "epoch": 0.85, + "grad_norm": 0.54296875, + "learning_rate": 1.3956476093041105e-05, + "loss": 0.8558, + "step": 59035 + }, + { + "epoch": 0.85, + "grad_norm": 0.6015625, + "learning_rate": 1.3943721347693783e-05, + "loss": 0.8362, + "step": 59040 + }, + { + "epoch": 0.85, + "grad_norm": 0.57421875, + "learning_rate": 1.3930971996452048e-05, + "loss": 0.8882, + "step": 59045 + }, + { + "epoch": 0.85, + "grad_norm": 0.58984375, + "learning_rate": 1.3918228040114977e-05, + "loss": 0.8783, + "step": 59050 + }, + { + "epoch": 0.85, + "grad_norm": 0.578125, + "learning_rate": 1.3905489479481425e-05, + "loss": 0.8499, + "step": 59055 + }, + { + "epoch": 0.85, + "grad_norm": 0.5390625, + "learning_rate": 1.3892756315349853e-05, + "loss": 0.9287, + "step": 59060 + }, + { + "epoch": 0.85, + "grad_norm": 0.56640625, + "learning_rate": 1.3880028548518376e-05, + "loss": 1.0537, + "step": 59065 + }, + { + "epoch": 0.85, + "grad_norm": 0.52734375, + "learning_rate": 1.3867306179784811e-05, + "loss": 1.0329, + "step": 59070 + }, + { + "epoch": 0.85, + "grad_norm": 0.73046875, + "learning_rate": 1.3854589209946567e-05, + "loss": 0.9988, + "step": 59075 + }, + { + "epoch": 0.85, + "grad_norm": 0.5078125, + "learning_rate": 1.3841877639800805e-05, + "loss": 0.9891, + "step": 59080 + }, + { + "epoch": 0.85, + "grad_norm": 0.55859375, + "learning_rate": 1.3829171470144254e-05, + "loss": 0.893, + "step": 59085 + }, + { + "epoch": 0.85, + "grad_norm": 0.5078125, + "learning_rate": 1.3816470701773377e-05, + "loss": 1.0603, + "step": 59090 + }, + { + "epoch": 0.85, + "grad_norm": 0.703125, + "learning_rate": 1.3803775335484292e-05, + "loss": 0.7809, + "step": 59095 + }, + { + "epoch": 0.85, + "grad_norm": 0.53125, + "learning_rate": 1.379108537207272e-05, + "loss": 0.9883, + "step": 59100 + }, + { + "epoch": 0.85, + "grad_norm": 0.64453125, + "learning_rate": 1.3778400812334102e-05, + "loss": 1.0164, + "step": 59105 + }, + { + "epoch": 0.85, + "grad_norm": 0.6015625, + "learning_rate": 1.3765721657063535e-05, + "loss": 0.9677, + "step": 59110 + }, + { + "epoch": 0.85, + "grad_norm": 1.140625, + "learning_rate": 1.3753047907055727e-05, + "loss": 0.9704, + "step": 59115 + }, + { + "epoch": 0.85, + "grad_norm": 0.515625, + "learning_rate": 1.3740379563105122e-05, + "loss": 0.8938, + "step": 59120 + }, + { + "epoch": 0.85, + "grad_norm": 0.55078125, + "learning_rate": 1.372771662600577e-05, + "loss": 1.0333, + "step": 59125 + }, + { + "epoch": 0.85, + "grad_norm": 0.640625, + "learning_rate": 1.3715059096551386e-05, + "loss": 0.9952, + "step": 59130 + }, + { + "epoch": 0.85, + "grad_norm": 0.5625, + "learning_rate": 1.3702406975535353e-05, + "loss": 0.8836, + "step": 59135 + }, + { + "epoch": 0.85, + "grad_norm": 0.6171875, + "learning_rate": 1.3689760263750762e-05, + "loss": 0.8332, + "step": 59140 + }, + { + "epoch": 0.85, + "grad_norm": 0.5390625, + "learning_rate": 1.3677118961990276e-05, + "loss": 0.9347, + "step": 59145 + }, + { + "epoch": 0.85, + "grad_norm": 0.5, + "learning_rate": 1.3664483071046286e-05, + "loss": 0.8312, + "step": 59150 + }, + { + "epoch": 0.85, + "grad_norm": 0.474609375, + "learning_rate": 1.3651852591710845e-05, + "loss": 0.9411, + "step": 59155 + }, + { + "epoch": 0.85, + "grad_norm": 0.515625, + "learning_rate": 1.363922752477561e-05, + "loss": 0.9272, + "step": 59160 + }, + { + "epoch": 0.85, + "grad_norm": 0.56640625, + "learning_rate": 1.3626607871031939e-05, + "loss": 0.9742, + "step": 59165 + }, + { + "epoch": 0.85, + "grad_norm": 0.62109375, + "learning_rate": 1.3613993631270894e-05, + "loss": 0.9807, + "step": 59170 + }, + { + "epoch": 0.85, + "grad_norm": 0.58203125, + "learning_rate": 1.3601384806283101e-05, + "loss": 0.7687, + "step": 59175 + }, + { + "epoch": 0.85, + "grad_norm": 0.55078125, + "learning_rate": 1.3588781396858896e-05, + "loss": 1.0959, + "step": 59180 + }, + { + "epoch": 0.85, + "grad_norm": 0.44140625, + "learning_rate": 1.3576183403788279e-05, + "loss": 1.0129, + "step": 59185 + }, + { + "epoch": 0.85, + "grad_norm": 0.5234375, + "learning_rate": 1.3563590827860939e-05, + "loss": 0.9644, + "step": 59190 + }, + { + "epoch": 0.85, + "grad_norm": 0.578125, + "learning_rate": 1.3551003669866146e-05, + "loss": 0.9295, + "step": 59195 + }, + { + "epoch": 0.85, + "grad_norm": 0.55078125, + "learning_rate": 1.3538421930592903e-05, + "loss": 0.9419, + "step": 59200 + }, + { + "epoch": 0.85, + "grad_norm": 0.50390625, + "learning_rate": 1.3525845610829868e-05, + "loss": 1.0253, + "step": 59205 + }, + { + "epoch": 0.85, + "grad_norm": 0.484375, + "learning_rate": 1.3513274711365298e-05, + "loss": 0.8688, + "step": 59210 + }, + { + "epoch": 0.85, + "grad_norm": 0.546875, + "learning_rate": 1.3500709232987163e-05, + "loss": 0.9537, + "step": 59215 + }, + { + "epoch": 0.85, + "grad_norm": 0.60546875, + "learning_rate": 1.3488149176483112e-05, + "loss": 0.9867, + "step": 59220 + }, + { + "epoch": 0.85, + "grad_norm": 0.5703125, + "learning_rate": 1.3475594542640391e-05, + "loss": 0.9123, + "step": 59225 + }, + { + "epoch": 0.85, + "grad_norm": 0.5, + "learning_rate": 1.3463045332245939e-05, + "loss": 1.1345, + "step": 59230 + }, + { + "epoch": 0.85, + "grad_norm": 0.578125, + "learning_rate": 1.345050154608637e-05, + "loss": 0.9323, + "step": 59235 + }, + { + "epoch": 0.85, + "grad_norm": 0.498046875, + "learning_rate": 1.343796318494792e-05, + "loss": 0.78, + "step": 59240 + }, + { + "epoch": 0.85, + "grad_norm": 0.515625, + "learning_rate": 1.3425430249616522e-05, + "loss": 0.9499, + "step": 59245 + }, + { + "epoch": 0.85, + "grad_norm": 0.49609375, + "learning_rate": 1.3412902740877786e-05, + "loss": 0.9186, + "step": 59250 + }, + { + "epoch": 0.85, + "grad_norm": 0.52734375, + "learning_rate": 1.3400380659516898e-05, + "loss": 1.0809, + "step": 59255 + }, + { + "epoch": 0.85, + "grad_norm": 0.57421875, + "learning_rate": 1.3387864006318773e-05, + "loss": 0.9911, + "step": 59260 + }, + { + "epoch": 0.85, + "grad_norm": 0.515625, + "learning_rate": 1.3375352782067996e-05, + "loss": 0.9153, + "step": 59265 + }, + { + "epoch": 0.85, + "grad_norm": 0.5546875, + "learning_rate": 1.3362846987548738e-05, + "loss": 0.9067, + "step": 59270 + }, + { + "epoch": 0.85, + "grad_norm": 0.53125, + "learning_rate": 1.3350346623544918e-05, + "loss": 0.9464, + "step": 59275 + }, + { + "epoch": 0.85, + "grad_norm": 0.61328125, + "learning_rate": 1.3337851690840064e-05, + "loss": 0.998, + "step": 59280 + }, + { + "epoch": 0.85, + "grad_norm": 0.498046875, + "learning_rate": 1.3325362190217371e-05, + "loss": 0.8617, + "step": 59285 + }, + { + "epoch": 0.85, + "grad_norm": 0.609375, + "learning_rate": 1.331287812245967e-05, + "loss": 0.9716, + "step": 59290 + }, + { + "epoch": 0.85, + "grad_norm": 0.546875, + "learning_rate": 1.3300399488349491e-05, + "loss": 0.8445, + "step": 59295 + }, + { + "epoch": 0.85, + "grad_norm": 0.5234375, + "learning_rate": 1.328792628866905e-05, + "loss": 0.9536, + "step": 59300 + }, + { + "epoch": 0.85, + "grad_norm": 0.53125, + "learning_rate": 1.3275458524200112e-05, + "loss": 0.9947, + "step": 59305 + }, + { + "epoch": 0.85, + "grad_norm": 0.56640625, + "learning_rate": 1.3262996195724209e-05, + "loss": 1.0093, + "step": 59310 + }, + { + "epoch": 0.85, + "grad_norm": 0.63671875, + "learning_rate": 1.3250539304022503e-05, + "loss": 0.9308, + "step": 59315 + }, + { + "epoch": 0.85, + "grad_norm": 0.5859375, + "learning_rate": 1.323808784987578e-05, + "loss": 0.9925, + "step": 59320 + }, + { + "epoch": 0.85, + "grad_norm": 0.546875, + "learning_rate": 1.3225641834064529e-05, + "loss": 1.017, + "step": 59325 + }, + { + "epoch": 0.85, + "grad_norm": 0.84765625, + "learning_rate": 1.3213201257368913e-05, + "loss": 0.7214, + "step": 59330 + }, + { + "epoch": 0.85, + "grad_norm": 0.58203125, + "learning_rate": 1.320076612056863e-05, + "loss": 0.9199, + "step": 59335 + }, + { + "epoch": 0.85, + "grad_norm": 0.5625, + "learning_rate": 1.3188336424443193e-05, + "loss": 0.9226, + "step": 59340 + }, + { + "epoch": 0.85, + "grad_norm": 0.5390625, + "learning_rate": 1.317591216977172e-05, + "loss": 0.8979, + "step": 59345 + }, + { + "epoch": 0.85, + "grad_norm": 0.57421875, + "learning_rate": 1.3163493357332934e-05, + "loss": 1.0728, + "step": 59350 + }, + { + "epoch": 0.85, + "grad_norm": 0.59765625, + "learning_rate": 1.315107998790529e-05, + "loss": 0.7297, + "step": 59355 + }, + { + "epoch": 0.85, + "grad_norm": 0.5390625, + "learning_rate": 1.3138672062266876e-05, + "loss": 0.9474, + "step": 59360 + }, + { + "epoch": 0.85, + "grad_norm": 0.63671875, + "learning_rate": 1.3126269581195416e-05, + "loss": 0.9479, + "step": 59365 + }, + { + "epoch": 0.85, + "grad_norm": 0.5859375, + "learning_rate": 1.3113872545468308e-05, + "loss": 0.8623, + "step": 59370 + }, + { + "epoch": 0.85, + "grad_norm": 0.58984375, + "learning_rate": 1.3101480955862644e-05, + "loss": 0.8414, + "step": 59375 + }, + { + "epoch": 0.85, + "grad_norm": 0.6328125, + "learning_rate": 1.3089094813155112e-05, + "loss": 0.9636, + "step": 59380 + }, + { + "epoch": 0.85, + "grad_norm": 0.53125, + "learning_rate": 1.3076714118122114e-05, + "loss": 0.9034, + "step": 59385 + }, + { + "epoch": 0.85, + "grad_norm": 0.5546875, + "learning_rate": 1.306433887153965e-05, + "loss": 0.8748, + "step": 59390 + }, + { + "epoch": 0.85, + "grad_norm": 0.5625, + "learning_rate": 1.3051969074183457e-05, + "loss": 1.0434, + "step": 59395 + }, + { + "epoch": 0.85, + "grad_norm": 0.63671875, + "learning_rate": 1.3039604726828847e-05, + "loss": 0.9884, + "step": 59400 + }, + { + "epoch": 0.85, + "grad_norm": 0.5703125, + "learning_rate": 1.3027245830250856e-05, + "loss": 0.8497, + "step": 59405 + }, + { + "epoch": 0.85, + "grad_norm": 0.578125, + "learning_rate": 1.3014892385224175e-05, + "loss": 0.9093, + "step": 59410 + }, + { + "epoch": 0.85, + "grad_norm": 0.58203125, + "learning_rate": 1.3002544392523076e-05, + "loss": 1.004, + "step": 59415 + }, + { + "epoch": 0.85, + "grad_norm": 0.50390625, + "learning_rate": 1.299020185292158e-05, + "loss": 0.9488, + "step": 59420 + }, + { + "epoch": 0.85, + "grad_norm": 0.5703125, + "learning_rate": 1.297786476719336e-05, + "loss": 0.7838, + "step": 59425 + }, + { + "epoch": 0.85, + "grad_norm": 0.51953125, + "learning_rate": 1.2965533136111652e-05, + "loss": 0.9595, + "step": 59430 + }, + { + "epoch": 0.85, + "grad_norm": 0.55859375, + "learning_rate": 1.2953206960449471e-05, + "loss": 1.0603, + "step": 59435 + }, + { + "epoch": 0.85, + "grad_norm": 0.61328125, + "learning_rate": 1.29408862409794e-05, + "loss": 0.9718, + "step": 59440 + }, + { + "epoch": 0.85, + "grad_norm": 0.5390625, + "learning_rate": 1.2928570978473753e-05, + "loss": 1.0246, + "step": 59445 + }, + { + "epoch": 0.85, + "grad_norm": 0.6171875, + "learning_rate": 1.2916261173704425e-05, + "loss": 0.9285, + "step": 59450 + }, + { + "epoch": 0.85, + "grad_norm": 0.55078125, + "learning_rate": 1.290395682744302e-05, + "loss": 0.8797, + "step": 59455 + }, + { + "epoch": 0.85, + "grad_norm": 0.5, + "learning_rate": 1.2891657940460822e-05, + "loss": 0.8116, + "step": 59460 + }, + { + "epoch": 0.85, + "grad_norm": 0.451171875, + "learning_rate": 1.287936451352868e-05, + "loss": 1.02, + "step": 59465 + }, + { + "epoch": 0.85, + "grad_norm": 0.51953125, + "learning_rate": 1.2867076547417223e-05, + "loss": 0.8766, + "step": 59470 + }, + { + "epoch": 0.85, + "grad_norm": 0.69140625, + "learning_rate": 1.2854794042896623e-05, + "loss": 1.0155, + "step": 59475 + }, + { + "epoch": 0.85, + "grad_norm": 0.65234375, + "learning_rate": 1.2842517000736776e-05, + "loss": 0.9822, + "step": 59480 + }, + { + "epoch": 0.85, + "grad_norm": 0.46484375, + "learning_rate": 1.2830245421707255e-05, + "loss": 0.8895, + "step": 59485 + }, + { + "epoch": 0.85, + "grad_norm": 0.609375, + "learning_rate": 1.2817979306577232e-05, + "loss": 1.0055, + "step": 59490 + }, + { + "epoch": 0.85, + "grad_norm": 0.55859375, + "learning_rate": 1.2805718656115528e-05, + "loss": 0.9778, + "step": 59495 + }, + { + "epoch": 0.85, + "grad_norm": 0.703125, + "learning_rate": 1.2793463471090695e-05, + "loss": 1.0549, + "step": 59500 + }, + { + "epoch": 0.85, + "grad_norm": 0.58984375, + "learning_rate": 1.2781213752270904e-05, + "loss": 1.0148, + "step": 59505 + }, + { + "epoch": 0.85, + "grad_norm": 1.09375, + "learning_rate": 1.2768969500423954e-05, + "loss": 0.9994, + "step": 59510 + }, + { + "epoch": 0.85, + "grad_norm": 0.546875, + "learning_rate": 1.2756730716317333e-05, + "loss": 1.0061, + "step": 59515 + }, + { + "epoch": 0.85, + "grad_norm": 0.5703125, + "learning_rate": 1.2744497400718224e-05, + "loss": 0.9772, + "step": 59520 + }, + { + "epoch": 0.85, + "grad_norm": 0.59765625, + "learning_rate": 1.2732269554393373e-05, + "loss": 0.9542, + "step": 59525 + }, + { + "epoch": 0.85, + "grad_norm": 0.5546875, + "learning_rate": 1.2720047178109252e-05, + "loss": 1.0453, + "step": 59530 + }, + { + "epoch": 0.85, + "grad_norm": 0.5234375, + "learning_rate": 1.2707830272631994e-05, + "loss": 1.0213, + "step": 59535 + }, + { + "epoch": 0.85, + "grad_norm": 0.5546875, + "learning_rate": 1.2695618838727352e-05, + "loss": 0.9817, + "step": 59540 + }, + { + "epoch": 0.85, + "grad_norm": 0.52734375, + "learning_rate": 1.268341287716074e-05, + "loss": 1.0595, + "step": 59545 + }, + { + "epoch": 0.85, + "grad_norm": 0.6171875, + "learning_rate": 1.267121238869725e-05, + "loss": 0.9901, + "step": 59550 + }, + { + "epoch": 0.85, + "grad_norm": 0.55859375, + "learning_rate": 1.2659017374101646e-05, + "loss": 0.9364, + "step": 59555 + }, + { + "epoch": 0.85, + "grad_norm": 0.5546875, + "learning_rate": 1.2646827834138286e-05, + "loss": 0.8643, + "step": 59560 + }, + { + "epoch": 0.85, + "grad_norm": 0.5703125, + "learning_rate": 1.2634643769571242e-05, + "loss": 0.9698, + "step": 59565 + }, + { + "epoch": 0.85, + "grad_norm": 0.59375, + "learning_rate": 1.2622465181164256e-05, + "loss": 0.8793, + "step": 59570 + }, + { + "epoch": 0.85, + "grad_norm": 0.60546875, + "learning_rate": 1.261029206968064e-05, + "loss": 0.8717, + "step": 59575 + }, + { + "epoch": 0.85, + "grad_norm": 0.51953125, + "learning_rate": 1.2598124435883452e-05, + "loss": 0.7989, + "step": 59580 + }, + { + "epoch": 0.85, + "grad_norm": 0.53125, + "learning_rate": 1.258596228053538e-05, + "loss": 0.9763, + "step": 59585 + }, + { + "epoch": 0.85, + "grad_norm": 0.58984375, + "learning_rate": 1.2573805604398725e-05, + "loss": 0.8852, + "step": 59590 + }, + { + "epoch": 0.85, + "grad_norm": 0.462890625, + "learning_rate": 1.2561654408235535e-05, + "loss": 1.0915, + "step": 59595 + }, + { + "epoch": 0.85, + "grad_norm": 0.5, + "learning_rate": 1.2549508692807422e-05, + "loss": 0.9351, + "step": 59600 + }, + { + "epoch": 0.86, + "grad_norm": 0.48828125, + "learning_rate": 1.2537368458875687e-05, + "loss": 0.9376, + "step": 59605 + }, + { + "epoch": 0.86, + "grad_norm": 0.55078125, + "learning_rate": 1.2525233707201311e-05, + "loss": 0.9712, + "step": 59610 + }, + { + "epoch": 0.86, + "grad_norm": 0.66015625, + "learning_rate": 1.2513104438544932e-05, + "loss": 1.1597, + "step": 59615 + }, + { + "epoch": 0.86, + "grad_norm": 0.5546875, + "learning_rate": 1.2500980653666782e-05, + "loss": 0.7914, + "step": 59620 + }, + { + "epoch": 0.86, + "grad_norm": 0.64453125, + "learning_rate": 1.2488862353326813e-05, + "loss": 0.9986, + "step": 59625 + }, + { + "epoch": 0.86, + "grad_norm": 0.6953125, + "learning_rate": 1.2476749538284648e-05, + "loss": 1.0384, + "step": 59630 + }, + { + "epoch": 0.86, + "grad_norm": 0.55078125, + "learning_rate": 1.246464220929947e-05, + "loss": 0.9533, + "step": 59635 + }, + { + "epoch": 0.86, + "grad_norm": 0.546875, + "learning_rate": 1.2452540367130228e-05, + "loss": 0.9425, + "step": 59640 + }, + { + "epoch": 0.86, + "grad_norm": 0.55859375, + "learning_rate": 1.2440444012535468e-05, + "loss": 0.8041, + "step": 59645 + }, + { + "epoch": 0.86, + "grad_norm": 0.66796875, + "learning_rate": 1.2428353146273407e-05, + "loss": 1.0403, + "step": 59650 + }, + { + "epoch": 0.86, + "grad_norm": 0.625, + "learning_rate": 1.2416267769101898e-05, + "loss": 1.0474, + "step": 59655 + }, + { + "epoch": 0.86, + "grad_norm": 0.59375, + "learning_rate": 1.2404187881778462e-05, + "loss": 1.2028, + "step": 59660 + }, + { + "epoch": 0.86, + "grad_norm": 0.59375, + "learning_rate": 1.2392113485060309e-05, + "loss": 1.0045, + "step": 59665 + }, + { + "epoch": 0.86, + "grad_norm": 0.5546875, + "learning_rate": 1.2380044579704254e-05, + "loss": 1.0195, + "step": 59670 + }, + { + "epoch": 0.86, + "grad_norm": 0.55859375, + "learning_rate": 1.236798116646678e-05, + "loss": 0.9197, + "step": 59675 + }, + { + "epoch": 0.86, + "grad_norm": 0.51953125, + "learning_rate": 1.2355923246104084e-05, + "loss": 0.9245, + "step": 59680 + }, + { + "epoch": 0.86, + "grad_norm": 0.66015625, + "learning_rate": 1.2343870819371906e-05, + "loss": 1.0074, + "step": 59685 + }, + { + "epoch": 0.86, + "grad_norm": 0.5234375, + "learning_rate": 1.2331823887025739e-05, + "loss": 0.9121, + "step": 59690 + }, + { + "epoch": 0.86, + "grad_norm": 0.53515625, + "learning_rate": 1.2319782449820727e-05, + "loss": 0.8504, + "step": 59695 + }, + { + "epoch": 0.86, + "grad_norm": 0.478515625, + "learning_rate": 1.2307746508511597e-05, + "loss": 1.005, + "step": 59700 + }, + { + "epoch": 0.86, + "grad_norm": 0.66796875, + "learning_rate": 1.229571606385278e-05, + "loss": 0.9193, + "step": 59705 + }, + { + "epoch": 0.86, + "grad_norm": 0.5234375, + "learning_rate": 1.2283691116598384e-05, + "loss": 0.8849, + "step": 59710 + }, + { + "epoch": 0.86, + "grad_norm": 0.58984375, + "learning_rate": 1.2271671667502105e-05, + "loss": 0.9522, + "step": 59715 + }, + { + "epoch": 0.86, + "grad_norm": 0.5703125, + "learning_rate": 1.2259657717317362e-05, + "loss": 0.9858, + "step": 59720 + }, + { + "epoch": 0.86, + "grad_norm": 0.5, + "learning_rate": 1.2247649266797223e-05, + "loss": 0.9662, + "step": 59725 + }, + { + "epoch": 0.86, + "grad_norm": 0.6484375, + "learning_rate": 1.223564631669435e-05, + "loss": 0.9701, + "step": 59730 + }, + { + "epoch": 0.86, + "grad_norm": 0.56640625, + "learning_rate": 1.2223648867761116e-05, + "loss": 1.1265, + "step": 59735 + }, + { + "epoch": 0.86, + "grad_norm": 0.58984375, + "learning_rate": 1.2211656920749559e-05, + "loss": 0.9027, + "step": 59740 + }, + { + "epoch": 0.86, + "grad_norm": 0.59765625, + "learning_rate": 1.2199670476411319e-05, + "loss": 0.9315, + "step": 59745 + }, + { + "epoch": 0.86, + "grad_norm": 0.5390625, + "learning_rate": 1.2187689535497738e-05, + "loss": 1.036, + "step": 59750 + }, + { + "epoch": 0.86, + "grad_norm": 0.57421875, + "learning_rate": 1.2175714098759772e-05, + "loss": 0.9804, + "step": 59755 + }, + { + "epoch": 0.86, + "grad_norm": 0.51171875, + "learning_rate": 1.2163744166948099e-05, + "loss": 0.8822, + "step": 59760 + }, + { + "epoch": 0.86, + "grad_norm": 0.6015625, + "learning_rate": 1.215177974081294e-05, + "loss": 1.0182, + "step": 59765 + }, + { + "epoch": 0.86, + "grad_norm": 0.515625, + "learning_rate": 1.2139820821104286e-05, + "loss": 0.82, + "step": 59770 + }, + { + "epoch": 0.86, + "grad_norm": 0.60546875, + "learning_rate": 1.2127867408571757e-05, + "loss": 0.862, + "step": 59775 + }, + { + "epoch": 0.86, + "grad_norm": 0.55078125, + "learning_rate": 1.2115919503964545e-05, + "loss": 0.9314, + "step": 59780 + }, + { + "epoch": 0.86, + "grad_norm": 0.60546875, + "learning_rate": 1.2103977108031595e-05, + "loss": 0.9286, + "step": 59785 + }, + { + "epoch": 0.86, + "grad_norm": 0.56640625, + "learning_rate": 1.2092040221521484e-05, + "loss": 0.8684, + "step": 59790 + }, + { + "epoch": 0.86, + "grad_norm": 0.54296875, + "learning_rate": 1.2080108845182392e-05, + "loss": 0.9665, + "step": 59795 + }, + { + "epoch": 0.86, + "grad_norm": 0.54296875, + "learning_rate": 1.2068182979762198e-05, + "loss": 0.8151, + "step": 59800 + }, + { + "epoch": 0.86, + "grad_norm": 0.53125, + "learning_rate": 1.2056262626008496e-05, + "loss": 0.8913, + "step": 59805 + }, + { + "epoch": 0.86, + "grad_norm": 0.53125, + "learning_rate": 1.2044347784668364e-05, + "loss": 0.9524, + "step": 59810 + }, + { + "epoch": 0.86, + "grad_norm": 0.55078125, + "learning_rate": 1.2032438456488692e-05, + "loss": 0.9238, + "step": 59815 + }, + { + "epoch": 0.86, + "grad_norm": 0.56640625, + "learning_rate": 1.2020534642215985e-05, + "loss": 0.9864, + "step": 59820 + }, + { + "epoch": 0.86, + "grad_norm": 0.50390625, + "learning_rate": 1.2008636342596347e-05, + "loss": 0.9358, + "step": 59825 + }, + { + "epoch": 0.86, + "grad_norm": 0.62890625, + "learning_rate": 1.199674355837559e-05, + "loss": 1.0017, + "step": 59830 + }, + { + "epoch": 0.86, + "grad_norm": 0.53515625, + "learning_rate": 1.1984856290299196e-05, + "loss": 0.9784, + "step": 59835 + }, + { + "epoch": 0.86, + "grad_norm": 0.5703125, + "learning_rate": 1.1972974539112225e-05, + "loss": 0.9955, + "step": 59840 + }, + { + "epoch": 0.86, + "grad_norm": 0.578125, + "learning_rate": 1.1961098305559471e-05, + "loss": 1.0199, + "step": 59845 + }, + { + "epoch": 0.86, + "grad_norm": 0.515625, + "learning_rate": 1.194922759038537e-05, + "loss": 0.8711, + "step": 59850 + }, + { + "epoch": 0.86, + "grad_norm": 0.515625, + "learning_rate": 1.193736239433395e-05, + "loss": 1.0284, + "step": 59855 + }, + { + "epoch": 0.86, + "grad_norm": 0.5625, + "learning_rate": 1.192550271814894e-05, + "loss": 0.8833, + "step": 59860 + }, + { + "epoch": 0.86, + "grad_norm": 0.54296875, + "learning_rate": 1.1913648562573732e-05, + "loss": 0.8689, + "step": 59865 + }, + { + "epoch": 0.86, + "grad_norm": 0.51953125, + "learning_rate": 1.1901799928351365e-05, + "loss": 0.926, + "step": 59870 + }, + { + "epoch": 0.86, + "grad_norm": 0.5625, + "learning_rate": 1.1889956816224502e-05, + "loss": 0.9446, + "step": 59875 + }, + { + "epoch": 0.86, + "grad_norm": 0.6015625, + "learning_rate": 1.1878119226935481e-05, + "loss": 1.0387, + "step": 59880 + }, + { + "epoch": 0.86, + "grad_norm": 0.49609375, + "learning_rate": 1.1866287161226341e-05, + "loss": 0.9219, + "step": 59885 + }, + { + "epoch": 0.86, + "grad_norm": 0.5625, + "learning_rate": 1.185446061983867e-05, + "loss": 0.9404, + "step": 59890 + }, + { + "epoch": 0.86, + "grad_norm": 0.48828125, + "learning_rate": 1.1842639603513805e-05, + "loss": 0.9646, + "step": 59895 + }, + { + "epoch": 0.86, + "grad_norm": 0.65234375, + "learning_rate": 1.183082411299271e-05, + "loss": 1.0031, + "step": 59900 + }, + { + "epoch": 0.86, + "grad_norm": 0.47265625, + "learning_rate": 1.181901414901595e-05, + "loss": 1.0052, + "step": 59905 + }, + { + "epoch": 0.86, + "grad_norm": 0.5390625, + "learning_rate": 1.1807209712323842e-05, + "loss": 0.9562, + "step": 59910 + }, + { + "epoch": 0.86, + "grad_norm": 0.478515625, + "learning_rate": 1.1795410803656271e-05, + "loss": 0.8217, + "step": 59915 + }, + { + "epoch": 0.86, + "grad_norm": 0.515625, + "learning_rate": 1.1783617423752779e-05, + "loss": 0.9394, + "step": 59920 + }, + { + "epoch": 0.86, + "grad_norm": 0.5078125, + "learning_rate": 1.1771829573352622e-05, + "loss": 1.0048, + "step": 59925 + }, + { + "epoch": 0.86, + "grad_norm": 0.5703125, + "learning_rate": 1.1760047253194695e-05, + "loss": 1.036, + "step": 59930 + }, + { + "epoch": 0.86, + "grad_norm": 0.56640625, + "learning_rate": 1.1748270464017475e-05, + "loss": 0.9587, + "step": 59935 + }, + { + "epoch": 0.86, + "grad_norm": 0.58984375, + "learning_rate": 1.1736499206559182e-05, + "loss": 0.8339, + "step": 59940 + }, + { + "epoch": 0.86, + "grad_norm": 0.58984375, + "learning_rate": 1.172473348155766e-05, + "loss": 0.9504, + "step": 59945 + }, + { + "epoch": 0.86, + "grad_norm": 0.58203125, + "learning_rate": 1.1712973289750351e-05, + "loss": 0.8442, + "step": 59950 + }, + { + "epoch": 0.86, + "grad_norm": 0.63671875, + "learning_rate": 1.1701218631874434e-05, + "loss": 0.9779, + "step": 59955 + }, + { + "epoch": 0.86, + "grad_norm": 0.6015625, + "learning_rate": 1.1689469508666718e-05, + "loss": 1.0213, + "step": 59960 + }, + { + "epoch": 0.86, + "grad_norm": 0.5234375, + "learning_rate": 1.1677725920863625e-05, + "loss": 1.0294, + "step": 59965 + }, + { + "epoch": 0.86, + "grad_norm": 0.55078125, + "learning_rate": 1.1665987869201245e-05, + "loss": 0.96, + "step": 59970 + }, + { + "epoch": 0.86, + "grad_norm": 0.57421875, + "learning_rate": 1.1654255354415356e-05, + "loss": 0.9051, + "step": 59975 + }, + { + "epoch": 0.86, + "grad_norm": 0.53515625, + "learning_rate": 1.1642528377241369e-05, + "loss": 0.8809, + "step": 59980 + }, + { + "epoch": 0.86, + "grad_norm": 0.5390625, + "learning_rate": 1.1630806938414318e-05, + "loss": 0.9754, + "step": 59985 + }, + { + "epoch": 0.86, + "grad_norm": 0.515625, + "learning_rate": 1.1619091038668928e-05, + "loss": 0.8443, + "step": 59990 + }, + { + "epoch": 0.86, + "grad_norm": 0.5234375, + "learning_rate": 1.1607380678739598e-05, + "loss": 0.949, + "step": 59995 + }, + { + "epoch": 0.86, + "grad_norm": 0.5390625, + "learning_rate": 1.1595675859360288e-05, + "loss": 0.9439, + "step": 60000 + }, + { + "epoch": 0.86, + "grad_norm": 0.5234375, + "learning_rate": 1.158397658126471e-05, + "loss": 0.8939, + "step": 60005 + }, + { + "epoch": 0.86, + "grad_norm": 0.56640625, + "learning_rate": 1.1572282845186188e-05, + "loss": 0.9047, + "step": 60010 + }, + { + "epoch": 0.86, + "grad_norm": 0.54296875, + "learning_rate": 1.1560594651857692e-05, + "loss": 0.9178, + "step": 60015 + }, + { + "epoch": 0.86, + "grad_norm": 0.546875, + "learning_rate": 1.1548912002011825e-05, + "loss": 0.7978, + "step": 60020 + }, + { + "epoch": 0.86, + "grad_norm": 0.50390625, + "learning_rate": 1.1537234896380922e-05, + "loss": 0.9594, + "step": 60025 + }, + { + "epoch": 0.86, + "grad_norm": 0.515625, + "learning_rate": 1.1525563335696854e-05, + "loss": 0.887, + "step": 60030 + }, + { + "epoch": 0.86, + "grad_norm": 0.5234375, + "learning_rate": 1.1513897320691236e-05, + "loss": 0.9302, + "step": 60035 + }, + { + "epoch": 0.86, + "grad_norm": 0.59375, + "learning_rate": 1.1502236852095338e-05, + "loss": 0.8694, + "step": 60040 + }, + { + "epoch": 0.86, + "grad_norm": 0.5703125, + "learning_rate": 1.149058193064001e-05, + "loss": 0.9651, + "step": 60045 + }, + { + "epoch": 0.86, + "grad_norm": 0.54296875, + "learning_rate": 1.1478932557055799e-05, + "loss": 1.0108, + "step": 60050 + }, + { + "epoch": 0.86, + "grad_norm": 0.51953125, + "learning_rate": 1.1467288732072944e-05, + "loss": 0.9062, + "step": 60055 + }, + { + "epoch": 0.86, + "grad_norm": 0.5703125, + "learning_rate": 1.1455650456421241e-05, + "loss": 1.0262, + "step": 60060 + }, + { + "epoch": 0.86, + "grad_norm": 0.60546875, + "learning_rate": 1.1444017730830226e-05, + "loss": 1.0358, + "step": 60065 + }, + { + "epoch": 0.86, + "grad_norm": 0.5546875, + "learning_rate": 1.1432390556029015e-05, + "loss": 1.0133, + "step": 60070 + }, + { + "epoch": 0.86, + "grad_norm": 0.5078125, + "learning_rate": 1.142076893274645e-05, + "loss": 0.8855, + "step": 60075 + }, + { + "epoch": 0.86, + "grad_norm": 0.59375, + "learning_rate": 1.1409152861710959e-05, + "loss": 1.0818, + "step": 60080 + }, + { + "epoch": 0.86, + "grad_norm": 0.6015625, + "learning_rate": 1.1397542343650657e-05, + "loss": 1.052, + "step": 60085 + }, + { + "epoch": 0.86, + "grad_norm": 0.609375, + "learning_rate": 1.1385937379293343e-05, + "loss": 1.0159, + "step": 60090 + }, + { + "epoch": 0.86, + "grad_norm": 0.50390625, + "learning_rate": 1.1374337969366367e-05, + "loss": 0.822, + "step": 60095 + }, + { + "epoch": 0.86, + "grad_norm": 0.498046875, + "learning_rate": 1.1362744114596823e-05, + "loss": 1.0121, + "step": 60100 + }, + { + "epoch": 0.86, + "grad_norm": 0.578125, + "learning_rate": 1.1351155815711445e-05, + "loss": 0.8235, + "step": 60105 + }, + { + "epoch": 0.86, + "grad_norm": 0.53125, + "learning_rate": 1.133957307343657e-05, + "loss": 0.9591, + "step": 60110 + }, + { + "epoch": 0.86, + "grad_norm": 0.609375, + "learning_rate": 1.1327995888498233e-05, + "loss": 1.0327, + "step": 60115 + }, + { + "epoch": 0.86, + "grad_norm": 0.578125, + "learning_rate": 1.1316424261622116e-05, + "loss": 1.0982, + "step": 60120 + }, + { + "epoch": 0.86, + "grad_norm": 0.55859375, + "learning_rate": 1.130485819353353e-05, + "loss": 1.0098, + "step": 60125 + }, + { + "epoch": 0.86, + "grad_norm": 0.5078125, + "learning_rate": 1.1293297684957427e-05, + "loss": 0.965, + "step": 60130 + }, + { + "epoch": 0.86, + "grad_norm": 0.51171875, + "learning_rate": 1.1281742736618451e-05, + "loss": 0.8862, + "step": 60135 + }, + { + "epoch": 0.86, + "grad_norm": 0.5, + "learning_rate": 1.127019334924091e-05, + "loss": 0.9905, + "step": 60140 + }, + { + "epoch": 0.86, + "grad_norm": 0.53515625, + "learning_rate": 1.1258649523548681e-05, + "loss": 0.919, + "step": 60145 + }, + { + "epoch": 0.86, + "grad_norm": 0.53515625, + "learning_rate": 1.1247111260265386e-05, + "loss": 0.93, + "step": 60150 + }, + { + "epoch": 0.86, + "grad_norm": 0.55859375, + "learning_rate": 1.1235578560114212e-05, + "loss": 0.87, + "step": 60155 + }, + { + "epoch": 0.86, + "grad_norm": 0.53125, + "learning_rate": 1.1224051423818083e-05, + "loss": 0.9597, + "step": 60160 + }, + { + "epoch": 0.86, + "grad_norm": 0.53515625, + "learning_rate": 1.121252985209953e-05, + "loss": 0.9734, + "step": 60165 + }, + { + "epoch": 0.86, + "grad_norm": 0.54296875, + "learning_rate": 1.1201013845680719e-05, + "loss": 0.8142, + "step": 60170 + }, + { + "epoch": 0.86, + "grad_norm": 0.5625, + "learning_rate": 1.1189503405283486e-05, + "loss": 1.0303, + "step": 60175 + }, + { + "epoch": 0.86, + "grad_norm": 0.6171875, + "learning_rate": 1.117799853162932e-05, + "loss": 1.0191, + "step": 60180 + }, + { + "epoch": 0.86, + "grad_norm": 0.52734375, + "learning_rate": 1.1166499225439387e-05, + "loss": 0.956, + "step": 60185 + }, + { + "epoch": 0.86, + "grad_norm": 0.61328125, + "learning_rate": 1.1155005487434433e-05, + "loss": 0.9282, + "step": 60190 + }, + { + "epoch": 0.86, + "grad_norm": 0.609375, + "learning_rate": 1.1143517318334928e-05, + "loss": 0.9121, + "step": 60195 + }, + { + "epoch": 0.86, + "grad_norm": 0.71484375, + "learning_rate": 1.1132034718860973e-05, + "loss": 1.0497, + "step": 60200 + }, + { + "epoch": 0.86, + "grad_norm": 0.68359375, + "learning_rate": 1.112055768973227e-05, + "loss": 0.9809, + "step": 60205 + }, + { + "epoch": 0.86, + "grad_norm": 0.5546875, + "learning_rate": 1.1109086231668243e-05, + "loss": 0.934, + "step": 60210 + }, + { + "epoch": 0.86, + "grad_norm": 0.54296875, + "learning_rate": 1.1097620345387938e-05, + "loss": 0.88, + "step": 60215 + }, + { + "epoch": 0.86, + "grad_norm": 0.6015625, + "learning_rate": 1.1086160031610038e-05, + "loss": 1.0801, + "step": 60220 + }, + { + "epoch": 0.86, + "grad_norm": 0.65234375, + "learning_rate": 1.10747052910529e-05, + "loss": 0.9801, + "step": 60225 + }, + { + "epoch": 0.86, + "grad_norm": 0.625, + "learning_rate": 1.1063256124434496e-05, + "loss": 0.926, + "step": 60230 + }, + { + "epoch": 0.86, + "grad_norm": 0.75, + "learning_rate": 1.1051812532472505e-05, + "loss": 0.9908, + "step": 60235 + }, + { + "epoch": 0.86, + "grad_norm": 0.58984375, + "learning_rate": 1.1040374515884189e-05, + "loss": 0.9456, + "step": 60240 + }, + { + "epoch": 0.86, + "grad_norm": 0.62890625, + "learning_rate": 1.1028942075386517e-05, + "loss": 1.0099, + "step": 60245 + }, + { + "epoch": 0.86, + "grad_norm": 0.51953125, + "learning_rate": 1.1017515211696095e-05, + "loss": 0.9202, + "step": 60250 + }, + { + "epoch": 0.86, + "grad_norm": 0.5546875, + "learning_rate": 1.100609392552916e-05, + "loss": 0.9572, + "step": 60255 + }, + { + "epoch": 0.86, + "grad_norm": 0.58203125, + "learning_rate": 1.0994678217601595e-05, + "loss": 1.1325, + "step": 60260 + }, + { + "epoch": 0.86, + "grad_norm": 0.55078125, + "learning_rate": 1.0983268088629006e-05, + "loss": 0.9196, + "step": 60265 + }, + { + "epoch": 0.86, + "grad_norm": 0.59375, + "learning_rate": 1.0971863539326521e-05, + "loss": 1.079, + "step": 60270 + }, + { + "epoch": 0.86, + "grad_norm": 0.51953125, + "learning_rate": 1.0960464570409045e-05, + "loss": 0.9669, + "step": 60275 + }, + { + "epoch": 0.86, + "grad_norm": 0.53125, + "learning_rate": 1.0949071182591064e-05, + "loss": 1.0241, + "step": 60280 + }, + { + "epoch": 0.86, + "grad_norm": 0.625, + "learning_rate": 1.0937683376586693e-05, + "loss": 0.7955, + "step": 60285 + }, + { + "epoch": 0.86, + "grad_norm": 0.55859375, + "learning_rate": 1.0926301153109774e-05, + "loss": 0.9317, + "step": 60290 + }, + { + "epoch": 0.86, + "grad_norm": 0.56640625, + "learning_rate": 1.0914924512873759e-05, + "loss": 0.9039, + "step": 60295 + }, + { + "epoch": 0.86, + "grad_norm": 0.55078125, + "learning_rate": 1.0903553456591709e-05, + "loss": 0.9029, + "step": 60300 + }, + { + "epoch": 0.87, + "grad_norm": 0.6328125, + "learning_rate": 1.089218798497641e-05, + "loss": 0.9796, + "step": 60305 + }, + { + "epoch": 0.87, + "grad_norm": 0.5546875, + "learning_rate": 1.088082809874027e-05, + "loss": 1.0671, + "step": 60310 + }, + { + "epoch": 0.87, + "grad_norm": 0.45703125, + "learning_rate": 1.0869473798595298e-05, + "loss": 0.7776, + "step": 60315 + }, + { + "epoch": 0.87, + "grad_norm": 0.55859375, + "learning_rate": 1.0858125085253223e-05, + "loss": 0.9387, + "step": 60320 + }, + { + "epoch": 0.87, + "grad_norm": 0.60546875, + "learning_rate": 1.084678195942541e-05, + "loss": 0.9069, + "step": 60325 + }, + { + "epoch": 0.87, + "grad_norm": 0.5546875, + "learning_rate": 1.0835444421822848e-05, + "loss": 1.0661, + "step": 60330 + }, + { + "epoch": 0.87, + "grad_norm": 0.5703125, + "learning_rate": 1.0824112473156157e-05, + "loss": 0.9517, + "step": 60335 + }, + { + "epoch": 0.87, + "grad_norm": 0.5234375, + "learning_rate": 1.0812786114135653e-05, + "loss": 1.0505, + "step": 60340 + }, + { + "epoch": 0.87, + "grad_norm": 0.59375, + "learning_rate": 1.0801465345471318e-05, + "loss": 1.0632, + "step": 60345 + }, + { + "epoch": 0.87, + "grad_norm": 0.515625, + "learning_rate": 1.0790150167872703e-05, + "loss": 0.9453, + "step": 60350 + }, + { + "epoch": 0.87, + "grad_norm": 0.52734375, + "learning_rate": 1.0778840582049066e-05, + "loss": 0.9767, + "step": 60355 + }, + { + "epoch": 0.87, + "grad_norm": 0.58203125, + "learning_rate": 1.0767536588709349e-05, + "loss": 0.9211, + "step": 60360 + }, + { + "epoch": 0.87, + "grad_norm": 0.625, + "learning_rate": 1.075623818856204e-05, + "loss": 0.9557, + "step": 60365 + }, + { + "epoch": 0.87, + "grad_norm": 0.63671875, + "learning_rate": 1.0744945382315364e-05, + "loss": 0.9671, + "step": 60370 + }, + { + "epoch": 0.87, + "grad_norm": 1.6015625, + "learning_rate": 1.0733658170677185e-05, + "loss": 1.0677, + "step": 60375 + }, + { + "epoch": 0.87, + "grad_norm": 0.62890625, + "learning_rate": 1.0722376554354973e-05, + "loss": 1.0411, + "step": 60380 + }, + { + "epoch": 0.87, + "grad_norm": 0.59765625, + "learning_rate": 1.0711100534055862e-05, + "loss": 0.9604, + "step": 60385 + }, + { + "epoch": 0.87, + "grad_norm": 0.6015625, + "learning_rate": 1.0699830110486675e-05, + "loss": 1.0111, + "step": 60390 + }, + { + "epoch": 0.87, + "grad_norm": 0.58203125, + "learning_rate": 1.0688565284353825e-05, + "loss": 1.0119, + "step": 60395 + }, + { + "epoch": 0.87, + "grad_norm": 0.56640625, + "learning_rate": 1.0677306056363424e-05, + "loss": 0.9933, + "step": 60400 + }, + { + "epoch": 0.87, + "grad_norm": 0.6015625, + "learning_rate": 1.066605242722123e-05, + "loss": 1.035, + "step": 60405 + }, + { + "epoch": 0.87, + "grad_norm": 0.498046875, + "learning_rate": 1.065480439763258e-05, + "loss": 0.7821, + "step": 60410 + }, + { + "epoch": 0.87, + "grad_norm": 0.484375, + "learning_rate": 1.0643561968302552e-05, + "loss": 0.9285, + "step": 60415 + }, + { + "epoch": 0.87, + "grad_norm": 0.5703125, + "learning_rate": 1.063232513993585e-05, + "loss": 1.0274, + "step": 60420 + }, + { + "epoch": 0.87, + "grad_norm": 0.70703125, + "learning_rate": 1.062109391323678e-05, + "loss": 0.9299, + "step": 60425 + }, + { + "epoch": 0.87, + "grad_norm": 0.546875, + "learning_rate": 1.0609868288909319e-05, + "loss": 0.9836, + "step": 60430 + }, + { + "epoch": 0.87, + "grad_norm": 0.546875, + "learning_rate": 1.0598648267657151e-05, + "loss": 0.9447, + "step": 60435 + }, + { + "epoch": 0.87, + "grad_norm": 0.55078125, + "learning_rate": 1.0587433850183525e-05, + "loss": 0.8362, + "step": 60440 + }, + { + "epoch": 0.87, + "grad_norm": 0.56640625, + "learning_rate": 1.0576225037191367e-05, + "loss": 0.9271, + "step": 60445 + }, + { + "epoch": 0.87, + "grad_norm": 0.53125, + "learning_rate": 1.056502182938326e-05, + "loss": 0.976, + "step": 60450 + }, + { + "epoch": 0.87, + "grad_norm": 0.5390625, + "learning_rate": 1.0553824227461461e-05, + "loss": 0.9894, + "step": 60455 + }, + { + "epoch": 0.87, + "grad_norm": 0.6875, + "learning_rate": 1.0542632232127814e-05, + "loss": 1.0547, + "step": 60460 + }, + { + "epoch": 0.87, + "grad_norm": 0.51953125, + "learning_rate": 1.0531445844083865e-05, + "loss": 0.9005, + "step": 60465 + }, + { + "epoch": 0.87, + "grad_norm": 0.45703125, + "learning_rate": 1.05202650640308e-05, + "loss": 0.9292, + "step": 60470 + }, + { + "epoch": 0.87, + "grad_norm": 0.6796875, + "learning_rate": 1.0509089892669411e-05, + "loss": 0.8289, + "step": 60475 + }, + { + "epoch": 0.87, + "grad_norm": 0.5, + "learning_rate": 1.0497920330700195e-05, + "loss": 0.9028, + "step": 60480 + }, + { + "epoch": 0.87, + "grad_norm": 0.5546875, + "learning_rate": 1.0486756378823304e-05, + "loss": 0.8278, + "step": 60485 + }, + { + "epoch": 0.87, + "grad_norm": 0.546875, + "learning_rate": 1.0475598037738433e-05, + "loss": 0.9213, + "step": 60490 + }, + { + "epoch": 0.87, + "grad_norm": 0.5390625, + "learning_rate": 1.0464445308145022e-05, + "loss": 0.9051, + "step": 60495 + }, + { + "epoch": 0.87, + "grad_norm": 0.55078125, + "learning_rate": 1.0453298190742189e-05, + "loss": 1.0026, + "step": 60500 + }, + { + "epoch": 0.87, + "grad_norm": 0.53515625, + "learning_rate": 1.0442156686228576e-05, + "loss": 0.9675, + "step": 60505 + }, + { + "epoch": 0.87, + "grad_norm": 0.6015625, + "learning_rate": 1.0431020795302592e-05, + "loss": 1.1873, + "step": 60510 + }, + { + "epoch": 0.87, + "grad_norm": 0.58984375, + "learning_rate": 1.0419890518662256e-05, + "loss": 0.9899, + "step": 60515 + }, + { + "epoch": 0.87, + "grad_norm": 0.6015625, + "learning_rate": 1.0408765857005177e-05, + "loss": 1.0228, + "step": 60520 + }, + { + "epoch": 0.87, + "grad_norm": 0.61328125, + "learning_rate": 1.0397646811028695e-05, + "loss": 1.025, + "step": 60525 + }, + { + "epoch": 0.87, + "grad_norm": 0.5703125, + "learning_rate": 1.0386533381429775e-05, + "loss": 0.8756, + "step": 60530 + }, + { + "epoch": 0.87, + "grad_norm": 0.5703125, + "learning_rate": 1.0375425568904984e-05, + "loss": 1.1357, + "step": 60535 + }, + { + "epoch": 0.87, + "grad_norm": 0.5703125, + "learning_rate": 1.0364323374150619e-05, + "loss": 0.9211, + "step": 60540 + }, + { + "epoch": 0.87, + "grad_norm": 0.51171875, + "learning_rate": 1.0353226797862526e-05, + "loss": 0.9579, + "step": 60545 + }, + { + "epoch": 0.87, + "grad_norm": 0.46875, + "learning_rate": 1.03421358407363e-05, + "loss": 0.9193, + "step": 60550 + }, + { + "epoch": 0.87, + "grad_norm": 0.59375, + "learning_rate": 1.0331050503467088e-05, + "loss": 1.0952, + "step": 60555 + }, + { + "epoch": 0.87, + "grad_norm": 0.5234375, + "learning_rate": 1.0319970786749755e-05, + "loss": 1.0066, + "step": 60560 + }, + { + "epoch": 0.87, + "grad_norm": 0.5546875, + "learning_rate": 1.0308896691278812e-05, + "loss": 0.8826, + "step": 60565 + }, + { + "epoch": 0.87, + "grad_norm": 0.5703125, + "learning_rate": 1.029782821774835e-05, + "loss": 0.9592, + "step": 60570 + }, + { + "epoch": 0.87, + "grad_norm": 0.5546875, + "learning_rate": 1.028676536685218e-05, + "loss": 0.8765, + "step": 60575 + }, + { + "epoch": 0.87, + "grad_norm": 0.53125, + "learning_rate": 1.0275708139283746e-05, + "loss": 0.8267, + "step": 60580 + }, + { + "epoch": 0.87, + "grad_norm": 0.53125, + "learning_rate": 1.0264656535736106e-05, + "loss": 0.9003, + "step": 60585 + }, + { + "epoch": 0.87, + "grad_norm": 0.51953125, + "learning_rate": 1.0253610556902005e-05, + "loss": 0.9266, + "step": 60590 + }, + { + "epoch": 0.87, + "grad_norm": 0.51171875, + "learning_rate": 1.0242570203473811e-05, + "loss": 0.9141, + "step": 60595 + }, + { + "epoch": 0.87, + "grad_norm": 0.6484375, + "learning_rate": 1.0231535476143516e-05, + "loss": 0.9906, + "step": 60600 + }, + { + "epoch": 0.87, + "grad_norm": 0.6015625, + "learning_rate": 1.0220506375602823e-05, + "loss": 0.9491, + "step": 60605 + }, + { + "epoch": 0.87, + "grad_norm": 0.55078125, + "learning_rate": 1.0209482902543055e-05, + "loss": 0.8141, + "step": 60610 + }, + { + "epoch": 0.87, + "grad_norm": 0.515625, + "learning_rate": 1.0198465057655137e-05, + "loss": 0.9482, + "step": 60615 + }, + { + "epoch": 0.87, + "grad_norm": 0.65625, + "learning_rate": 1.0187452841629718e-05, + "loss": 0.9945, + "step": 60620 + }, + { + "epoch": 0.87, + "grad_norm": 0.609375, + "learning_rate": 1.0176446255157058e-05, + "loss": 0.8699, + "step": 60625 + }, + { + "epoch": 0.87, + "grad_norm": 0.486328125, + "learning_rate": 1.0165445298927023e-05, + "loss": 0.8981, + "step": 60630 + }, + { + "epoch": 0.87, + "grad_norm": 0.60546875, + "learning_rate": 1.0154449973629188e-05, + "loss": 1.1487, + "step": 60635 + }, + { + "epoch": 0.87, + "grad_norm": 0.4609375, + "learning_rate": 1.0143460279952777e-05, + "loss": 0.8834, + "step": 60640 + }, + { + "epoch": 0.87, + "grad_norm": 0.546875, + "learning_rate": 1.0132476218586618e-05, + "loss": 1.0051, + "step": 60645 + }, + { + "epoch": 0.87, + "grad_norm": 0.62109375, + "learning_rate": 1.012149779021917e-05, + "loss": 0.8582, + "step": 60650 + }, + { + "epoch": 0.87, + "grad_norm": 0.5390625, + "learning_rate": 1.0110524995538617e-05, + "loss": 1.0623, + "step": 60655 + }, + { + "epoch": 0.87, + "grad_norm": 0.515625, + "learning_rate": 1.0099557835232743e-05, + "loss": 1.0097, + "step": 60660 + }, + { + "epoch": 0.87, + "grad_norm": 0.5703125, + "learning_rate": 1.0088596309988952e-05, + "loss": 1.0686, + "step": 60665 + }, + { + "epoch": 0.87, + "grad_norm": 0.609375, + "learning_rate": 1.0077640420494349e-05, + "loss": 1.0395, + "step": 60670 + }, + { + "epoch": 0.87, + "grad_norm": 0.53125, + "learning_rate": 1.0066690167435677e-05, + "loss": 0.8752, + "step": 60675 + }, + { + "epoch": 0.87, + "grad_norm": 0.5546875, + "learning_rate": 1.0055745551499263e-05, + "loss": 0.8251, + "step": 60680 + }, + { + "epoch": 0.87, + "grad_norm": 0.48046875, + "learning_rate": 1.0044806573371168e-05, + "loss": 0.7511, + "step": 60685 + }, + { + "epoch": 0.87, + "grad_norm": 0.53515625, + "learning_rate": 1.0033873233737056e-05, + "loss": 1.025, + "step": 60690 + }, + { + "epoch": 0.87, + "grad_norm": 0.55859375, + "learning_rate": 1.0022945533282235e-05, + "loss": 0.9772, + "step": 60695 + }, + { + "epoch": 0.87, + "grad_norm": 0.49609375, + "learning_rate": 1.0012023472691655e-05, + "loss": 0.993, + "step": 60700 + }, + { + "epoch": 0.87, + "grad_norm": 0.5546875, + "learning_rate": 1.0001107052649939e-05, + "loss": 0.8906, + "step": 60705 + }, + { + "epoch": 0.87, + "grad_norm": 0.63671875, + "learning_rate": 9.990196273841312e-06, + "loss": 0.9415, + "step": 60710 + }, + { + "epoch": 0.87, + "grad_norm": 0.515625, + "learning_rate": 9.979291136949708e-06, + "loss": 1.1337, + "step": 60715 + }, + { + "epoch": 0.87, + "grad_norm": 0.5546875, + "learning_rate": 9.96839164265867e-06, + "loss": 0.8363, + "step": 60720 + }, + { + "epoch": 0.87, + "grad_norm": 0.5390625, + "learning_rate": 9.95749779165136e-06, + "loss": 0.9499, + "step": 60725 + }, + { + "epoch": 0.87, + "grad_norm": 0.50390625, + "learning_rate": 9.946609584610644e-06, + "loss": 0.8801, + "step": 60730 + }, + { + "epoch": 0.87, + "grad_norm": 0.52734375, + "learning_rate": 9.935727022219022e-06, + "loss": 0.8072, + "step": 60735 + }, + { + "epoch": 0.87, + "grad_norm": 0.5625, + "learning_rate": 9.924850105158578e-06, + "loss": 0.8885, + "step": 60740 + }, + { + "epoch": 0.87, + "grad_norm": 0.609375, + "learning_rate": 9.913978834111126e-06, + "loss": 0.9325, + "step": 60745 + }, + { + "epoch": 0.87, + "grad_norm": 0.51171875, + "learning_rate": 9.903113209758096e-06, + "loss": 0.981, + "step": 60750 + }, + { + "epoch": 0.87, + "grad_norm": 0.54296875, + "learning_rate": 9.892253232780536e-06, + "loss": 0.8377, + "step": 60755 + }, + { + "epoch": 0.87, + "grad_norm": 0.6015625, + "learning_rate": 9.881398903859152e-06, + "loss": 0.9308, + "step": 60760 + }, + { + "epoch": 0.87, + "grad_norm": 0.62109375, + "learning_rate": 9.870550223674323e-06, + "loss": 1.1031, + "step": 60765 + }, + { + "epoch": 0.87, + "grad_norm": 0.59375, + "learning_rate": 9.859707192906076e-06, + "loss": 0.7768, + "step": 60770 + }, + { + "epoch": 0.87, + "grad_norm": 0.625, + "learning_rate": 9.848869812234018e-06, + "loss": 1.0453, + "step": 60775 + }, + { + "epoch": 0.87, + "grad_norm": 0.578125, + "learning_rate": 9.838038082337474e-06, + "loss": 0.9651, + "step": 60780 + }, + { + "epoch": 0.87, + "grad_norm": 0.609375, + "learning_rate": 9.827212003895414e-06, + "loss": 1.0032, + "step": 60785 + }, + { + "epoch": 0.87, + "grad_norm": 0.46875, + "learning_rate": 9.816391577586381e-06, + "loss": 1.1093, + "step": 60790 + }, + { + "epoch": 0.87, + "grad_norm": 0.53515625, + "learning_rate": 9.805576804088633e-06, + "loss": 0.8107, + "step": 60795 + }, + { + "epoch": 0.87, + "grad_norm": 0.5546875, + "learning_rate": 9.794767684080076e-06, + "loss": 1.0138, + "step": 60800 + }, + { + "epoch": 0.87, + "grad_norm": 0.546875, + "learning_rate": 9.783964218238206e-06, + "loss": 0.9424, + "step": 60805 + }, + { + "epoch": 0.87, + "grad_norm": 0.74609375, + "learning_rate": 9.773166407240197e-06, + "loss": 0.9967, + "step": 60810 + }, + { + "epoch": 0.87, + "grad_norm": 0.546875, + "learning_rate": 9.762374251762873e-06, + "loss": 0.8841, + "step": 60815 + }, + { + "epoch": 0.87, + "grad_norm": 0.5390625, + "learning_rate": 9.751587752482728e-06, + "loss": 1.014, + "step": 60820 + }, + { + "epoch": 0.87, + "grad_norm": 0.57421875, + "learning_rate": 9.740806910075829e-06, + "loss": 1.0144, + "step": 60825 + }, + { + "epoch": 0.87, + "grad_norm": 0.51171875, + "learning_rate": 9.73003172521796e-06, + "loss": 0.9608, + "step": 60830 + }, + { + "epoch": 0.87, + "grad_norm": 0.51171875, + "learning_rate": 9.7192621985845e-06, + "loss": 0.9867, + "step": 60835 + }, + { + "epoch": 0.87, + "grad_norm": 0.5625, + "learning_rate": 9.708498330850502e-06, + "loss": 0.9342, + "step": 60840 + }, + { + "epoch": 0.87, + "grad_norm": 0.4296875, + "learning_rate": 9.697740122690679e-06, + "loss": 0.9266, + "step": 60845 + }, + { + "epoch": 0.87, + "grad_norm": 0.6640625, + "learning_rate": 9.686987574779338e-06, + "loss": 1.1289, + "step": 60850 + }, + { + "epoch": 0.87, + "grad_norm": 0.51953125, + "learning_rate": 9.676240687790484e-06, + "loss": 0.8821, + "step": 60855 + }, + { + "epoch": 0.87, + "grad_norm": 0.5703125, + "learning_rate": 9.66549946239772e-06, + "loss": 0.9076, + "step": 60860 + }, + { + "epoch": 0.87, + "grad_norm": 0.6484375, + "learning_rate": 9.654763899274355e-06, + "loss": 0.9685, + "step": 60865 + }, + { + "epoch": 0.87, + "grad_norm": 0.55078125, + "learning_rate": 9.64403399909326e-06, + "loss": 0.9358, + "step": 60870 + }, + { + "epoch": 0.87, + "grad_norm": 0.625, + "learning_rate": 9.63330976252702e-06, + "loss": 1.0837, + "step": 60875 + }, + { + "epoch": 0.87, + "grad_norm": 0.51171875, + "learning_rate": 9.622591190247864e-06, + "loss": 0.8836, + "step": 60880 + }, + { + "epoch": 0.87, + "grad_norm": 0.66796875, + "learning_rate": 9.6118782829276e-06, + "loss": 1.0149, + "step": 60885 + }, + { + "epoch": 0.87, + "grad_norm": 0.48046875, + "learning_rate": 9.601171041237756e-06, + "loss": 0.9759, + "step": 60890 + }, + { + "epoch": 0.87, + "grad_norm": 0.63671875, + "learning_rate": 9.590469465849484e-06, + "loss": 0.9763, + "step": 60895 + }, + { + "epoch": 0.87, + "grad_norm": 0.5703125, + "learning_rate": 9.579773557433535e-06, + "loss": 0.9364, + "step": 60900 + }, + { + "epoch": 0.87, + "grad_norm": 0.5, + "learning_rate": 9.569083316660377e-06, + "loss": 0.8355, + "step": 60905 + }, + { + "epoch": 0.87, + "grad_norm": 0.66796875, + "learning_rate": 9.558398744200048e-06, + "loss": 0.9732, + "step": 60910 + }, + { + "epoch": 0.87, + "grad_norm": 0.6015625, + "learning_rate": 9.547719840722301e-06, + "loss": 0.9637, + "step": 60915 + }, + { + "epoch": 0.87, + "grad_norm": 0.6171875, + "learning_rate": 9.537046606896482e-06, + "loss": 1.1266, + "step": 60920 + }, + { + "epoch": 0.87, + "grad_norm": 0.498046875, + "learning_rate": 9.52637904339162e-06, + "loss": 0.8926, + "step": 60925 + }, + { + "epoch": 0.87, + "grad_norm": 0.58984375, + "learning_rate": 9.515717150876369e-06, + "loss": 0.8966, + "step": 60930 + }, + { + "epoch": 0.87, + "grad_norm": 0.482421875, + "learning_rate": 9.505060930018995e-06, + "loss": 0.9604, + "step": 60935 + }, + { + "epoch": 0.87, + "grad_norm": 0.55078125, + "learning_rate": 9.494410381487474e-06, + "loss": 0.8663, + "step": 60940 + }, + { + "epoch": 0.87, + "grad_norm": 0.59375, + "learning_rate": 9.483765505949405e-06, + "loss": 0.9535, + "step": 60945 + }, + { + "epoch": 0.87, + "grad_norm": 0.5703125, + "learning_rate": 9.473126304071989e-06, + "loss": 0.9091, + "step": 60950 + }, + { + "epoch": 0.87, + "grad_norm": 0.53125, + "learning_rate": 9.462492776522114e-06, + "loss": 0.9805, + "step": 60955 + }, + { + "epoch": 0.87, + "grad_norm": 0.58203125, + "learning_rate": 9.451864923966336e-06, + "loss": 0.9339, + "step": 60960 + }, + { + "epoch": 0.87, + "grad_norm": 0.58984375, + "learning_rate": 9.441242747070766e-06, + "loss": 0.9978, + "step": 60965 + }, + { + "epoch": 0.87, + "grad_norm": 0.53125, + "learning_rate": 9.430626246501228e-06, + "loss": 0.9269, + "step": 60970 + }, + { + "epoch": 0.87, + "grad_norm": 0.859375, + "learning_rate": 9.420015422923212e-06, + "loss": 1.0158, + "step": 60975 + }, + { + "epoch": 0.87, + "grad_norm": 0.6875, + "learning_rate": 9.409410277001762e-06, + "loss": 0.9552, + "step": 60980 + }, + { + "epoch": 0.87, + "grad_norm": 0.578125, + "learning_rate": 9.39881080940166e-06, + "loss": 1.0178, + "step": 60985 + }, + { + "epoch": 0.87, + "grad_norm": 0.625, + "learning_rate": 9.388217020787304e-06, + "loss": 0.9533, + "step": 60990 + }, + { + "epoch": 0.87, + "grad_norm": 0.4921875, + "learning_rate": 9.377628911822678e-06, + "loss": 0.9394, + "step": 60995 + }, + { + "epoch": 0.88, + "grad_norm": 0.56640625, + "learning_rate": 9.367046483171482e-06, + "loss": 0.9528, + "step": 61000 + }, + { + "epoch": 0.88, + "grad_norm": 0.55859375, + "learning_rate": 9.356469735497053e-06, + "loss": 1.0007, + "step": 61005 + }, + { + "epoch": 0.88, + "grad_norm": 0.54296875, + "learning_rate": 9.345898669462338e-06, + "loss": 0.9128, + "step": 61010 + }, + { + "epoch": 0.88, + "grad_norm": 0.55859375, + "learning_rate": 9.33533328572992e-06, + "loss": 0.997, + "step": 61015 + }, + { + "epoch": 0.88, + "grad_norm": 0.546875, + "learning_rate": 9.324773584962077e-06, + "loss": 0.9481, + "step": 61020 + }, + { + "epoch": 0.88, + "grad_norm": 0.5390625, + "learning_rate": 9.314219567820703e-06, + "loss": 0.899, + "step": 61025 + }, + { + "epoch": 0.88, + "grad_norm": 0.6328125, + "learning_rate": 9.303671234967315e-06, + "loss": 1.0267, + "step": 61030 + }, + { + "epoch": 0.88, + "grad_norm": 0.58984375, + "learning_rate": 9.293128587063115e-06, + "loss": 0.9971, + "step": 61035 + }, + { + "epoch": 0.88, + "grad_norm": 0.58203125, + "learning_rate": 9.282591624768932e-06, + "loss": 0.9922, + "step": 61040 + }, + { + "epoch": 0.88, + "grad_norm": 0.58203125, + "learning_rate": 9.272060348745216e-06, + "loss": 1.1262, + "step": 61045 + }, + { + "epoch": 0.88, + "grad_norm": 0.515625, + "learning_rate": 9.26153475965208e-06, + "loss": 0.9235, + "step": 61050 + }, + { + "epoch": 0.88, + "grad_norm": 0.5546875, + "learning_rate": 9.251014858149298e-06, + "loss": 0.8597, + "step": 61055 + }, + { + "epoch": 0.88, + "grad_norm": 0.53515625, + "learning_rate": 9.240500644896255e-06, + "loss": 1.07, + "step": 61060 + }, + { + "epoch": 0.88, + "grad_norm": 0.55859375, + "learning_rate": 9.22999212055201e-06, + "loss": 0.9589, + "step": 61065 + }, + { + "epoch": 0.88, + "grad_norm": 0.625, + "learning_rate": 9.21948928577524e-06, + "loss": 0.9358, + "step": 61070 + }, + { + "epoch": 0.88, + "grad_norm": 0.56640625, + "learning_rate": 9.208992141224249e-06, + "loss": 0.9216, + "step": 61075 + }, + { + "epoch": 0.88, + "grad_norm": 0.53125, + "learning_rate": 9.198500687557031e-06, + "loss": 0.9737, + "step": 61080 + }, + { + "epoch": 0.88, + "grad_norm": 0.59765625, + "learning_rate": 9.188014925431233e-06, + "loss": 1.0058, + "step": 61085 + }, + { + "epoch": 0.88, + "grad_norm": 0.54296875, + "learning_rate": 9.177534855504055e-06, + "loss": 0.9112, + "step": 61090 + }, + { + "epoch": 0.88, + "grad_norm": 0.546875, + "learning_rate": 9.167060478432443e-06, + "loss": 0.9753, + "step": 61095 + }, + { + "epoch": 0.88, + "grad_norm": 0.5703125, + "learning_rate": 9.156591794872938e-06, + "loss": 1.0607, + "step": 61100 + }, + { + "epoch": 0.88, + "grad_norm": 0.5234375, + "learning_rate": 9.146128805481712e-06, + "loss": 1.0579, + "step": 61105 + }, + { + "epoch": 0.88, + "grad_norm": 0.49609375, + "learning_rate": 9.1356715109146e-06, + "loss": 0.8809, + "step": 61110 + }, + { + "epoch": 0.88, + "grad_norm": 0.5234375, + "learning_rate": 9.125219911827099e-06, + "loss": 0.9079, + "step": 61115 + }, + { + "epoch": 0.88, + "grad_norm": 0.60546875, + "learning_rate": 9.114774008874317e-06, + "loss": 1.0232, + "step": 61120 + }, + { + "epoch": 0.88, + "grad_norm": 0.55859375, + "learning_rate": 9.104333802710997e-06, + "loss": 0.8465, + "step": 61125 + }, + { + "epoch": 0.88, + "grad_norm": 0.58203125, + "learning_rate": 9.093899293991548e-06, + "loss": 0.9612, + "step": 61130 + }, + { + "epoch": 0.88, + "grad_norm": 0.54296875, + "learning_rate": 9.083470483370037e-06, + "loss": 1.0004, + "step": 61135 + }, + { + "epoch": 0.88, + "grad_norm": 0.65234375, + "learning_rate": 9.073047371500133e-06, + "loss": 1.0981, + "step": 61140 + }, + { + "epoch": 0.88, + "grad_norm": 0.578125, + "learning_rate": 9.062629959035173e-06, + "loss": 0.9233, + "step": 61145 + }, + { + "epoch": 0.88, + "grad_norm": 0.5703125, + "learning_rate": 9.052218246628163e-06, + "loss": 0.9419, + "step": 61150 + }, + { + "epoch": 0.88, + "grad_norm": 0.5, + "learning_rate": 9.041812234931667e-06, + "loss": 0.8708, + "step": 61155 + }, + { + "epoch": 0.88, + "grad_norm": 0.5234375, + "learning_rate": 9.031411924597977e-06, + "loss": 0.9768, + "step": 61160 + }, + { + "epoch": 0.88, + "grad_norm": 0.58203125, + "learning_rate": 9.021017316279001e-06, + "loss": 1.0744, + "step": 61165 + }, + { + "epoch": 0.88, + "grad_norm": 0.60546875, + "learning_rate": 9.010628410626287e-06, + "loss": 0.9773, + "step": 61170 + }, + { + "epoch": 0.88, + "grad_norm": 0.54296875, + "learning_rate": 9.00024520829098e-06, + "loss": 0.9747, + "step": 61175 + }, + { + "epoch": 0.88, + "grad_norm": 0.625, + "learning_rate": 8.989867709923983e-06, + "loss": 0.9236, + "step": 61180 + }, + { + "epoch": 0.88, + "grad_norm": 0.58984375, + "learning_rate": 8.979495916175695e-06, + "loss": 0.9793, + "step": 61185 + }, + { + "epoch": 0.88, + "grad_norm": 0.53515625, + "learning_rate": 8.969129827696265e-06, + "loss": 0.9123, + "step": 61190 + }, + { + "epoch": 0.88, + "grad_norm": 0.546875, + "learning_rate": 8.958769445135483e-06, + "loss": 0.888, + "step": 61195 + }, + { + "epoch": 0.88, + "grad_norm": 0.53125, + "learning_rate": 8.948414769142698e-06, + "loss": 0.9846, + "step": 61200 + }, + { + "epoch": 0.88, + "grad_norm": 0.62109375, + "learning_rate": 8.938065800366967e-06, + "loss": 0.9033, + "step": 61205 + }, + { + "epoch": 0.88, + "grad_norm": 0.5390625, + "learning_rate": 8.927722539457006e-06, + "loss": 0.9003, + "step": 61210 + }, + { + "epoch": 0.88, + "grad_norm": 0.58203125, + "learning_rate": 8.917384987061105e-06, + "loss": 0.94, + "step": 61215 + }, + { + "epoch": 0.88, + "grad_norm": 0.5625, + "learning_rate": 8.90705314382726e-06, + "loss": 1.0094, + "step": 61220 + }, + { + "epoch": 0.88, + "grad_norm": 0.59375, + "learning_rate": 8.89672701040305e-06, + "loss": 1.0216, + "step": 61225 + }, + { + "epoch": 0.88, + "grad_norm": 0.7109375, + "learning_rate": 8.88640658743577e-06, + "loss": 1.0245, + "step": 61230 + }, + { + "epoch": 0.88, + "grad_norm": 0.5, + "learning_rate": 8.876091875572279e-06, + "loss": 0.9166, + "step": 61235 + }, + { + "epoch": 0.88, + "grad_norm": 0.70703125, + "learning_rate": 8.865782875459127e-06, + "loss": 1.1021, + "step": 61240 + }, + { + "epoch": 0.88, + "grad_norm": 0.51171875, + "learning_rate": 8.855479587742521e-06, + "loss": 0.9594, + "step": 61245 + }, + { + "epoch": 0.88, + "grad_norm": 0.53125, + "learning_rate": 8.845182013068231e-06, + "loss": 0.8905, + "step": 61250 + }, + { + "epoch": 0.88, + "grad_norm": 0.5546875, + "learning_rate": 8.834890152081754e-06, + "loss": 0.9806, + "step": 61255 + }, + { + "epoch": 0.88, + "grad_norm": 0.5234375, + "learning_rate": 8.824604005428217e-06, + "loss": 0.9909, + "step": 61260 + }, + { + "epoch": 0.88, + "grad_norm": 0.5390625, + "learning_rate": 8.814323573752315e-06, + "loss": 0.8444, + "step": 61265 + }, + { + "epoch": 0.88, + "grad_norm": 0.62109375, + "learning_rate": 8.804048857698466e-06, + "loss": 0.9958, + "step": 61270 + }, + { + "epoch": 0.88, + "grad_norm": 0.5078125, + "learning_rate": 8.793779857910734e-06, + "loss": 0.8591, + "step": 61275 + }, + { + "epoch": 0.88, + "grad_norm": 0.53125, + "learning_rate": 8.783516575032713e-06, + "loss": 0.9333, + "step": 61280 + }, + { + "epoch": 0.88, + "grad_norm": 0.48046875, + "learning_rate": 8.773259009707769e-06, + "loss": 0.9208, + "step": 61285 + }, + { + "epoch": 0.88, + "grad_norm": 0.62890625, + "learning_rate": 8.763007162578862e-06, + "loss": 1.2006, + "step": 61290 + }, + { + "epoch": 0.88, + "grad_norm": 0.67578125, + "learning_rate": 8.75276103428856e-06, + "loss": 1.1762, + "step": 61295 + }, + { + "epoch": 0.88, + "grad_norm": 0.5390625, + "learning_rate": 8.742520625479111e-06, + "loss": 0.9718, + "step": 61300 + }, + { + "epoch": 0.88, + "grad_norm": 0.6328125, + "learning_rate": 8.732285936792427e-06, + "loss": 1.0278, + "step": 61305 + }, + { + "epoch": 0.88, + "grad_norm": 0.6015625, + "learning_rate": 8.722056968869985e-06, + "loss": 0.908, + "step": 61310 + }, + { + "epoch": 0.88, + "grad_norm": 0.51171875, + "learning_rate": 8.71183372235298e-06, + "loss": 0.8661, + "step": 61315 + }, + { + "epoch": 0.88, + "grad_norm": 0.5078125, + "learning_rate": 8.701616197882211e-06, + "loss": 1.0178, + "step": 61320 + }, + { + "epoch": 0.88, + "grad_norm": 0.5859375, + "learning_rate": 8.691404396098124e-06, + "loss": 0.9051, + "step": 61325 + }, + { + "epoch": 0.88, + "grad_norm": 0.5625, + "learning_rate": 8.681198317640783e-06, + "loss": 0.887, + "step": 61330 + }, + { + "epoch": 0.88, + "grad_norm": 0.609375, + "learning_rate": 8.67099796314993e-06, + "loss": 0.9121, + "step": 61335 + }, + { + "epoch": 0.88, + "grad_norm": 0.58203125, + "learning_rate": 8.660803333264966e-06, + "loss": 0.9048, + "step": 61340 + }, + { + "epoch": 0.88, + "grad_norm": 0.59765625, + "learning_rate": 8.650614428624848e-06, + "loss": 0.9388, + "step": 61345 + }, + { + "epoch": 0.88, + "grad_norm": 0.515625, + "learning_rate": 8.640431249868264e-06, + "loss": 0.9448, + "step": 61350 + }, + { + "epoch": 0.88, + "grad_norm": 0.5703125, + "learning_rate": 8.630253797633514e-06, + "loss": 0.8843, + "step": 61355 + }, + { + "epoch": 0.88, + "grad_norm": 0.546875, + "learning_rate": 8.620082072558499e-06, + "loss": 0.9721, + "step": 61360 + }, + { + "epoch": 0.88, + "grad_norm": 0.53125, + "learning_rate": 8.609916075280821e-06, + "loss": 0.8592, + "step": 61365 + }, + { + "epoch": 0.88, + "grad_norm": 0.53515625, + "learning_rate": 8.599755806437693e-06, + "loss": 0.8063, + "step": 61370 + }, + { + "epoch": 0.88, + "grad_norm": 0.51953125, + "learning_rate": 8.58960126666596e-06, + "loss": 0.9778, + "step": 61375 + }, + { + "epoch": 0.88, + "grad_norm": 0.4921875, + "learning_rate": 8.579452456602133e-06, + "loss": 0.9136, + "step": 61380 + }, + { + "epoch": 0.88, + "grad_norm": 0.53125, + "learning_rate": 8.569309376882362e-06, + "loss": 0.9862, + "step": 61385 + }, + { + "epoch": 0.88, + "grad_norm": 0.53125, + "learning_rate": 8.559172028142381e-06, + "loss": 0.9479, + "step": 61390 + }, + { + "epoch": 0.88, + "grad_norm": 0.6015625, + "learning_rate": 8.549040411017639e-06, + "loss": 1.0091, + "step": 61395 + }, + { + "epoch": 0.88, + "grad_norm": 0.6015625, + "learning_rate": 8.538914526143216e-06, + "loss": 0.9978, + "step": 61400 + }, + { + "epoch": 0.88, + "grad_norm": 0.50390625, + "learning_rate": 8.528794374153781e-06, + "loss": 0.7958, + "step": 61405 + }, + { + "epoch": 0.88, + "grad_norm": 0.640625, + "learning_rate": 8.518679955683694e-06, + "loss": 1.1249, + "step": 61410 + }, + { + "epoch": 0.88, + "grad_norm": 0.55078125, + "learning_rate": 8.50857127136695e-06, + "loss": 0.9851, + "step": 61415 + }, + { + "epoch": 0.88, + "grad_norm": 0.51171875, + "learning_rate": 8.498468321837139e-06, + "loss": 1.1021, + "step": 61420 + }, + { + "epoch": 0.88, + "grad_norm": 0.56640625, + "learning_rate": 8.488371107727545e-06, + "loss": 0.9302, + "step": 61425 + }, + { + "epoch": 0.88, + "grad_norm": 0.5625, + "learning_rate": 8.478279629671094e-06, + "loss": 0.9321, + "step": 61430 + }, + { + "epoch": 0.88, + "grad_norm": 0.62890625, + "learning_rate": 8.468193888300313e-06, + "loss": 0.9139, + "step": 61435 + }, + { + "epoch": 0.88, + "grad_norm": 0.5625, + "learning_rate": 8.458113884247365e-06, + "loss": 0.9128, + "step": 61440 + }, + { + "epoch": 0.88, + "grad_norm": 0.546875, + "learning_rate": 8.448039618144089e-06, + "loss": 0.8569, + "step": 61445 + }, + { + "epoch": 0.88, + "grad_norm": 0.53515625, + "learning_rate": 8.437971090621988e-06, + "loss": 1.0033, + "step": 61450 + }, + { + "epoch": 0.88, + "grad_norm": 0.58984375, + "learning_rate": 8.427908302312115e-06, + "loss": 0.9779, + "step": 61455 + }, + { + "epoch": 0.88, + "grad_norm": 0.69921875, + "learning_rate": 8.417851253845244e-06, + "loss": 0.7853, + "step": 61460 + }, + { + "epoch": 0.88, + "grad_norm": 0.62890625, + "learning_rate": 8.40779994585178e-06, + "loss": 1.0948, + "step": 61465 + }, + { + "epoch": 0.88, + "grad_norm": 0.5703125, + "learning_rate": 8.397754378961709e-06, + "loss": 1.0884, + "step": 61470 + }, + { + "epoch": 0.88, + "grad_norm": 0.59375, + "learning_rate": 8.387714553804716e-06, + "loss": 1.2371, + "step": 61475 + }, + { + "epoch": 0.88, + "grad_norm": 0.5234375, + "learning_rate": 8.377680471010129e-06, + "loss": 0.9435, + "step": 61480 + }, + { + "epoch": 0.88, + "grad_norm": 0.57421875, + "learning_rate": 8.367652131206882e-06, + "loss": 1.043, + "step": 61485 + }, + { + "epoch": 0.88, + "grad_norm": 0.578125, + "learning_rate": 8.357629535023547e-06, + "loss": 0.8901, + "step": 61490 + }, + { + "epoch": 0.88, + "grad_norm": 0.59375, + "learning_rate": 8.34761268308838e-06, + "loss": 0.9678, + "step": 61495 + }, + { + "epoch": 0.88, + "grad_norm": 0.55859375, + "learning_rate": 8.337601576029219e-06, + "loss": 0.8858, + "step": 61500 + }, + { + "epoch": 0.88, + "grad_norm": 0.56640625, + "learning_rate": 8.327596214473576e-06, + "loss": 1.0307, + "step": 61505 + }, + { + "epoch": 0.88, + "grad_norm": 0.490234375, + "learning_rate": 8.317596599048638e-06, + "loss": 0.94, + "step": 61510 + }, + { + "epoch": 0.88, + "grad_norm": 0.5703125, + "learning_rate": 8.307602730381137e-06, + "loss": 1.0737, + "step": 61515 + }, + { + "epoch": 0.88, + "grad_norm": 0.55078125, + "learning_rate": 8.297614609097526e-06, + "loss": 0.9876, + "step": 61520 + }, + { + "epoch": 0.88, + "grad_norm": 0.578125, + "learning_rate": 8.287632235823884e-06, + "loss": 0.946, + "step": 61525 + }, + { + "epoch": 0.88, + "grad_norm": 0.5859375, + "learning_rate": 8.277655611185897e-06, + "loss": 0.9989, + "step": 61530 + }, + { + "epoch": 0.88, + "grad_norm": 0.59375, + "learning_rate": 8.267684735808934e-06, + "loss": 0.8744, + "step": 61535 + }, + { + "epoch": 0.88, + "grad_norm": 0.55859375, + "learning_rate": 8.25771961031795e-06, + "loss": 0.8663, + "step": 61540 + }, + { + "epoch": 0.88, + "grad_norm": 0.546875, + "learning_rate": 8.247760235337598e-06, + "loss": 0.9953, + "step": 61545 + }, + { + "epoch": 0.88, + "grad_norm": 0.52734375, + "learning_rate": 8.237806611492127e-06, + "loss": 0.8985, + "step": 61550 + }, + { + "epoch": 0.88, + "grad_norm": 0.56640625, + "learning_rate": 8.227858739405436e-06, + "loss": 1.0169, + "step": 61555 + }, + { + "epoch": 0.88, + "grad_norm": 0.61328125, + "learning_rate": 8.217916619701093e-06, + "loss": 0.8976, + "step": 61560 + }, + { + "epoch": 0.88, + "grad_norm": 0.5859375, + "learning_rate": 8.207980253002257e-06, + "loss": 0.9396, + "step": 61565 + }, + { + "epoch": 0.88, + "grad_norm": 0.58984375, + "learning_rate": 8.19804963993176e-06, + "loss": 0.8884, + "step": 61570 + }, + { + "epoch": 0.88, + "grad_norm": 0.52734375, + "learning_rate": 8.188124781112082e-06, + "loss": 0.8163, + "step": 61575 + }, + { + "epoch": 0.88, + "grad_norm": 0.5078125, + "learning_rate": 8.178205677165285e-06, + "loss": 0.8764, + "step": 61580 + }, + { + "epoch": 0.88, + "grad_norm": 0.62890625, + "learning_rate": 8.168292328713145e-06, + "loss": 1.0038, + "step": 61585 + }, + { + "epoch": 0.88, + "grad_norm": 0.79296875, + "learning_rate": 8.158384736377033e-06, + "loss": 0.9548, + "step": 61590 + }, + { + "epoch": 0.88, + "grad_norm": 0.5859375, + "learning_rate": 8.148482900777976e-06, + "loss": 1.0273, + "step": 61595 + }, + { + "epoch": 0.88, + "grad_norm": 0.55078125, + "learning_rate": 8.138586822536597e-06, + "loss": 0.9237, + "step": 61600 + }, + { + "epoch": 0.88, + "grad_norm": 0.51953125, + "learning_rate": 8.128696502273214e-06, + "loss": 0.9631, + "step": 61605 + }, + { + "epoch": 0.88, + "grad_norm": 0.51171875, + "learning_rate": 8.118811940607795e-06, + "loss": 0.9891, + "step": 61610 + }, + { + "epoch": 0.88, + "grad_norm": 0.52734375, + "learning_rate": 8.108933138159857e-06, + "loss": 0.9581, + "step": 61615 + }, + { + "epoch": 0.88, + "grad_norm": 0.54296875, + "learning_rate": 8.099060095548661e-06, + "loss": 0.9396, + "step": 61620 + }, + { + "epoch": 0.88, + "grad_norm": 0.60546875, + "learning_rate": 8.089192813393042e-06, + "loss": 1.2638, + "step": 61625 + }, + { + "epoch": 0.88, + "grad_norm": 0.52734375, + "learning_rate": 8.079331292311498e-06, + "loss": 0.9143, + "step": 61630 + }, + { + "epoch": 0.88, + "grad_norm": 0.5234375, + "learning_rate": 8.069475532922166e-06, + "loss": 1.0492, + "step": 61635 + }, + { + "epoch": 0.88, + "grad_norm": 0.58203125, + "learning_rate": 8.059625535842807e-06, + "loss": 0.8953, + "step": 61640 + }, + { + "epoch": 0.88, + "grad_norm": 0.515625, + "learning_rate": 8.049781301690806e-06, + "loss": 1.0398, + "step": 61645 + }, + { + "epoch": 0.88, + "grad_norm": 0.5859375, + "learning_rate": 8.039942831083246e-06, + "loss": 0.9835, + "step": 61650 + }, + { + "epoch": 0.88, + "grad_norm": 0.5546875, + "learning_rate": 8.030110124636814e-06, + "loss": 0.8837, + "step": 61655 + }, + { + "epoch": 0.88, + "grad_norm": 0.53125, + "learning_rate": 8.020283182967814e-06, + "loss": 0.9021, + "step": 61660 + }, + { + "epoch": 0.88, + "grad_norm": 0.5625, + "learning_rate": 8.010462006692221e-06, + "loss": 0.966, + "step": 61665 + }, + { + "epoch": 0.88, + "grad_norm": 0.57421875, + "learning_rate": 8.000646596425643e-06, + "loss": 0.9157, + "step": 61670 + }, + { + "epoch": 0.88, + "grad_norm": 0.58984375, + "learning_rate": 7.99083695278331e-06, + "loss": 0.9347, + "step": 61675 + }, + { + "epoch": 0.88, + "grad_norm": 0.56640625, + "learning_rate": 7.981033076380095e-06, + "loss": 0.9805, + "step": 61680 + }, + { + "epoch": 0.88, + "grad_norm": 0.54296875, + "learning_rate": 7.971234967830554e-06, + "loss": 0.8568, + "step": 61685 + }, + { + "epoch": 0.88, + "grad_norm": 0.52734375, + "learning_rate": 7.961442627748793e-06, + "loss": 0.903, + "step": 61690 + }, + { + "epoch": 0.88, + "grad_norm": 0.55078125, + "learning_rate": 7.951656056748658e-06, + "loss": 0.9831, + "step": 61695 + }, + { + "epoch": 0.89, + "grad_norm": 0.56640625, + "learning_rate": 7.941875255443532e-06, + "loss": 0.9424, + "step": 61700 + }, + { + "epoch": 0.89, + "grad_norm": 0.62890625, + "learning_rate": 7.932100224446526e-06, + "loss": 0.9975, + "step": 61705 + }, + { + "epoch": 0.89, + "grad_norm": 0.56640625, + "learning_rate": 7.922330964370316e-06, + "loss": 1.0486, + "step": 61710 + }, + { + "epoch": 0.89, + "grad_norm": 0.69140625, + "learning_rate": 7.912567475827271e-06, + "loss": 1.1237, + "step": 61715 + }, + { + "epoch": 0.89, + "grad_norm": 0.53125, + "learning_rate": 7.902809759429398e-06, + "loss": 0.9946, + "step": 61720 + }, + { + "epoch": 0.89, + "grad_norm": 0.58984375, + "learning_rate": 7.893057815788273e-06, + "loss": 0.9619, + "step": 61725 + }, + { + "epoch": 0.89, + "grad_norm": 0.5390625, + "learning_rate": 7.88331164551519e-06, + "loss": 0.8598, + "step": 61730 + }, + { + "epoch": 0.89, + "grad_norm": 0.5390625, + "learning_rate": 7.873571249221057e-06, + "loss": 0.924, + "step": 61735 + }, + { + "epoch": 0.89, + "grad_norm": 0.51953125, + "learning_rate": 7.863836627516396e-06, + "loss": 0.974, + "step": 61740 + }, + { + "epoch": 0.89, + "grad_norm": 0.51171875, + "learning_rate": 7.854107781011399e-06, + "loss": 1.0555, + "step": 61745 + }, + { + "epoch": 0.89, + "grad_norm": 0.58984375, + "learning_rate": 7.844384710315867e-06, + "loss": 1.0496, + "step": 61750 + }, + { + "epoch": 0.89, + "grad_norm": 0.53125, + "learning_rate": 7.834667416039254e-06, + "loss": 0.9431, + "step": 61755 + }, + { + "epoch": 0.89, + "grad_norm": 0.609375, + "learning_rate": 7.824955898790642e-06, + "loss": 0.9346, + "step": 61760 + }, + { + "epoch": 0.89, + "grad_norm": 0.5546875, + "learning_rate": 7.8152501591788e-06, + "loss": 1.0252, + "step": 61765 + }, + { + "epoch": 0.89, + "grad_norm": 0.5546875, + "learning_rate": 7.80555019781204e-06, + "loss": 0.9873, + "step": 61770 + }, + { + "epoch": 0.89, + "grad_norm": 0.66796875, + "learning_rate": 7.795856015298397e-06, + "loss": 0.813, + "step": 61775 + }, + { + "epoch": 0.89, + "grad_norm": 0.51953125, + "learning_rate": 7.786167612245531e-06, + "loss": 0.8898, + "step": 61780 + }, + { + "epoch": 0.89, + "grad_norm": 0.625, + "learning_rate": 7.776484989260691e-06, + "loss": 1.0275, + "step": 61785 + }, + { + "epoch": 0.89, + "grad_norm": 0.53125, + "learning_rate": 7.7668081469508e-06, + "loss": 0.8083, + "step": 61790 + }, + { + "epoch": 0.89, + "grad_norm": 0.498046875, + "learning_rate": 7.757137085922428e-06, + "loss": 0.9573, + "step": 61795 + }, + { + "epoch": 0.89, + "grad_norm": 0.55859375, + "learning_rate": 7.74747180678177e-06, + "loss": 0.9706, + "step": 61800 + }, + { + "epoch": 0.89, + "grad_norm": 0.609375, + "learning_rate": 7.737812310134617e-06, + "loss": 1.0688, + "step": 61805 + }, + { + "epoch": 0.89, + "grad_norm": 0.59375, + "learning_rate": 7.728158596586477e-06, + "loss": 1.0002, + "step": 61810 + }, + { + "epoch": 0.89, + "grad_norm": 0.5703125, + "learning_rate": 7.718510666742462e-06, + "loss": 0.977, + "step": 61815 + }, + { + "epoch": 0.89, + "grad_norm": 0.5, + "learning_rate": 7.70886852120728e-06, + "loss": 0.9562, + "step": 61820 + }, + { + "epoch": 0.89, + "grad_norm": 0.54296875, + "learning_rate": 7.699232160585324e-06, + "loss": 1.0195, + "step": 61825 + }, + { + "epoch": 0.89, + "grad_norm": 0.48046875, + "learning_rate": 7.689601585480643e-06, + "loss": 1.0175, + "step": 61830 + }, + { + "epoch": 0.89, + "grad_norm": 0.53515625, + "learning_rate": 7.679976796496846e-06, + "loss": 0.8564, + "step": 61835 + }, + { + "epoch": 0.89, + "grad_norm": 0.58203125, + "learning_rate": 7.670357794237249e-06, + "loss": 0.9566, + "step": 61840 + }, + { + "epoch": 0.89, + "grad_norm": 0.498046875, + "learning_rate": 7.6607445793048e-06, + "loss": 0.9369, + "step": 61845 + }, + { + "epoch": 0.89, + "grad_norm": 0.57421875, + "learning_rate": 7.651137152302035e-06, + "loss": 1.0571, + "step": 61850 + }, + { + "epoch": 0.89, + "grad_norm": 0.59375, + "learning_rate": 7.641535513831165e-06, + "loss": 0.995, + "step": 61855 + }, + { + "epoch": 0.89, + "grad_norm": 0.68359375, + "learning_rate": 7.631939664494048e-06, + "loss": 0.9018, + "step": 61860 + }, + { + "epoch": 0.89, + "grad_norm": 0.53125, + "learning_rate": 7.622349604892131e-06, + "loss": 0.9492, + "step": 61865 + }, + { + "epoch": 0.89, + "grad_norm": 0.53125, + "learning_rate": 7.612765335626559e-06, + "loss": 1.0655, + "step": 61870 + }, + { + "epoch": 0.89, + "grad_norm": 0.474609375, + "learning_rate": 7.603186857298083e-06, + "loss": 0.9456, + "step": 61875 + }, + { + "epoch": 0.89, + "grad_norm": 0.53515625, + "learning_rate": 7.59361417050708e-06, + "loss": 0.72, + "step": 61880 + }, + { + "epoch": 0.89, + "grad_norm": 0.482421875, + "learning_rate": 7.584047275853578e-06, + "loss": 1.0067, + "step": 61885 + }, + { + "epoch": 0.89, + "grad_norm": 0.5390625, + "learning_rate": 7.574486173937256e-06, + "loss": 1.0156, + "step": 61890 + }, + { + "epoch": 0.89, + "grad_norm": 0.6484375, + "learning_rate": 7.564930865357389e-06, + "loss": 0.9955, + "step": 61895 + }, + { + "epoch": 0.89, + "grad_norm": 0.484375, + "learning_rate": 7.5553813507129426e-06, + "loss": 0.8775, + "step": 61900 + }, + { + "epoch": 0.89, + "grad_norm": 0.50390625, + "learning_rate": 7.545837630602481e-06, + "loss": 0.8675, + "step": 61905 + }, + { + "epoch": 0.89, + "grad_norm": 0.578125, + "learning_rate": 7.536299705624217e-06, + "loss": 0.9599, + "step": 61910 + }, + { + "epoch": 0.89, + "grad_norm": 0.53515625, + "learning_rate": 7.526767576375982e-06, + "loss": 1.0218, + "step": 61915 + }, + { + "epoch": 0.89, + "grad_norm": 0.578125, + "learning_rate": 7.5172412434552756e-06, + "loss": 0.8365, + "step": 61920 + }, + { + "epoch": 0.89, + "grad_norm": 0.54296875, + "learning_rate": 7.507720707459232e-06, + "loss": 1.0037, + "step": 61925 + }, + { + "epoch": 0.89, + "grad_norm": 0.51171875, + "learning_rate": 7.4982059689845726e-06, + "loss": 1.0069, + "step": 61930 + }, + { + "epoch": 0.89, + "grad_norm": 0.6171875, + "learning_rate": 7.488697028627711e-06, + "loss": 0.9896, + "step": 61935 + }, + { + "epoch": 0.89, + "grad_norm": 0.5859375, + "learning_rate": 7.479193886984703e-06, + "loss": 0.9226, + "step": 61940 + }, + { + "epoch": 0.89, + "grad_norm": 0.50390625, + "learning_rate": 7.469696544651184e-06, + "loss": 0.944, + "step": 61945 + }, + { + "epoch": 0.89, + "grad_norm": 0.515625, + "learning_rate": 7.460205002222464e-06, + "loss": 0.9059, + "step": 61950 + }, + { + "epoch": 0.89, + "grad_norm": 0.5625, + "learning_rate": 7.450719260293515e-06, + "loss": 0.9737, + "step": 61955 + }, + { + "epoch": 0.89, + "grad_norm": 0.5625, + "learning_rate": 7.44123931945887e-06, + "loss": 0.8882, + "step": 61960 + }, + { + "epoch": 0.89, + "grad_norm": 0.53125, + "learning_rate": 7.431765180312744e-06, + "loss": 0.797, + "step": 61965 + }, + { + "epoch": 0.89, + "grad_norm": 0.65625, + "learning_rate": 7.422296843449028e-06, + "loss": 0.9947, + "step": 61970 + }, + { + "epoch": 0.89, + "grad_norm": 0.57421875, + "learning_rate": 7.412834309461159e-06, + "loss": 0.9082, + "step": 61975 + }, + { + "epoch": 0.89, + "grad_norm": 0.58203125, + "learning_rate": 7.403377578942294e-06, + "loss": 0.9802, + "step": 61980 + }, + { + "epoch": 0.89, + "grad_norm": 0.5859375, + "learning_rate": 7.393926652485195e-06, + "loss": 1.0125, + "step": 61985 + }, + { + "epoch": 0.89, + "grad_norm": 0.609375, + "learning_rate": 7.384481530682219e-06, + "loss": 0.9947, + "step": 61990 + }, + { + "epoch": 0.89, + "grad_norm": 0.578125, + "learning_rate": 7.3750422141254275e-06, + "loss": 1.0376, + "step": 61995 + }, + { + "epoch": 0.89, + "grad_norm": 0.5625, + "learning_rate": 7.3656087034064904e-06, + "loss": 1.016, + "step": 62000 + }, + { + "epoch": 0.89, + "grad_norm": 0.55859375, + "learning_rate": 7.356180999116691e-06, + "loss": 0.9243, + "step": 62005 + }, + { + "epoch": 0.89, + "grad_norm": 0.62109375, + "learning_rate": 7.346759101847e-06, + "loss": 1.0494, + "step": 62010 + }, + { + "epoch": 0.89, + "grad_norm": 0.51953125, + "learning_rate": 7.337343012187947e-06, + "loss": 0.8579, + "step": 62015 + }, + { + "epoch": 0.89, + "grad_norm": 0.5703125, + "learning_rate": 7.32793273072978e-06, + "loss": 0.8811, + "step": 62020 + }, + { + "epoch": 0.89, + "grad_norm": 0.53515625, + "learning_rate": 7.318528258062329e-06, + "loss": 0.8636, + "step": 62025 + }, + { + "epoch": 0.89, + "grad_norm": 1.5390625, + "learning_rate": 7.309129594775077e-06, + "loss": 1.0092, + "step": 62030 + }, + { + "epoch": 0.89, + "grad_norm": 0.5390625, + "learning_rate": 7.299736741457164e-06, + "loss": 1.0579, + "step": 62035 + }, + { + "epoch": 0.89, + "grad_norm": 0.58984375, + "learning_rate": 7.290349698697318e-06, + "loss": 0.9998, + "step": 62040 + }, + { + "epoch": 0.89, + "grad_norm": 0.546875, + "learning_rate": 7.280968467083937e-06, + "loss": 0.7762, + "step": 62045 + }, + { + "epoch": 0.89, + "grad_norm": 0.6171875, + "learning_rate": 7.271593047205061e-06, + "loss": 0.9415, + "step": 62050 + }, + { + "epoch": 0.89, + "grad_norm": 0.55859375, + "learning_rate": 7.2622234396483306e-06, + "loss": 0.8974, + "step": 62055 + }, + { + "epoch": 0.89, + "grad_norm": 0.462890625, + "learning_rate": 7.252859645001075e-06, + "loss": 1.0368, + "step": 62060 + }, + { + "epoch": 0.89, + "grad_norm": 0.52734375, + "learning_rate": 7.243501663850205e-06, + "loss": 0.9015, + "step": 62065 + }, + { + "epoch": 0.89, + "grad_norm": 0.59765625, + "learning_rate": 7.234149496782272e-06, + "loss": 1.1338, + "step": 62070 + }, + { + "epoch": 0.89, + "grad_norm": 0.56640625, + "learning_rate": 7.224803144383496e-06, + "loss": 0.8891, + "step": 62075 + }, + { + "epoch": 0.89, + "grad_norm": 0.578125, + "learning_rate": 7.215462607239743e-06, + "loss": 1.0407, + "step": 62080 + }, + { + "epoch": 0.89, + "grad_norm": 0.5390625, + "learning_rate": 7.206127885936453e-06, + "loss": 0.8616, + "step": 62085 + }, + { + "epoch": 0.89, + "grad_norm": 0.5703125, + "learning_rate": 7.196798981058739e-06, + "loss": 1.0154, + "step": 62090 + }, + { + "epoch": 0.89, + "grad_norm": 0.55859375, + "learning_rate": 7.187475893191387e-06, + "loss": 0.9882, + "step": 62095 + }, + { + "epoch": 0.89, + "grad_norm": 0.578125, + "learning_rate": 7.178158622918729e-06, + "loss": 0.8684, + "step": 62100 + }, + { + "epoch": 0.89, + "grad_norm": 0.5546875, + "learning_rate": 7.168847170824811e-06, + "loss": 0.9177, + "step": 62105 + }, + { + "epoch": 0.89, + "grad_norm": 0.49609375, + "learning_rate": 7.159541537493286e-06, + "loss": 0.9148, + "step": 62110 + }, + { + "epoch": 0.89, + "grad_norm": 0.56640625, + "learning_rate": 7.150241723507433e-06, + "loss": 1.0282, + "step": 62115 + }, + { + "epoch": 0.89, + "grad_norm": 0.62890625, + "learning_rate": 7.1409477294501645e-06, + "loss": 1.0454, + "step": 62120 + }, + { + "epoch": 0.89, + "grad_norm": 0.5078125, + "learning_rate": 7.1316595559040465e-06, + "loss": 0.9332, + "step": 62125 + }, + { + "epoch": 0.89, + "grad_norm": 0.51171875, + "learning_rate": 7.122377203451292e-06, + "loss": 1.0585, + "step": 62130 + }, + { + "epoch": 0.89, + "grad_norm": 0.59765625, + "learning_rate": 7.113100672673701e-06, + "loss": 0.9731, + "step": 62135 + }, + { + "epoch": 0.89, + "grad_norm": 0.5703125, + "learning_rate": 7.1038299641527416e-06, + "loss": 0.869, + "step": 62140 + }, + { + "epoch": 0.89, + "grad_norm": 0.53125, + "learning_rate": 7.0945650784695396e-06, + "loss": 1.0144, + "step": 62145 + }, + { + "epoch": 0.89, + "grad_norm": 0.515625, + "learning_rate": 7.085306016204796e-06, + "loss": 0.9836, + "step": 62150 + }, + { + "epoch": 0.89, + "grad_norm": 0.53125, + "learning_rate": 7.076052777938891e-06, + "loss": 0.9506, + "step": 62155 + }, + { + "epoch": 0.89, + "grad_norm": 0.51953125, + "learning_rate": 7.066805364251849e-06, + "loss": 0.9612, + "step": 62160 + }, + { + "epoch": 0.89, + "grad_norm": 0.52734375, + "learning_rate": 7.057563775723286e-06, + "loss": 0.8578, + "step": 62165 + }, + { + "epoch": 0.89, + "grad_norm": 0.478515625, + "learning_rate": 7.048328012932459e-06, + "loss": 0.9426, + "step": 62170 + }, + { + "epoch": 0.89, + "grad_norm": 0.546875, + "learning_rate": 7.039098076458306e-06, + "loss": 0.8757, + "step": 62175 + }, + { + "epoch": 0.89, + "grad_norm": 0.47265625, + "learning_rate": 7.029873966879352e-06, + "loss": 0.9648, + "step": 62180 + }, + { + "epoch": 0.89, + "grad_norm": 0.60546875, + "learning_rate": 7.02065568477378e-06, + "loss": 0.8497, + "step": 62185 + }, + { + "epoch": 0.89, + "grad_norm": 0.58984375, + "learning_rate": 7.011443230719428e-06, + "loss": 0.8719, + "step": 62190 + }, + { + "epoch": 0.89, + "grad_norm": 0.5546875, + "learning_rate": 7.0022366052936885e-06, + "loss": 1.085, + "step": 62195 + }, + { + "epoch": 0.89, + "grad_norm": 0.58984375, + "learning_rate": 6.993035809073678e-06, + "loss": 1.1285, + "step": 62200 + }, + { + "epoch": 0.89, + "grad_norm": 0.62890625, + "learning_rate": 6.983840842636136e-06, + "loss": 1.1114, + "step": 62205 + }, + { + "epoch": 0.89, + "grad_norm": 0.61328125, + "learning_rate": 6.9746517065573556e-06, + "loss": 0.979, + "step": 62210 + }, + { + "epoch": 0.89, + "grad_norm": 0.58984375, + "learning_rate": 6.965468401413366e-06, + "loss": 0.8401, + "step": 62215 + }, + { + "epoch": 0.89, + "grad_norm": 0.53125, + "learning_rate": 6.956290927779785e-06, + "loss": 1.0541, + "step": 62220 + }, + { + "epoch": 0.89, + "grad_norm": 0.6015625, + "learning_rate": 6.947119286231851e-06, + "loss": 1.0197, + "step": 62225 + }, + { + "epoch": 0.89, + "grad_norm": 0.55078125, + "learning_rate": 6.93795347734445e-06, + "loss": 1.0151, + "step": 62230 + }, + { + "epoch": 0.89, + "grad_norm": 0.58203125, + "learning_rate": 6.92879350169211e-06, + "loss": 1.0242, + "step": 62235 + }, + { + "epoch": 0.89, + "grad_norm": 0.55859375, + "learning_rate": 6.9196393598490175e-06, + "loss": 0.9719, + "step": 62240 + }, + { + "epoch": 0.89, + "grad_norm": 0.5703125, + "learning_rate": 6.910491052388912e-06, + "loss": 0.9262, + "step": 62245 + }, + { + "epoch": 0.89, + "grad_norm": 0.62109375, + "learning_rate": 6.901348579885258e-06, + "loss": 1.1226, + "step": 62250 + }, + { + "epoch": 0.89, + "grad_norm": 0.5859375, + "learning_rate": 6.892211942911109e-06, + "loss": 1.05, + "step": 62255 + }, + { + "epoch": 0.89, + "grad_norm": 0.625, + "learning_rate": 6.88308114203915e-06, + "loss": 0.8664, + "step": 62260 + }, + { + "epoch": 0.89, + "grad_norm": 0.609375, + "learning_rate": 6.873956177841711e-06, + "loss": 1.0119, + "step": 62265 + }, + { + "epoch": 0.89, + "grad_norm": 0.546875, + "learning_rate": 6.8648370508907825e-06, + "loss": 1.0299, + "step": 62270 + }, + { + "epoch": 0.89, + "grad_norm": 0.54296875, + "learning_rate": 6.855723761757926e-06, + "loss": 0.8667, + "step": 62275 + }, + { + "epoch": 0.89, + "grad_norm": 0.58203125, + "learning_rate": 6.846616311014386e-06, + "loss": 1.0801, + "step": 62280 + }, + { + "epoch": 0.89, + "grad_norm": 0.5703125, + "learning_rate": 6.837514699231018e-06, + "loss": 1.0818, + "step": 62285 + }, + { + "epoch": 0.89, + "grad_norm": 0.51171875, + "learning_rate": 6.828418926978353e-06, + "loss": 0.9123, + "step": 62290 + }, + { + "epoch": 0.89, + "grad_norm": 0.5859375, + "learning_rate": 6.819328994826491e-06, + "loss": 0.9992, + "step": 62295 + }, + { + "epoch": 0.89, + "grad_norm": 0.5078125, + "learning_rate": 6.810244903345209e-06, + "loss": 1.0025, + "step": 62300 + }, + { + "epoch": 0.89, + "grad_norm": 0.56640625, + "learning_rate": 6.8011666531039185e-06, + "loss": 1.0085, + "step": 62305 + }, + { + "epoch": 0.89, + "grad_norm": 0.546875, + "learning_rate": 6.7920942446716425e-06, + "loss": 0.9108, + "step": 62310 + }, + { + "epoch": 0.89, + "grad_norm": 0.490234375, + "learning_rate": 6.78302767861706e-06, + "loss": 0.8411, + "step": 62315 + }, + { + "epoch": 0.89, + "grad_norm": 0.59375, + "learning_rate": 6.77396695550846e-06, + "loss": 0.8664, + "step": 62320 + }, + { + "epoch": 0.89, + "grad_norm": 0.609375, + "learning_rate": 6.764912075913799e-06, + "loss": 0.8948, + "step": 62325 + }, + { + "epoch": 0.89, + "grad_norm": 0.48828125, + "learning_rate": 6.755863040400612e-06, + "loss": 1.0311, + "step": 62330 + }, + { + "epoch": 0.89, + "grad_norm": 0.58984375, + "learning_rate": 6.7468198495361564e-06, + "loss": 0.9616, + "step": 62335 + }, + { + "epoch": 0.89, + "grad_norm": 0.5078125, + "learning_rate": 6.7377825038872135e-06, + "loss": 0.9264, + "step": 62340 + }, + { + "epoch": 0.89, + "grad_norm": 0.55078125, + "learning_rate": 6.728751004020284e-06, + "loss": 0.8163, + "step": 62345 + }, + { + "epoch": 0.89, + "grad_norm": 0.5859375, + "learning_rate": 6.7197253505014825e-06, + "loss": 0.8381, + "step": 62350 + }, + { + "epoch": 0.89, + "grad_norm": 0.5625, + "learning_rate": 6.710705543896512e-06, + "loss": 1.0656, + "step": 62355 + }, + { + "epoch": 0.89, + "grad_norm": 0.5078125, + "learning_rate": 6.701691584770775e-06, + "loss": 0.8805, + "step": 62360 + }, + { + "epoch": 0.89, + "grad_norm": 0.6640625, + "learning_rate": 6.692683473689276e-06, + "loss": 1.0133, + "step": 62365 + }, + { + "epoch": 0.89, + "grad_norm": 0.5390625, + "learning_rate": 6.68368121121663e-06, + "loss": 0.8248, + "step": 62370 + }, + { + "epoch": 0.89, + "grad_norm": 0.6328125, + "learning_rate": 6.6746847979171525e-06, + "loss": 0.8632, + "step": 62375 + }, + { + "epoch": 0.89, + "grad_norm": 0.51171875, + "learning_rate": 6.665694234354691e-06, + "loss": 0.8974, + "step": 62380 + }, + { + "epoch": 0.89, + "grad_norm": 0.451171875, + "learning_rate": 6.65670952109283e-06, + "loss": 0.8646, + "step": 62385 + }, + { + "epoch": 0.89, + "grad_norm": 0.55078125, + "learning_rate": 6.647730658694718e-06, + "loss": 0.8876, + "step": 62390 + }, + { + "epoch": 0.9, + "grad_norm": 0.61328125, + "learning_rate": 6.6387576477231595e-06, + "loss": 1.0241, + "step": 62395 + }, + { + "epoch": 0.9, + "grad_norm": 0.6015625, + "learning_rate": 6.629790488740617e-06, + "loss": 0.9139, + "step": 62400 + }, + { + "epoch": 0.9, + "grad_norm": 0.55078125, + "learning_rate": 6.620829182309129e-06, + "loss": 1.0862, + "step": 62405 + }, + { + "epoch": 0.9, + "grad_norm": 0.4921875, + "learning_rate": 6.611873728990425e-06, + "loss": 1.0256, + "step": 62410 + }, + { + "epoch": 0.9, + "grad_norm": 0.5234375, + "learning_rate": 6.602924129345855e-06, + "loss": 1.1855, + "step": 62415 + }, + { + "epoch": 0.9, + "grad_norm": 0.58203125, + "learning_rate": 6.593980383936349e-06, + "loss": 0.8099, + "step": 62420 + }, + { + "epoch": 0.9, + "grad_norm": 0.5390625, + "learning_rate": 6.585042493322535e-06, + "loss": 1.1324, + "step": 62425 + }, + { + "epoch": 0.9, + "grad_norm": 0.4765625, + "learning_rate": 6.576110458064677e-06, + "loss": 0.9345, + "step": 62430 + }, + { + "epoch": 0.9, + "grad_norm": 0.58203125, + "learning_rate": 6.567184278722582e-06, + "loss": 1.0149, + "step": 62435 + }, + { + "epoch": 0.9, + "grad_norm": 0.59375, + "learning_rate": 6.558263955855792e-06, + "loss": 1.0576, + "step": 62440 + }, + { + "epoch": 0.9, + "grad_norm": 0.71875, + "learning_rate": 6.549349490023448e-06, + "loss": 1.0319, + "step": 62445 + }, + { + "epoch": 0.9, + "grad_norm": 0.640625, + "learning_rate": 6.540440881784305e-06, + "loss": 0.9467, + "step": 62450 + }, + { + "epoch": 0.9, + "grad_norm": 0.484375, + "learning_rate": 6.531538131696757e-06, + "loss": 0.8775, + "step": 62455 + }, + { + "epoch": 0.9, + "grad_norm": 0.546875, + "learning_rate": 6.522641240318872e-06, + "loss": 0.9048, + "step": 62460 + }, + { + "epoch": 0.9, + "grad_norm": 0.5703125, + "learning_rate": 6.51375020820828e-06, + "loss": 0.8971, + "step": 62465 + }, + { + "epoch": 0.9, + "grad_norm": 0.5703125, + "learning_rate": 6.5048650359223025e-06, + "loss": 0.8988, + "step": 62470 + }, + { + "epoch": 0.9, + "grad_norm": 0.5859375, + "learning_rate": 6.495985724017872e-06, + "loss": 0.9041, + "step": 62475 + }, + { + "epoch": 0.9, + "grad_norm": 0.58203125, + "learning_rate": 6.487112273051555e-06, + "loss": 0.8345, + "step": 62480 + }, + { + "epoch": 0.9, + "grad_norm": 0.62109375, + "learning_rate": 6.478244683579526e-06, + "loss": 1.0131, + "step": 62485 + }, + { + "epoch": 0.9, + "grad_norm": 0.54296875, + "learning_rate": 6.469382956157633e-06, + "loss": 0.9545, + "step": 62490 + }, + { + "epoch": 0.9, + "grad_norm": 0.5078125, + "learning_rate": 6.46052709134135e-06, + "loss": 0.8159, + "step": 62495 + }, + { + "epoch": 0.9, + "grad_norm": 0.63671875, + "learning_rate": 6.451677089685759e-06, + "loss": 0.872, + "step": 62500 + }, + { + "epoch": 0.9, + "grad_norm": 0.6484375, + "learning_rate": 6.442832951745581e-06, + "loss": 0.9304, + "step": 62505 + }, + { + "epoch": 0.9, + "grad_norm": 0.5390625, + "learning_rate": 6.433994678075195e-06, + "loss": 0.9571, + "step": 62510 + }, + { + "epoch": 0.9, + "grad_norm": 0.58984375, + "learning_rate": 6.42516226922858e-06, + "loss": 0.9523, + "step": 62515 + }, + { + "epoch": 0.9, + "grad_norm": 0.5234375, + "learning_rate": 6.416335725759359e-06, + "loss": 0.9097, + "step": 62520 + }, + { + "epoch": 0.9, + "grad_norm": 0.59375, + "learning_rate": 6.4075150482208245e-06, + "loss": 1.0431, + "step": 62525 + }, + { + "epoch": 0.9, + "grad_norm": 0.5546875, + "learning_rate": 6.398700237165811e-06, + "loss": 0.9239, + "step": 62530 + }, + { + "epoch": 0.9, + "grad_norm": 0.54296875, + "learning_rate": 6.389891293146899e-06, + "loss": 0.942, + "step": 62535 + }, + { + "epoch": 0.9, + "grad_norm": 0.5390625, + "learning_rate": 6.381088216716202e-06, + "loss": 0.9218, + "step": 62540 + }, + { + "epoch": 0.9, + "grad_norm": 0.51953125, + "learning_rate": 6.3722910084255014e-06, + "loss": 0.8891, + "step": 62545 + }, + { + "epoch": 0.9, + "grad_norm": 0.6015625, + "learning_rate": 6.363499668826245e-06, + "loss": 0.9076, + "step": 62550 + }, + { + "epoch": 0.9, + "grad_norm": 0.6015625, + "learning_rate": 6.35471419846948e-06, + "loss": 1.1142, + "step": 62555 + }, + { + "epoch": 0.9, + "grad_norm": 0.56640625, + "learning_rate": 6.345934597905867e-06, + "loss": 0.9014, + "step": 62560 + }, + { + "epoch": 0.9, + "grad_norm": 0.546875, + "learning_rate": 6.337160867685743e-06, + "loss": 0.9365, + "step": 62565 + }, + { + "epoch": 0.9, + "grad_norm": 0.5859375, + "learning_rate": 6.328393008359057e-06, + "loss": 0.9801, + "step": 62570 + }, + { + "epoch": 0.9, + "grad_norm": 0.52734375, + "learning_rate": 6.319631020475369e-06, + "loss": 0.9142, + "step": 62575 + }, + { + "epoch": 0.9, + "grad_norm": 0.55859375, + "learning_rate": 6.310874904583897e-06, + "loss": 0.9259, + "step": 62580 + }, + { + "epoch": 0.9, + "grad_norm": 0.65234375, + "learning_rate": 6.302124661233511e-06, + "loss": 1.1145, + "step": 62585 + }, + { + "epoch": 0.9, + "grad_norm": 0.55859375, + "learning_rate": 6.2933802909726615e-06, + "loss": 0.938, + "step": 62590 + }, + { + "epoch": 0.9, + "grad_norm": 0.609375, + "learning_rate": 6.284641794349433e-06, + "loss": 0.8037, + "step": 62595 + }, + { + "epoch": 0.9, + "grad_norm": 0.65625, + "learning_rate": 6.275909171911609e-06, + "loss": 0.9177, + "step": 62600 + }, + { + "epoch": 0.9, + "grad_norm": 0.58984375, + "learning_rate": 6.267182424206541e-06, + "loss": 0.9361, + "step": 62605 + }, + { + "epoch": 0.9, + "grad_norm": 0.57421875, + "learning_rate": 6.258461551781225e-06, + "loss": 0.9151, + "step": 62610 + }, + { + "epoch": 0.9, + "grad_norm": 0.58984375, + "learning_rate": 6.24974655518229e-06, + "loss": 1.0927, + "step": 62615 + }, + { + "epoch": 0.9, + "grad_norm": 0.6171875, + "learning_rate": 6.241037434956043e-06, + "loss": 1.11, + "step": 62620 + }, + { + "epoch": 0.9, + "grad_norm": 0.546875, + "learning_rate": 6.2323341916483254e-06, + "loss": 0.8704, + "step": 62625 + }, + { + "epoch": 0.9, + "grad_norm": 0.55859375, + "learning_rate": 6.223636825804702e-06, + "loss": 0.7773, + "step": 62630 + }, + { + "epoch": 0.9, + "grad_norm": 0.55859375, + "learning_rate": 6.214945337970335e-06, + "loss": 0.929, + "step": 62635 + }, + { + "epoch": 0.9, + "grad_norm": 0.4921875, + "learning_rate": 6.2062597286900005e-06, + "loss": 0.8479, + "step": 62640 + }, + { + "epoch": 0.9, + "grad_norm": 0.48828125, + "learning_rate": 6.197579998508118e-06, + "loss": 0.9114, + "step": 62645 + }, + { + "epoch": 0.9, + "grad_norm": 0.56640625, + "learning_rate": 6.188906147968776e-06, + "loss": 0.8494, + "step": 62650 + }, + { + "epoch": 0.9, + "grad_norm": 0.54296875, + "learning_rate": 6.180238177615616e-06, + "loss": 0.8771, + "step": 62655 + }, + { + "epoch": 0.9, + "grad_norm": 0.56640625, + "learning_rate": 6.171576087991981e-06, + "loss": 0.9365, + "step": 62660 + }, + { + "epoch": 0.9, + "grad_norm": 0.55078125, + "learning_rate": 6.1629198796408276e-06, + "loss": 1.0453, + "step": 62665 + }, + { + "epoch": 0.9, + "grad_norm": 0.5390625, + "learning_rate": 6.154269553104719e-06, + "loss": 0.9323, + "step": 62670 + }, + { + "epoch": 0.9, + "grad_norm": 0.60546875, + "learning_rate": 6.145625108925879e-06, + "loss": 0.8692, + "step": 62675 + }, + { + "epoch": 0.9, + "grad_norm": 0.609375, + "learning_rate": 6.136986547646151e-06, + "loss": 1.0395, + "step": 62680 + }, + { + "epoch": 0.9, + "grad_norm": 0.58203125, + "learning_rate": 6.128353869807002e-06, + "loss": 0.8201, + "step": 62685 + }, + { + "epoch": 0.9, + "grad_norm": 0.5390625, + "learning_rate": 6.119727075949555e-06, + "loss": 0.9716, + "step": 62690 + }, + { + "epoch": 0.9, + "grad_norm": 0.6015625, + "learning_rate": 6.111106166614522e-06, + "loss": 0.8377, + "step": 62695 + }, + { + "epoch": 0.9, + "grad_norm": 0.54296875, + "learning_rate": 6.102491142342304e-06, + "loss": 0.9958, + "step": 62700 + }, + { + "epoch": 0.9, + "grad_norm": 0.57421875, + "learning_rate": 6.093882003672868e-06, + "loss": 1.0683, + "step": 62705 + }, + { + "epoch": 0.9, + "grad_norm": 0.51171875, + "learning_rate": 6.085278751145851e-06, + "loss": 1.0908, + "step": 62710 + }, + { + "epoch": 0.9, + "grad_norm": 0.55078125, + "learning_rate": 6.076681385300531e-06, + "loss": 0.9115, + "step": 62715 + }, + { + "epoch": 0.9, + "grad_norm": 0.515625, + "learning_rate": 6.068089906675789e-06, + "loss": 1.0243, + "step": 62720 + }, + { + "epoch": 0.9, + "grad_norm": 0.52734375, + "learning_rate": 6.05950431581015e-06, + "loss": 0.9099, + "step": 62725 + }, + { + "epoch": 0.9, + "grad_norm": 0.609375, + "learning_rate": 6.0509246132417705e-06, + "loss": 1.0154, + "step": 62730 + }, + { + "epoch": 0.9, + "grad_norm": 0.52734375, + "learning_rate": 6.042350799508434e-06, + "loss": 0.9946, + "step": 62735 + }, + { + "epoch": 0.9, + "grad_norm": 0.55078125, + "learning_rate": 6.0337828751475535e-06, + "loss": 0.9308, + "step": 62740 + }, + { + "epoch": 0.9, + "grad_norm": 0.5234375, + "learning_rate": 6.025220840696211e-06, + "loss": 0.9511, + "step": 62745 + }, + { + "epoch": 0.9, + "grad_norm": 0.48828125, + "learning_rate": 6.0166646966910325e-06, + "loss": 0.8393, + "step": 62750 + }, + { + "epoch": 0.9, + "grad_norm": 0.57421875, + "learning_rate": 6.008114443668334e-06, + "loss": 0.873, + "step": 62755 + }, + { + "epoch": 0.9, + "grad_norm": 0.5546875, + "learning_rate": 5.999570082164096e-06, + "loss": 0.9249, + "step": 62760 + }, + { + "epoch": 0.9, + "grad_norm": 0.55078125, + "learning_rate": 5.991031612713849e-06, + "loss": 1.0535, + "step": 62765 + }, + { + "epoch": 0.9, + "grad_norm": 0.47265625, + "learning_rate": 5.982499035852795e-06, + "loss": 1.0427, + "step": 62770 + }, + { + "epoch": 0.9, + "grad_norm": 0.466796875, + "learning_rate": 5.9739723521158084e-06, + "loss": 0.9109, + "step": 62775 + }, + { + "epoch": 0.9, + "grad_norm": 0.703125, + "learning_rate": 5.965451562037294e-06, + "loss": 0.916, + "step": 62780 + }, + { + "epoch": 0.9, + "grad_norm": 0.56640625, + "learning_rate": 5.95693666615138e-06, + "loss": 0.944, + "step": 62785 + }, + { + "epoch": 0.9, + "grad_norm": 0.55859375, + "learning_rate": 5.948427664991796e-06, + "loss": 0.9449, + "step": 62790 + }, + { + "epoch": 0.9, + "grad_norm": 0.58203125, + "learning_rate": 5.9399245590918805e-06, + "loss": 0.8104, + "step": 62795 + }, + { + "epoch": 0.9, + "grad_norm": 0.54296875, + "learning_rate": 5.931427348984608e-06, + "loss": 1.0679, + "step": 62800 + }, + { + "epoch": 0.9, + "grad_norm": 0.55078125, + "learning_rate": 5.922936035202598e-06, + "loss": 0.8095, + "step": 62805 + }, + { + "epoch": 0.9, + "grad_norm": 0.5703125, + "learning_rate": 5.9144506182781225e-06, + "loss": 0.953, + "step": 62810 + }, + { + "epoch": 0.9, + "grad_norm": 0.59375, + "learning_rate": 5.905971098743013e-06, + "loss": 0.9414, + "step": 62815 + }, + { + "epoch": 0.9, + "grad_norm": 0.53125, + "learning_rate": 5.897497477128811e-06, + "loss": 0.8741, + "step": 62820 + }, + { + "epoch": 0.9, + "grad_norm": 0.53125, + "learning_rate": 5.889029753966646e-06, + "loss": 0.9239, + "step": 62825 + }, + { + "epoch": 0.9, + "grad_norm": 0.54296875, + "learning_rate": 5.880567929787273e-06, + "loss": 0.8671, + "step": 62830 + }, + { + "epoch": 0.9, + "grad_norm": 0.52734375, + "learning_rate": 5.872112005121089e-06, + "loss": 0.9372, + "step": 62835 + }, + { + "epoch": 0.9, + "grad_norm": 0.55859375, + "learning_rate": 5.863661980498137e-06, + "loss": 0.887, + "step": 62840 + }, + { + "epoch": 0.9, + "grad_norm": 0.55078125, + "learning_rate": 5.855217856448058e-06, + "loss": 1.017, + "step": 62845 + }, + { + "epoch": 0.9, + "grad_norm": 0.546875, + "learning_rate": 5.846779633500155e-06, + "loss": 0.8264, + "step": 62850 + }, + { + "epoch": 0.9, + "grad_norm": 0.51171875, + "learning_rate": 5.8383473121833455e-06, + "loss": 0.9504, + "step": 62855 + }, + { + "epoch": 0.9, + "grad_norm": 0.53515625, + "learning_rate": 5.829920893026142e-06, + "loss": 1.0121, + "step": 62860 + }, + { + "epoch": 0.9, + "grad_norm": 0.625, + "learning_rate": 5.8215003765567545e-06, + "loss": 0.9497, + "step": 62865 + }, + { + "epoch": 0.9, + "grad_norm": 0.50390625, + "learning_rate": 5.813085763302994e-06, + "loss": 0.8942, + "step": 62870 + }, + { + "epoch": 0.9, + "grad_norm": 0.45703125, + "learning_rate": 5.804677053792284e-06, + "loss": 0.9689, + "step": 62875 + }, + { + "epoch": 0.9, + "grad_norm": 0.56640625, + "learning_rate": 5.796274248551681e-06, + "loss": 0.9884, + "step": 62880 + }, + { + "epoch": 0.9, + "grad_norm": 0.59375, + "learning_rate": 5.787877348107918e-06, + "loss": 0.9302, + "step": 62885 + }, + { + "epoch": 0.9, + "grad_norm": 0.56640625, + "learning_rate": 5.779486352987285e-06, + "loss": 1.1086, + "step": 62890 + }, + { + "epoch": 0.9, + "grad_norm": 0.5703125, + "learning_rate": 5.771101263715761e-06, + "loss": 0.9932, + "step": 62895 + }, + { + "epoch": 0.9, + "grad_norm": 0.62890625, + "learning_rate": 5.762722080818939e-06, + "loss": 1.0333, + "step": 62900 + }, + { + "epoch": 0.9, + "grad_norm": 0.625, + "learning_rate": 5.754348804822018e-06, + "loss": 0.9, + "step": 62905 + }, + { + "epoch": 0.9, + "grad_norm": 0.546875, + "learning_rate": 5.745981436249847e-06, + "loss": 0.8931, + "step": 62910 + }, + { + "epoch": 0.9, + "grad_norm": 0.51953125, + "learning_rate": 5.737619975626907e-06, + "loss": 0.8853, + "step": 62915 + }, + { + "epoch": 0.9, + "grad_norm": 0.76953125, + "learning_rate": 5.7292644234773096e-06, + "loss": 0.9668, + "step": 62920 + }, + { + "epoch": 0.9, + "grad_norm": 0.5546875, + "learning_rate": 5.720914780324771e-06, + "loss": 0.9179, + "step": 62925 + }, + { + "epoch": 0.9, + "grad_norm": 0.5390625, + "learning_rate": 5.712571046692661e-06, + "loss": 0.9579, + "step": 62930 + }, + { + "epoch": 0.9, + "grad_norm": 0.671875, + "learning_rate": 5.704233223104005e-06, + "loss": 1.0306, + "step": 62935 + }, + { + "epoch": 0.9, + "grad_norm": 0.53125, + "learning_rate": 5.695901310081386e-06, + "loss": 0.9032, + "step": 62940 + }, + { + "epoch": 0.9, + "grad_norm": 0.55078125, + "learning_rate": 5.687575308147086e-06, + "loss": 0.9209, + "step": 62945 + }, + { + "epoch": 0.9, + "grad_norm": 0.55859375, + "learning_rate": 5.679255217822987e-06, + "loss": 0.9953, + "step": 62950 + }, + { + "epoch": 0.9, + "grad_norm": 0.5234375, + "learning_rate": 5.670941039630595e-06, + "loss": 0.9111, + "step": 62955 + }, + { + "epoch": 0.9, + "grad_norm": 0.609375, + "learning_rate": 5.662632774091026e-06, + "loss": 0.9192, + "step": 62960 + }, + { + "epoch": 0.9, + "grad_norm": 0.59375, + "learning_rate": 5.654330421725085e-06, + "loss": 0.8808, + "step": 62965 + }, + { + "epoch": 0.9, + "grad_norm": 0.59375, + "learning_rate": 5.646033983053178e-06, + "loss": 1.0373, + "step": 62970 + }, + { + "epoch": 0.9, + "grad_norm": 0.515625, + "learning_rate": 5.637743458595302e-06, + "loss": 0.9083, + "step": 62975 + }, + { + "epoch": 0.9, + "grad_norm": 0.6015625, + "learning_rate": 5.6294588488711385e-06, + "loss": 0.9685, + "step": 62980 + }, + { + "epoch": 0.9, + "grad_norm": 0.5390625, + "learning_rate": 5.621180154399996e-06, + "loss": 0.8314, + "step": 62985 + }, + { + "epoch": 0.9, + "grad_norm": 0.58984375, + "learning_rate": 5.612907375700749e-06, + "loss": 0.9279, + "step": 62990 + }, + { + "epoch": 0.9, + "grad_norm": 0.54296875, + "learning_rate": 5.60464051329197e-06, + "loss": 0.7895, + "step": 62995 + }, + { + "epoch": 0.9, + "grad_norm": 0.5234375, + "learning_rate": 5.596379567691834e-06, + "loss": 1.0582, + "step": 63000 + }, + { + "epoch": 0.9, + "grad_norm": 0.6015625, + "learning_rate": 5.588124539418127e-06, + "loss": 1.0279, + "step": 63005 + }, + { + "epoch": 0.9, + "grad_norm": 0.4921875, + "learning_rate": 5.579875428988324e-06, + "loss": 0.9399, + "step": 63010 + }, + { + "epoch": 0.9, + "grad_norm": 0.6796875, + "learning_rate": 5.571632236919466e-06, + "loss": 1.0019, + "step": 63015 + }, + { + "epoch": 0.9, + "grad_norm": 0.625, + "learning_rate": 5.563394963728219e-06, + "loss": 1.0214, + "step": 63020 + }, + { + "epoch": 0.9, + "grad_norm": 0.60546875, + "learning_rate": 5.555163609930947e-06, + "loss": 0.9731, + "step": 63025 + }, + { + "epoch": 0.9, + "grad_norm": 0.5703125, + "learning_rate": 5.546938176043581e-06, + "loss": 0.9859, + "step": 63030 + }, + { + "epoch": 0.9, + "grad_norm": 0.5234375, + "learning_rate": 5.538718662581699e-06, + "loss": 0.9335, + "step": 63035 + }, + { + "epoch": 0.9, + "grad_norm": 0.546875, + "learning_rate": 5.53050507006051e-06, + "loss": 0.9076, + "step": 63040 + }, + { + "epoch": 0.9, + "grad_norm": 0.57421875, + "learning_rate": 5.522297398994869e-06, + "loss": 0.9375, + "step": 63045 + }, + { + "epoch": 0.9, + "grad_norm": 0.5703125, + "learning_rate": 5.51409564989922e-06, + "loss": 0.9551, + "step": 63050 + }, + { + "epoch": 0.9, + "grad_norm": 0.68359375, + "learning_rate": 5.505899823287663e-06, + "loss": 1.0059, + "step": 63055 + }, + { + "epoch": 0.9, + "grad_norm": 0.703125, + "learning_rate": 5.4977099196739324e-06, + "loss": 1.0367, + "step": 63060 + }, + { + "epoch": 0.9, + "grad_norm": 0.515625, + "learning_rate": 5.489525939571383e-06, + "loss": 0.9067, + "step": 63065 + }, + { + "epoch": 0.9, + "grad_norm": 0.546875, + "learning_rate": 5.48134788349296e-06, + "loss": 0.9347, + "step": 63070 + }, + { + "epoch": 0.9, + "grad_norm": 0.546875, + "learning_rate": 5.47317575195131e-06, + "loss": 0.866, + "step": 63075 + }, + { + "epoch": 0.9, + "grad_norm": 0.498046875, + "learning_rate": 5.465009545458666e-06, + "loss": 0.8887, + "step": 63080 + }, + { + "epoch": 0.9, + "grad_norm": 0.5546875, + "learning_rate": 5.456849264526887e-06, + "loss": 0.9003, + "step": 63085 + }, + { + "epoch": 0.9, + "grad_norm": 0.54296875, + "learning_rate": 5.448694909667462e-06, + "loss": 1.0918, + "step": 63090 + }, + { + "epoch": 0.91, + "grad_norm": 0.66796875, + "learning_rate": 5.4405464813915395e-06, + "loss": 0.9515, + "step": 63095 + }, + { + "epoch": 0.91, + "grad_norm": 0.478515625, + "learning_rate": 5.4324039802098544e-06, + "loss": 1.002, + "step": 63100 + }, + { + "epoch": 0.91, + "grad_norm": 0.52734375, + "learning_rate": 5.424267406632777e-06, + "loss": 0.9115, + "step": 63105 + }, + { + "epoch": 0.91, + "grad_norm": 0.5234375, + "learning_rate": 5.416136761170354e-06, + "loss": 0.9534, + "step": 63110 + }, + { + "epoch": 0.91, + "grad_norm": 0.56640625, + "learning_rate": 5.4080120443322e-06, + "loss": 0.7717, + "step": 63115 + }, + { + "epoch": 0.91, + "grad_norm": 0.55859375, + "learning_rate": 5.399893256627564e-06, + "loss": 1.0774, + "step": 63120 + }, + { + "epoch": 0.91, + "grad_norm": 0.48828125, + "learning_rate": 5.391780398565383e-06, + "loss": 0.8528, + "step": 63125 + }, + { + "epoch": 0.91, + "grad_norm": 0.51953125, + "learning_rate": 5.3836734706541385e-06, + "loss": 1.0441, + "step": 63130 + }, + { + "epoch": 0.91, + "grad_norm": 0.6171875, + "learning_rate": 5.375572473401991e-06, + "loss": 1.0177, + "step": 63135 + }, + { + "epoch": 0.91, + "grad_norm": 0.52734375, + "learning_rate": 5.367477407316745e-06, + "loss": 0.927, + "step": 63140 + }, + { + "epoch": 0.91, + "grad_norm": 0.53125, + "learning_rate": 5.359388272905785e-06, + "loss": 0.9415, + "step": 63145 + }, + { + "epoch": 0.91, + "grad_norm": 0.53515625, + "learning_rate": 5.35130507067616e-06, + "loss": 1.005, + "step": 63150 + }, + { + "epoch": 0.91, + "grad_norm": 0.65625, + "learning_rate": 5.343227801134532e-06, + "loss": 0.8438, + "step": 63155 + }, + { + "epoch": 0.91, + "grad_norm": 0.55078125, + "learning_rate": 5.335156464787183e-06, + "loss": 0.8493, + "step": 63160 + }, + { + "epoch": 0.91, + "grad_norm": 0.51171875, + "learning_rate": 5.3270910621400435e-06, + "loss": 1.1147, + "step": 63165 + }, + { + "epoch": 0.91, + "grad_norm": 0.6171875, + "learning_rate": 5.319031593698653e-06, + "loss": 0.9504, + "step": 63170 + }, + { + "epoch": 0.91, + "grad_norm": 0.5546875, + "learning_rate": 5.310978059968219e-06, + "loss": 0.9684, + "step": 63175 + }, + { + "epoch": 0.91, + "grad_norm": 0.55859375, + "learning_rate": 5.302930461453492e-06, + "loss": 0.8936, + "step": 63180 + }, + { + "epoch": 0.91, + "grad_norm": 0.52734375, + "learning_rate": 5.294888798658948e-06, + "loss": 0.9478, + "step": 63185 + }, + { + "epoch": 0.91, + "grad_norm": 0.5234375, + "learning_rate": 5.286853072088638e-06, + "loss": 1.0088, + "step": 63190 + }, + { + "epoch": 0.91, + "grad_norm": 0.5078125, + "learning_rate": 5.278823282246237e-06, + "loss": 0.9158, + "step": 63195 + }, + { + "epoch": 0.91, + "grad_norm": 0.55859375, + "learning_rate": 5.270799429635065e-06, + "loss": 0.904, + "step": 63200 + }, + { + "epoch": 0.91, + "grad_norm": 0.56640625, + "learning_rate": 5.262781514758097e-06, + "loss": 1.006, + "step": 63205 + }, + { + "epoch": 0.91, + "grad_norm": 0.61328125, + "learning_rate": 5.254769538117854e-06, + "loss": 0.9891, + "step": 63210 + }, + { + "epoch": 0.91, + "grad_norm": 0.5703125, + "learning_rate": 5.246763500216578e-06, + "loss": 0.9617, + "step": 63215 + }, + { + "epoch": 0.91, + "grad_norm": 0.5859375, + "learning_rate": 5.23876340155609e-06, + "loss": 0.9842, + "step": 63220 + }, + { + "epoch": 0.91, + "grad_norm": 0.515625, + "learning_rate": 5.2307692426378226e-06, + "loss": 0.948, + "step": 63225 + }, + { + "epoch": 0.91, + "grad_norm": 0.5625, + "learning_rate": 5.2227810239628635e-06, + "loss": 0.9327, + "step": 63230 + }, + { + "epoch": 0.91, + "grad_norm": 0.55859375, + "learning_rate": 5.214798746031957e-06, + "loss": 0.9766, + "step": 63235 + }, + { + "epoch": 0.91, + "grad_norm": 0.56640625, + "learning_rate": 5.206822409345391e-06, + "loss": 0.9396, + "step": 63240 + }, + { + "epoch": 0.91, + "grad_norm": 0.5546875, + "learning_rate": 5.198852014403166e-06, + "loss": 1.098, + "step": 63245 + }, + { + "epoch": 0.91, + "grad_norm": 0.65234375, + "learning_rate": 5.190887561704871e-06, + "loss": 0.8943, + "step": 63250 + }, + { + "epoch": 0.91, + "grad_norm": 0.59765625, + "learning_rate": 5.182929051749708e-06, + "loss": 0.9164, + "step": 63255 + }, + { + "epoch": 0.91, + "grad_norm": 0.546875, + "learning_rate": 5.174976485036542e-06, + "loss": 0.9736, + "step": 63260 + }, + { + "epoch": 0.91, + "grad_norm": 0.6015625, + "learning_rate": 5.167029862063865e-06, + "loss": 1.0046, + "step": 63265 + }, + { + "epoch": 0.91, + "grad_norm": 0.6015625, + "learning_rate": 5.159089183329757e-06, + "loss": 0.8295, + "step": 63270 + }, + { + "epoch": 0.91, + "grad_norm": 0.59765625, + "learning_rate": 5.1511544493319515e-06, + "loss": 0.925, + "step": 63275 + }, + { + "epoch": 0.91, + "grad_norm": 0.58984375, + "learning_rate": 5.143225660567796e-06, + "loss": 1.0085, + "step": 63280 + }, + { + "epoch": 0.91, + "grad_norm": 0.59765625, + "learning_rate": 5.135302817534304e-06, + "loss": 1.0287, + "step": 63285 + }, + { + "epoch": 0.91, + "grad_norm": 0.5625, + "learning_rate": 5.127385920728067e-06, + "loss": 0.9187, + "step": 63290 + }, + { + "epoch": 0.91, + "grad_norm": 0.53125, + "learning_rate": 5.119474970645322e-06, + "loss": 0.8212, + "step": 63295 + }, + { + "epoch": 0.91, + "grad_norm": 0.55859375, + "learning_rate": 5.111569967781959e-06, + "loss": 1.0078, + "step": 63300 + }, + { + "epoch": 0.91, + "grad_norm": 0.51953125, + "learning_rate": 5.103670912633451e-06, + "loss": 0.9496, + "step": 63305 + }, + { + "epoch": 0.91, + "grad_norm": 0.482421875, + "learning_rate": 5.095777805694935e-06, + "loss": 0.9652, + "step": 63310 + }, + { + "epoch": 0.91, + "grad_norm": 0.5, + "learning_rate": 5.0878906474611574e-06, + "loss": 0.922, + "step": 63315 + }, + { + "epoch": 0.91, + "grad_norm": 0.671875, + "learning_rate": 5.0800094384264694e-06, + "loss": 1.0818, + "step": 63320 + }, + { + "epoch": 0.91, + "grad_norm": 0.5625, + "learning_rate": 5.07213417908492e-06, + "loss": 0.8417, + "step": 63325 + }, + { + "epoch": 0.91, + "grad_norm": 0.625, + "learning_rate": 5.064264869930113e-06, + "loss": 0.9057, + "step": 63330 + }, + { + "epoch": 0.91, + "grad_norm": 0.671875, + "learning_rate": 5.056401511455288e-06, + "loss": 0.9798, + "step": 63335 + }, + { + "epoch": 0.91, + "grad_norm": 0.59375, + "learning_rate": 5.048544104153352e-06, + "loss": 0.982, + "step": 63340 + }, + { + "epoch": 0.91, + "grad_norm": 0.55859375, + "learning_rate": 5.040692648516821e-06, + "loss": 0.927, + "step": 63345 + }, + { + "epoch": 0.91, + "grad_norm": 0.5546875, + "learning_rate": 5.032847145037811e-06, + "loss": 1.0344, + "step": 63350 + }, + { + "epoch": 0.91, + "grad_norm": 0.5234375, + "learning_rate": 5.025007594208109e-06, + "loss": 0.8876, + "step": 63355 + }, + { + "epoch": 0.91, + "grad_norm": 0.609375, + "learning_rate": 5.0171739965191085e-06, + "loss": 0.9278, + "step": 63360 + }, + { + "epoch": 0.91, + "grad_norm": 0.59765625, + "learning_rate": 5.0093463524617965e-06, + "loss": 0.8402, + "step": 63365 + }, + { + "epoch": 0.91, + "grad_norm": 0.546875, + "learning_rate": 5.001524662526846e-06, + "loss": 0.9543, + "step": 63370 + }, + { + "epoch": 0.91, + "grad_norm": 0.58984375, + "learning_rate": 4.993708927204543e-06, + "loss": 1.1113, + "step": 63375 + }, + { + "epoch": 0.91, + "grad_norm": 0.51171875, + "learning_rate": 4.985899146984762e-06, + "loss": 0.9639, + "step": 63380 + }, + { + "epoch": 0.91, + "grad_norm": 0.61328125, + "learning_rate": 4.978095322357024e-06, + "loss": 1.0694, + "step": 63385 + }, + { + "epoch": 0.91, + "grad_norm": 0.50390625, + "learning_rate": 4.97029745381048e-06, + "loss": 0.732, + "step": 63390 + }, + { + "epoch": 0.91, + "grad_norm": 0.59375, + "learning_rate": 4.9625055418339505e-06, + "loss": 1.0459, + "step": 63395 + }, + { + "epoch": 0.91, + "grad_norm": 0.57421875, + "learning_rate": 4.954719586915791e-06, + "loss": 0.9727, + "step": 63400 + }, + { + "epoch": 0.91, + "grad_norm": 0.447265625, + "learning_rate": 4.946939589544053e-06, + "loss": 0.9419, + "step": 63405 + }, + { + "epoch": 0.91, + "grad_norm": 0.6171875, + "learning_rate": 4.939165550206415e-06, + "loss": 0.7805, + "step": 63410 + }, + { + "epoch": 0.91, + "grad_norm": 0.59765625, + "learning_rate": 4.931397469390131e-06, + "loss": 0.9549, + "step": 63415 + }, + { + "epoch": 0.91, + "grad_norm": 0.5546875, + "learning_rate": 4.923635347582134e-06, + "loss": 1.0289, + "step": 63420 + }, + { + "epoch": 0.91, + "grad_norm": 0.5546875, + "learning_rate": 4.915879185268968e-06, + "loss": 0.8387, + "step": 63425 + }, + { + "epoch": 0.91, + "grad_norm": 0.53515625, + "learning_rate": 4.908128982936777e-06, + "loss": 0.9095, + "step": 63430 + }, + { + "epoch": 0.91, + "grad_norm": 0.5703125, + "learning_rate": 4.900384741071362e-06, + "loss": 0.988, + "step": 63435 + }, + { + "epoch": 0.91, + "grad_norm": 0.5390625, + "learning_rate": 4.892646460158146e-06, + "loss": 0.9183, + "step": 63440 + }, + { + "epoch": 0.91, + "grad_norm": 0.6015625, + "learning_rate": 4.884914140682151e-06, + "loss": 0.9776, + "step": 63445 + }, + { + "epoch": 0.91, + "grad_norm": 0.68359375, + "learning_rate": 4.877187783128068e-06, + "loss": 0.9776, + "step": 63450 + }, + { + "epoch": 0.91, + "grad_norm": 0.63671875, + "learning_rate": 4.869467387980209e-06, + "loss": 1.0015, + "step": 63455 + }, + { + "epoch": 0.91, + "grad_norm": 0.4765625, + "learning_rate": 4.861752955722454e-06, + "loss": 0.7547, + "step": 63460 + }, + { + "epoch": 0.91, + "grad_norm": 0.51171875, + "learning_rate": 4.8540444868383935e-06, + "loss": 1.2453, + "step": 63465 + }, + { + "epoch": 0.91, + "grad_norm": 0.54296875, + "learning_rate": 4.846341981811187e-06, + "loss": 0.8749, + "step": 63470 + }, + { + "epoch": 0.91, + "grad_norm": 0.67578125, + "learning_rate": 4.838645441123623e-06, + "loss": 1.0333, + "step": 63475 + }, + { + "epoch": 0.91, + "grad_norm": 0.75390625, + "learning_rate": 4.830954865258164e-06, + "loss": 0.931, + "step": 63480 + }, + { + "epoch": 0.91, + "grad_norm": 0.515625, + "learning_rate": 4.823270254696821e-06, + "loss": 0.9601, + "step": 63485 + }, + { + "epoch": 0.91, + "grad_norm": 0.494140625, + "learning_rate": 4.815591609921322e-06, + "loss": 0.8998, + "step": 63490 + }, + { + "epoch": 0.91, + "grad_norm": 0.69921875, + "learning_rate": 4.807918931412914e-06, + "loss": 1.0241, + "step": 63495 + }, + { + "epoch": 0.91, + "grad_norm": 0.640625, + "learning_rate": 4.800252219652579e-06, + "loss": 0.9631, + "step": 63500 + }, + { + "epoch": 0.91, + "grad_norm": 0.59765625, + "learning_rate": 4.792591475120867e-06, + "loss": 1.09, + "step": 63505 + }, + { + "epoch": 0.91, + "grad_norm": 0.55078125, + "learning_rate": 4.784936698297937e-06, + "loss": 0.9044, + "step": 63510 + }, + { + "epoch": 0.91, + "grad_norm": 0.51171875, + "learning_rate": 4.777287889663618e-06, + "loss": 0.9348, + "step": 63515 + }, + { + "epoch": 0.91, + "grad_norm": 0.53125, + "learning_rate": 4.7696450496973464e-06, + "loss": 0.9964, + "step": 63520 + }, + { + "epoch": 0.91, + "grad_norm": 0.5859375, + "learning_rate": 4.762008178878185e-06, + "loss": 0.8946, + "step": 63525 + }, + { + "epoch": 0.91, + "grad_norm": 0.61328125, + "learning_rate": 4.754377277684807e-06, + "loss": 1.025, + "step": 63530 + }, + { + "epoch": 0.91, + "grad_norm": 0.51953125, + "learning_rate": 4.746752346595562e-06, + "loss": 0.9254, + "step": 63535 + }, + { + "epoch": 0.91, + "grad_norm": 0.54296875, + "learning_rate": 4.739133386088345e-06, + "loss": 1.1481, + "step": 63540 + }, + { + "epoch": 0.91, + "grad_norm": 0.609375, + "learning_rate": 4.7315203966407425e-06, + "loss": 1.0514, + "step": 63545 + }, + { + "epoch": 0.91, + "grad_norm": 0.61328125, + "learning_rate": 4.723913378729949e-06, + "loss": 1.0534, + "step": 63550 + }, + { + "epoch": 0.91, + "grad_norm": 0.5234375, + "learning_rate": 4.716312332832762e-06, + "loss": 0.9579, + "step": 63555 + }, + { + "epoch": 0.91, + "grad_norm": 0.578125, + "learning_rate": 4.708717259425644e-06, + "loss": 0.9386, + "step": 63560 + }, + { + "epoch": 0.91, + "grad_norm": 0.470703125, + "learning_rate": 4.701128158984658e-06, + "loss": 0.8514, + "step": 63565 + }, + { + "epoch": 0.91, + "grad_norm": 0.490234375, + "learning_rate": 4.693545031985491e-06, + "loss": 0.9786, + "step": 63570 + }, + { + "epoch": 0.91, + "grad_norm": 0.5234375, + "learning_rate": 4.685967878903463e-06, + "loss": 0.8708, + "step": 63575 + }, + { + "epoch": 0.91, + "grad_norm": 0.447265625, + "learning_rate": 4.678396700213539e-06, + "loss": 0.9327, + "step": 63580 + }, + { + "epoch": 0.91, + "grad_norm": 0.56640625, + "learning_rate": 4.670831496390271e-06, + "loss": 0.9573, + "step": 63585 + }, + { + "epoch": 0.91, + "grad_norm": 0.546875, + "learning_rate": 4.6632722679078366e-06, + "loss": 0.9043, + "step": 63590 + }, + { + "epoch": 0.91, + "grad_norm": 0.55859375, + "learning_rate": 4.65571901524009e-06, + "loss": 0.9097, + "step": 63595 + }, + { + "epoch": 0.91, + "grad_norm": 0.57421875, + "learning_rate": 4.648171738860463e-06, + "loss": 1.0028, + "step": 63600 + }, + { + "epoch": 0.91, + "grad_norm": 0.54296875, + "learning_rate": 4.640630439242022e-06, + "loss": 0.8418, + "step": 63605 + }, + { + "epoch": 0.91, + "grad_norm": 0.57421875, + "learning_rate": 4.633095116857467e-06, + "loss": 0.9803, + "step": 63610 + }, + { + "epoch": 0.91, + "grad_norm": 0.515625, + "learning_rate": 4.62556577217913e-06, + "loss": 0.859, + "step": 63615 + }, + { + "epoch": 0.91, + "grad_norm": 0.482421875, + "learning_rate": 4.618042405678946e-06, + "loss": 0.9136, + "step": 63620 + }, + { + "epoch": 0.91, + "grad_norm": 0.640625, + "learning_rate": 4.610525017828504e-06, + "loss": 0.9965, + "step": 63625 + }, + { + "epoch": 0.91, + "grad_norm": 0.5390625, + "learning_rate": 4.603013609098994e-06, + "loss": 0.8447, + "step": 63630 + }, + { + "epoch": 0.91, + "grad_norm": 0.62890625, + "learning_rate": 4.595508179961228e-06, + "loss": 1.0054, + "step": 63635 + }, + { + "epoch": 0.91, + "grad_norm": 0.59375, + "learning_rate": 4.588008730885685e-06, + "loss": 0.9298, + "step": 63640 + }, + { + "epoch": 0.91, + "grad_norm": 0.6640625, + "learning_rate": 4.5805152623424e-06, + "loss": 0.9792, + "step": 63645 + }, + { + "epoch": 0.91, + "grad_norm": 0.578125, + "learning_rate": 4.573027774801109e-06, + "loss": 0.8879, + "step": 63650 + }, + { + "epoch": 0.91, + "grad_norm": 0.55078125, + "learning_rate": 4.565546268731102e-06, + "loss": 0.9782, + "step": 63655 + }, + { + "epoch": 0.91, + "grad_norm": 0.69921875, + "learning_rate": 4.55807074460135e-06, + "loss": 0.885, + "step": 63660 + }, + { + "epoch": 0.91, + "grad_norm": 0.54296875, + "learning_rate": 4.550601202880433e-06, + "loss": 0.867, + "step": 63665 + }, + { + "epoch": 0.91, + "grad_norm": 0.59375, + "learning_rate": 4.543137644036533e-06, + "loss": 1.1561, + "step": 63670 + }, + { + "epoch": 0.91, + "grad_norm": 0.546875, + "learning_rate": 4.5356800685374955e-06, + "loss": 0.9632, + "step": 63675 + }, + { + "epoch": 0.91, + "grad_norm": 0.58203125, + "learning_rate": 4.5282284768507375e-06, + "loss": 0.9658, + "step": 63680 + }, + { + "epoch": 0.91, + "grad_norm": 0.57421875, + "learning_rate": 4.520782869443352e-06, + "loss": 0.9817, + "step": 63685 + }, + { + "epoch": 0.91, + "grad_norm": 0.5625, + "learning_rate": 4.513343246782043e-06, + "loss": 0.8341, + "step": 63690 + }, + { + "epoch": 0.91, + "grad_norm": 0.5234375, + "learning_rate": 4.505909609333147e-06, + "loss": 0.8791, + "step": 63695 + }, + { + "epoch": 0.91, + "grad_norm": 0.56640625, + "learning_rate": 4.498481957562573e-06, + "loss": 0.9347, + "step": 63700 + }, + { + "epoch": 0.91, + "grad_norm": 0.55078125, + "learning_rate": 4.491060291935911e-06, + "loss": 0.8894, + "step": 63705 + }, + { + "epoch": 0.91, + "grad_norm": 0.52734375, + "learning_rate": 4.4836446129183914e-06, + "loss": 0.8209, + "step": 63710 + }, + { + "epoch": 0.91, + "grad_norm": 0.45703125, + "learning_rate": 4.476234920974787e-06, + "loss": 0.9218, + "step": 63715 + }, + { + "epoch": 0.91, + "grad_norm": 0.5390625, + "learning_rate": 4.46883121656958e-06, + "loss": 0.8734, + "step": 63720 + }, + { + "epoch": 0.91, + "grad_norm": 0.6953125, + "learning_rate": 4.461433500166834e-06, + "loss": 0.9371, + "step": 63725 + }, + { + "epoch": 0.91, + "grad_norm": 0.55859375, + "learning_rate": 4.454041772230244e-06, + "loss": 1.0467, + "step": 63730 + }, + { + "epoch": 0.91, + "grad_norm": 0.56640625, + "learning_rate": 4.446656033223129e-06, + "loss": 0.9309, + "step": 63735 + }, + { + "epoch": 0.91, + "grad_norm": 0.53125, + "learning_rate": 4.439276283608451e-06, + "loss": 0.9906, + "step": 63740 + }, + { + "epoch": 0.91, + "grad_norm": 0.5859375, + "learning_rate": 4.431902523848774e-06, + "loss": 0.8913, + "step": 63745 + }, + { + "epoch": 0.91, + "grad_norm": 0.5625, + "learning_rate": 4.424534754406273e-06, + "loss": 0.8362, + "step": 63750 + }, + { + "epoch": 0.91, + "grad_norm": 0.69140625, + "learning_rate": 4.417172975742789e-06, + "loss": 0.8761, + "step": 63755 + }, + { + "epoch": 0.91, + "grad_norm": 0.58984375, + "learning_rate": 4.409817188319776e-06, + "loss": 0.9338, + "step": 63760 + }, + { + "epoch": 0.91, + "grad_norm": 0.52734375, + "learning_rate": 4.4024673925982755e-06, + "loss": 0.9317, + "step": 63765 + }, + { + "epoch": 0.91, + "grad_norm": 0.640625, + "learning_rate": 4.395123589038996e-06, + "loss": 1.0069, + "step": 63770 + }, + { + "epoch": 0.91, + "grad_norm": 0.57421875, + "learning_rate": 4.38778577810226e-06, + "loss": 1.0037, + "step": 63775 + }, + { + "epoch": 0.91, + "grad_norm": 0.62109375, + "learning_rate": 4.380453960247999e-06, + "loss": 0.8736, + "step": 63780 + }, + { + "epoch": 0.91, + "grad_norm": 0.56640625, + "learning_rate": 4.373128135935789e-06, + "loss": 1.0341, + "step": 63785 + }, + { + "epoch": 0.92, + "grad_norm": 0.578125, + "learning_rate": 4.365808305624819e-06, + "loss": 1.0015, + "step": 63790 + }, + { + "epoch": 0.92, + "grad_norm": 0.5625, + "learning_rate": 4.35849446977391e-06, + "loss": 0.9519, + "step": 63795 + }, + { + "epoch": 0.92, + "grad_norm": 0.4921875, + "learning_rate": 4.351186628841486e-06, + "loss": 0.9249, + "step": 63800 + }, + { + "epoch": 0.92, + "grad_norm": 0.52734375, + "learning_rate": 4.343884783285623e-06, + "loss": 0.9506, + "step": 63805 + }, + { + "epoch": 0.92, + "grad_norm": 0.5625, + "learning_rate": 4.33658893356399e-06, + "loss": 0.9668, + "step": 63810 + }, + { + "epoch": 0.92, + "grad_norm": 0.546875, + "learning_rate": 4.3292990801339196e-06, + "loss": 0.8758, + "step": 63815 + }, + { + "epoch": 0.92, + "grad_norm": 0.5703125, + "learning_rate": 4.322015223452358e-06, + "loss": 1.2518, + "step": 63820 + }, + { + "epoch": 0.92, + "grad_norm": 0.5546875, + "learning_rate": 4.314737363975829e-06, + "loss": 0.8206, + "step": 63825 + }, + { + "epoch": 0.92, + "grad_norm": 0.6484375, + "learning_rate": 4.307465502160546e-06, + "loss": 0.9852, + "step": 63830 + }, + { + "epoch": 0.92, + "grad_norm": 0.55859375, + "learning_rate": 4.300199638462321e-06, + "loss": 0.9431, + "step": 63835 + }, + { + "epoch": 0.92, + "grad_norm": 0.478515625, + "learning_rate": 4.292939773336569e-06, + "loss": 0.9671, + "step": 63840 + }, + { + "epoch": 0.92, + "grad_norm": 0.5078125, + "learning_rate": 4.285685907238346e-06, + "loss": 1.0574, + "step": 63845 + }, + { + "epoch": 0.92, + "grad_norm": 0.609375, + "learning_rate": 4.278438040622346e-06, + "loss": 0.9514, + "step": 63850 + }, + { + "epoch": 0.92, + "grad_norm": 0.51171875, + "learning_rate": 4.271196173942882e-06, + "loss": 0.8474, + "step": 63855 + }, + { + "epoch": 0.92, + "grad_norm": 0.578125, + "learning_rate": 4.263960307653847e-06, + "loss": 0.9748, + "step": 63860 + }, + { + "epoch": 0.92, + "grad_norm": 0.55859375, + "learning_rate": 4.256730442208812e-06, + "loss": 1.079, + "step": 63865 + }, + { + "epoch": 0.92, + "grad_norm": 0.51953125, + "learning_rate": 4.249506578060969e-06, + "loss": 0.8845, + "step": 63870 + }, + { + "epoch": 0.92, + "grad_norm": 0.625, + "learning_rate": 4.2422887156630894e-06, + "loss": 1.0111, + "step": 63875 + }, + { + "epoch": 0.92, + "grad_norm": 0.5, + "learning_rate": 4.235076855467623e-06, + "loss": 1.1052, + "step": 63880 + }, + { + "epoch": 0.92, + "grad_norm": 0.5390625, + "learning_rate": 4.227870997926609e-06, + "loss": 0.8858, + "step": 63885 + }, + { + "epoch": 0.92, + "grad_norm": 0.546875, + "learning_rate": 4.220671143491705e-06, + "loss": 0.944, + "step": 63890 + }, + { + "epoch": 0.92, + "grad_norm": 0.6484375, + "learning_rate": 4.213477292614221e-06, + "loss": 1.1542, + "step": 63895 + }, + { + "epoch": 0.92, + "grad_norm": 0.44140625, + "learning_rate": 4.206289445745093e-06, + "loss": 0.8764, + "step": 63900 + }, + { + "epoch": 0.92, + "grad_norm": 0.51953125, + "learning_rate": 4.199107603334818e-06, + "loss": 0.9639, + "step": 63905 + }, + { + "epoch": 0.92, + "grad_norm": 0.6015625, + "learning_rate": 4.19193176583359e-06, + "loss": 0.9392, + "step": 63910 + }, + { + "epoch": 0.92, + "grad_norm": 0.60546875, + "learning_rate": 4.184761933691206e-06, + "loss": 1.0341, + "step": 63915 + }, + { + "epoch": 0.92, + "grad_norm": 0.5078125, + "learning_rate": 4.177598107357061e-06, + "loss": 1.0983, + "step": 63920 + }, + { + "epoch": 0.92, + "grad_norm": 0.66796875, + "learning_rate": 4.170440287280186e-06, + "loss": 0.9701, + "step": 63925 + }, + { + "epoch": 0.92, + "grad_norm": 0.5859375, + "learning_rate": 4.163288473909277e-06, + "loss": 0.9325, + "step": 63930 + }, + { + "epoch": 0.92, + "grad_norm": 0.69921875, + "learning_rate": 4.1561426676925864e-06, + "loss": 0.9986, + "step": 63935 + }, + { + "epoch": 0.92, + "grad_norm": 0.578125, + "learning_rate": 4.1490028690780225e-06, + "loss": 0.9749, + "step": 63940 + }, + { + "epoch": 0.92, + "grad_norm": 0.546875, + "learning_rate": 4.1418690785131384e-06, + "loss": 0.9181, + "step": 63945 + }, + { + "epoch": 0.92, + "grad_norm": 0.50390625, + "learning_rate": 4.134741296445055e-06, + "loss": 1.0318, + "step": 63950 + }, + { + "epoch": 0.92, + "grad_norm": 0.63671875, + "learning_rate": 4.127619523320592e-06, + "loss": 0.9893, + "step": 63955 + }, + { + "epoch": 0.92, + "grad_norm": 0.515625, + "learning_rate": 4.120503759586103e-06, + "loss": 1.0783, + "step": 63960 + }, + { + "epoch": 0.92, + "grad_norm": 0.58203125, + "learning_rate": 4.113394005687654e-06, + "loss": 0.9217, + "step": 63965 + }, + { + "epoch": 0.92, + "grad_norm": 0.58984375, + "learning_rate": 4.106290262070867e-06, + "loss": 1.029, + "step": 63970 + }, + { + "epoch": 0.92, + "grad_norm": 0.5234375, + "learning_rate": 4.099192529181018e-06, + "loss": 1.0799, + "step": 63975 + }, + { + "epoch": 0.92, + "grad_norm": 0.5859375, + "learning_rate": 4.092100807463017e-06, + "loss": 0.9321, + "step": 63980 + }, + { + "epoch": 0.92, + "grad_norm": 0.59375, + "learning_rate": 4.085015097361344e-06, + "loss": 0.9397, + "step": 63985 + }, + { + "epoch": 0.92, + "grad_norm": 0.60546875, + "learning_rate": 4.0779353993201765e-06, + "loss": 0.9828, + "step": 63990 + }, + { + "epoch": 0.92, + "grad_norm": 0.5703125, + "learning_rate": 4.07086171378327e-06, + "loss": 0.8303, + "step": 63995 + }, + { + "epoch": 0.92, + "grad_norm": 0.5390625, + "learning_rate": 4.0637940411939916e-06, + "loss": 0.9768, + "step": 64000 + }, + { + "epoch": 0.92, + "grad_norm": 0.5234375, + "learning_rate": 4.0567323819953765e-06, + "loss": 1.1028, + "step": 64005 + }, + { + "epoch": 0.92, + "grad_norm": 0.5234375, + "learning_rate": 4.049676736630048e-06, + "loss": 0.8675, + "step": 64010 + }, + { + "epoch": 0.92, + "grad_norm": 0.48046875, + "learning_rate": 4.042627105540253e-06, + "loss": 1.008, + "step": 64015 + }, + { + "epoch": 0.92, + "grad_norm": 0.5625, + "learning_rate": 4.0355834891678714e-06, + "loss": 0.84, + "step": 64020 + }, + { + "epoch": 0.92, + "grad_norm": 0.5703125, + "learning_rate": 4.028545887954416e-06, + "loss": 0.9896, + "step": 64025 + }, + { + "epoch": 0.92, + "grad_norm": 0.5703125, + "learning_rate": 4.021514302341012e-06, + "loss": 1.0693, + "step": 64030 + }, + { + "epoch": 0.92, + "grad_norm": 0.6328125, + "learning_rate": 4.014488732768385e-06, + "loss": 1.0449, + "step": 64035 + }, + { + "epoch": 0.92, + "grad_norm": 0.5703125, + "learning_rate": 4.007469179676948e-06, + "loss": 0.9926, + "step": 64040 + }, + { + "epoch": 0.92, + "grad_norm": 0.474609375, + "learning_rate": 4.000455643506651e-06, + "loss": 0.8867, + "step": 64045 + }, + { + "epoch": 0.92, + "grad_norm": 0.50390625, + "learning_rate": 3.99344812469713e-06, + "loss": 0.8337, + "step": 64050 + }, + { + "epoch": 0.92, + "grad_norm": 0.58203125, + "learning_rate": 3.986446623687623e-06, + "loss": 0.9334, + "step": 64055 + }, + { + "epoch": 0.92, + "grad_norm": 0.51171875, + "learning_rate": 3.979451140917012e-06, + "loss": 1.0358, + "step": 64060 + }, + { + "epoch": 0.92, + "grad_norm": 0.58203125, + "learning_rate": 3.972461676823735e-06, + "loss": 0.996, + "step": 64065 + }, + { + "epoch": 0.92, + "grad_norm": 0.56640625, + "learning_rate": 3.965478231845932e-06, + "loss": 0.9938, + "step": 64070 + }, + { + "epoch": 0.92, + "grad_norm": 0.60546875, + "learning_rate": 3.958500806421339e-06, + "loss": 0.9653, + "step": 64075 + }, + { + "epoch": 0.92, + "grad_norm": 0.69140625, + "learning_rate": 3.9515294009872865e-06, + "loss": 0.9159, + "step": 64080 + }, + { + "epoch": 0.92, + "grad_norm": 0.578125, + "learning_rate": 3.9445640159807565e-06, + "loss": 0.8874, + "step": 64085 + }, + { + "epoch": 0.92, + "grad_norm": 0.52734375, + "learning_rate": 3.937604651838367e-06, + "loss": 1.082, + "step": 64090 + }, + { + "epoch": 0.92, + "grad_norm": 0.5703125, + "learning_rate": 3.9306513089963135e-06, + "loss": 0.9247, + "step": 64095 + }, + { + "epoch": 0.92, + "grad_norm": 0.62109375, + "learning_rate": 3.923703987890448e-06, + "loss": 0.9998, + "step": 64100 + }, + { + "epoch": 0.92, + "grad_norm": 0.5234375, + "learning_rate": 3.916762688956255e-06, + "loss": 1.0188, + "step": 64105 + }, + { + "epoch": 0.92, + "grad_norm": 0.58203125, + "learning_rate": 3.909827412628797e-06, + "loss": 0.8621, + "step": 64110 + }, + { + "epoch": 0.92, + "grad_norm": 0.640625, + "learning_rate": 3.9028981593427935e-06, + "loss": 0.9959, + "step": 64115 + }, + { + "epoch": 0.92, + "grad_norm": 0.55078125, + "learning_rate": 3.895974929532587e-06, + "loss": 0.9092, + "step": 64120 + }, + { + "epoch": 0.92, + "grad_norm": 0.6875, + "learning_rate": 3.889057723632106e-06, + "loss": 0.9389, + "step": 64125 + }, + { + "epoch": 0.92, + "grad_norm": 0.5, + "learning_rate": 3.88214654207496e-06, + "loss": 0.8261, + "step": 64130 + }, + { + "epoch": 0.92, + "grad_norm": 0.5390625, + "learning_rate": 3.875241385294337e-06, + "loss": 1.281, + "step": 64135 + }, + { + "epoch": 0.92, + "grad_norm": 0.5234375, + "learning_rate": 3.868342253723056e-06, + "loss": 0.9396, + "step": 64140 + }, + { + "epoch": 0.92, + "grad_norm": 0.58984375, + "learning_rate": 3.861449147793561e-06, + "loss": 0.9935, + "step": 64145 + }, + { + "epoch": 0.92, + "grad_norm": 0.578125, + "learning_rate": 3.85456206793795e-06, + "loss": 0.8586, + "step": 64150 + }, + { + "epoch": 0.92, + "grad_norm": 0.63671875, + "learning_rate": 3.8476810145878675e-06, + "loss": 0.957, + "step": 64155 + }, + { + "epoch": 0.92, + "grad_norm": 0.58984375, + "learning_rate": 3.840805988174656e-06, + "loss": 0.8097, + "step": 64160 + }, + { + "epoch": 0.92, + "grad_norm": 0.52734375, + "learning_rate": 3.833936989129239e-06, + "loss": 0.9352, + "step": 64165 + }, + { + "epoch": 0.92, + "grad_norm": 0.5859375, + "learning_rate": 3.827074017882193e-06, + "loss": 0.8878, + "step": 64170 + }, + { + "epoch": 0.92, + "grad_norm": 0.53125, + "learning_rate": 3.820217074863652e-06, + "loss": 1.034, + "step": 64175 + }, + { + "epoch": 0.92, + "grad_norm": 0.625, + "learning_rate": 3.813366160503451e-06, + "loss": 0.9547, + "step": 64180 + }, + { + "epoch": 0.92, + "grad_norm": 0.6875, + "learning_rate": 3.8065212752310232e-06, + "loss": 0.8868, + "step": 64185 + }, + { + "epoch": 0.92, + "grad_norm": 0.609375, + "learning_rate": 3.79968241947537e-06, + "loss": 0.9713, + "step": 64190 + }, + { + "epoch": 0.92, + "grad_norm": 0.57421875, + "learning_rate": 3.7928495936652043e-06, + "loss": 1.0498, + "step": 64195 + }, + { + "epoch": 0.92, + "grad_norm": 0.52734375, + "learning_rate": 3.7860227982287943e-06, + "loss": 1.0994, + "step": 64200 + }, + { + "epoch": 0.92, + "grad_norm": 0.55859375, + "learning_rate": 3.7792020335940537e-06, + "loss": 1.0165, + "step": 64205 + }, + { + "epoch": 0.92, + "grad_norm": 0.6640625, + "learning_rate": 3.772387300188507e-06, + "loss": 1.0315, + "step": 64210 + }, + { + "epoch": 0.92, + "grad_norm": 0.6015625, + "learning_rate": 3.7655785984393455e-06, + "loss": 0.9926, + "step": 64215 + }, + { + "epoch": 0.92, + "grad_norm": 0.57421875, + "learning_rate": 3.7587759287732948e-06, + "loss": 1.0681, + "step": 64220 + }, + { + "epoch": 0.92, + "grad_norm": 0.47265625, + "learning_rate": 3.7519792916167808e-06, + "loss": 0.9187, + "step": 64225 + }, + { + "epoch": 0.92, + "grad_norm": 0.478515625, + "learning_rate": 3.7451886873958285e-06, + "loss": 0.9417, + "step": 64230 + }, + { + "epoch": 0.92, + "grad_norm": 0.53515625, + "learning_rate": 3.738404116536065e-06, + "loss": 0.92, + "step": 64235 + }, + { + "epoch": 0.92, + "grad_norm": 0.83203125, + "learning_rate": 3.731625579462761e-06, + "loss": 1.0117, + "step": 64240 + }, + { + "epoch": 0.92, + "grad_norm": 0.578125, + "learning_rate": 3.7248530766008315e-06, + "loss": 1.0202, + "step": 64245 + }, + { + "epoch": 0.92, + "grad_norm": 0.66796875, + "learning_rate": 3.7180866083747377e-06, + "loss": 0.9853, + "step": 64250 + }, + { + "epoch": 0.92, + "grad_norm": 0.5, + "learning_rate": 3.7113261752086294e-06, + "loss": 0.934, + "step": 64255 + }, + { + "epoch": 0.92, + "grad_norm": 0.55859375, + "learning_rate": 3.7045717775262777e-06, + "loss": 0.9742, + "step": 64260 + }, + { + "epoch": 0.92, + "grad_norm": 0.58984375, + "learning_rate": 3.697823415751023e-06, + "loss": 0.9932, + "step": 64265 + }, + { + "epoch": 0.92, + "grad_norm": 0.5078125, + "learning_rate": 3.6910810903058813e-06, + "loss": 0.9429, + "step": 64270 + }, + { + "epoch": 0.92, + "grad_norm": 0.53125, + "learning_rate": 3.6843448016134596e-06, + "loss": 0.9631, + "step": 64275 + }, + { + "epoch": 0.92, + "grad_norm": 0.640625, + "learning_rate": 3.6776145500960093e-06, + "loss": 0.9709, + "step": 64280 + }, + { + "epoch": 0.92, + "grad_norm": 0.50390625, + "learning_rate": 3.67089033617537e-06, + "loss": 0.8357, + "step": 64285 + }, + { + "epoch": 0.92, + "grad_norm": 0.57421875, + "learning_rate": 3.6641721602730273e-06, + "loss": 1.0281, + "step": 64290 + }, + { + "epoch": 0.92, + "grad_norm": 0.5703125, + "learning_rate": 3.657460022810111e-06, + "loss": 0.8819, + "step": 64295 + }, + { + "epoch": 0.92, + "grad_norm": 0.51171875, + "learning_rate": 3.6507539242073172e-06, + "loss": 0.9193, + "step": 64300 + }, + { + "epoch": 0.92, + "grad_norm": 0.55859375, + "learning_rate": 3.644053864884989e-06, + "loss": 0.9544, + "step": 64305 + }, + { + "epoch": 0.92, + "grad_norm": 0.5625, + "learning_rate": 3.637359845263122e-06, + "loss": 0.9234, + "step": 64310 + }, + { + "epoch": 0.92, + "grad_norm": 0.48828125, + "learning_rate": 3.6306718657612816e-06, + "loss": 0.9635, + "step": 64315 + }, + { + "epoch": 0.92, + "grad_norm": 0.55859375, + "learning_rate": 3.6239899267986877e-06, + "loss": 1.0724, + "step": 64320 + }, + { + "epoch": 0.92, + "grad_norm": 0.52734375, + "learning_rate": 3.617314028794161e-06, + "loss": 0.8347, + "step": 64325 + }, + { + "epoch": 0.92, + "grad_norm": 0.54296875, + "learning_rate": 3.6106441721661666e-06, + "loss": 0.904, + "step": 64330 + }, + { + "epoch": 0.92, + "grad_norm": 0.55859375, + "learning_rate": 3.6039803573327704e-06, + "loss": 0.9772, + "step": 64335 + }, + { + "epoch": 0.92, + "grad_norm": 0.6171875, + "learning_rate": 3.597322584711682e-06, + "loss": 1.1015, + "step": 64340 + }, + { + "epoch": 0.92, + "grad_norm": 0.578125, + "learning_rate": 3.5906708547201907e-06, + "loss": 0.8824, + "step": 64345 + }, + { + "epoch": 0.92, + "grad_norm": 0.58984375, + "learning_rate": 3.5840251677752514e-06, + "loss": 0.9461, + "step": 64350 + }, + { + "epoch": 0.92, + "grad_norm": 0.60546875, + "learning_rate": 3.5773855242934417e-06, + "loss": 0.9427, + "step": 64355 + }, + { + "epoch": 0.92, + "grad_norm": 0.546875, + "learning_rate": 3.5707519246909073e-06, + "loss": 0.8533, + "step": 64360 + }, + { + "epoch": 0.92, + "grad_norm": 0.66796875, + "learning_rate": 3.56412436938347e-06, + "loss": 1.1611, + "step": 64365 + }, + { + "epoch": 0.92, + "grad_norm": 0.55078125, + "learning_rate": 3.5575028587865546e-06, + "loss": 1.0923, + "step": 64370 + }, + { + "epoch": 0.92, + "grad_norm": 0.55078125, + "learning_rate": 3.550887393315205e-06, + "loss": 1.052, + "step": 64375 + }, + { + "epoch": 0.92, + "grad_norm": 0.52734375, + "learning_rate": 3.544277973384058e-06, + "loss": 0.8655, + "step": 64380 + }, + { + "epoch": 0.92, + "grad_norm": 0.52734375, + "learning_rate": 3.5376745994074257e-06, + "loss": 0.8928, + "step": 64385 + }, + { + "epoch": 0.92, + "grad_norm": 0.55859375, + "learning_rate": 3.5310772717992215e-06, + "loss": 0.907, + "step": 64390 + }, + { + "epoch": 0.92, + "grad_norm": 0.56640625, + "learning_rate": 3.524485990972959e-06, + "loss": 0.957, + "step": 64395 + }, + { + "epoch": 0.92, + "grad_norm": 0.62109375, + "learning_rate": 3.5179007573417853e-06, + "loss": 1.0026, + "step": 64400 + }, + { + "epoch": 0.92, + "grad_norm": 0.61328125, + "learning_rate": 3.5113215713184934e-06, + "loss": 1.0308, + "step": 64405 + }, + { + "epoch": 0.92, + "grad_norm": 0.6328125, + "learning_rate": 3.5047484333154303e-06, + "loss": 1.0068, + "step": 64410 + }, + { + "epoch": 0.92, + "grad_norm": 0.59375, + "learning_rate": 3.498181343744644e-06, + "loss": 0.8248, + "step": 64415 + }, + { + "epoch": 0.92, + "grad_norm": 0.515625, + "learning_rate": 3.491620303017773e-06, + "loss": 0.9557, + "step": 64420 + }, + { + "epoch": 0.92, + "grad_norm": 0.54296875, + "learning_rate": 3.485065311546054e-06, + "loss": 1.0306, + "step": 64425 + }, + { + "epoch": 0.92, + "grad_norm": 0.515625, + "learning_rate": 3.4785163697403477e-06, + "loss": 0.9902, + "step": 64430 + }, + { + "epoch": 0.92, + "grad_norm": 0.5234375, + "learning_rate": 3.471973478011159e-06, + "loss": 1.0089, + "step": 64435 + }, + { + "epoch": 0.92, + "grad_norm": 0.55078125, + "learning_rate": 3.4654366367686263e-06, + "loss": 0.9222, + "step": 64440 + }, + { + "epoch": 0.92, + "grad_norm": 0.5625, + "learning_rate": 3.4589058464224554e-06, + "loss": 0.8768, + "step": 64445 + }, + { + "epoch": 0.92, + "grad_norm": 0.515625, + "learning_rate": 3.452381107382019e-06, + "loss": 0.9449, + "step": 64450 + }, + { + "epoch": 0.92, + "grad_norm": 0.52734375, + "learning_rate": 3.4458624200563116e-06, + "loss": 0.9348, + "step": 64455 + }, + { + "epoch": 0.92, + "grad_norm": 0.59765625, + "learning_rate": 3.4393497848538956e-06, + "loss": 0.8981, + "step": 64460 + }, + { + "epoch": 0.92, + "grad_norm": 0.48046875, + "learning_rate": 3.4328432021830004e-06, + "loss": 0.8987, + "step": 64465 + }, + { + "epoch": 0.92, + "grad_norm": 0.53515625, + "learning_rate": 3.426342672451499e-06, + "loss": 1.2103, + "step": 64470 + }, + { + "epoch": 0.92, + "grad_norm": 0.5546875, + "learning_rate": 3.4198481960668103e-06, + "loss": 0.9515, + "step": 64475 + }, + { + "epoch": 0.92, + "grad_norm": 0.6484375, + "learning_rate": 3.4133597734360423e-06, + "loss": 1.0422, + "step": 64480 + }, + { + "epoch": 0.93, + "grad_norm": 0.546875, + "learning_rate": 3.4068774049658913e-06, + "loss": 0.9403, + "step": 64485 + }, + { + "epoch": 0.93, + "grad_norm": 0.51171875, + "learning_rate": 3.400401091062655e-06, + "loss": 0.9389, + "step": 64490 + }, + { + "epoch": 0.93, + "grad_norm": 0.5390625, + "learning_rate": 3.393930832132297e-06, + "loss": 0.9773, + "step": 64495 + }, + { + "epoch": 0.93, + "grad_norm": 0.54296875, + "learning_rate": 3.3874666285803936e-06, + "loss": 0.9369, + "step": 64500 + }, + { + "epoch": 0.93, + "grad_norm": 0.54296875, + "learning_rate": 3.381008480812109e-06, + "loss": 0.8346, + "step": 64505 + }, + { + "epoch": 0.93, + "grad_norm": 0.5625, + "learning_rate": 3.374556389232253e-06, + "loss": 0.8604, + "step": 64510 + }, + { + "epoch": 0.93, + "grad_norm": 0.51171875, + "learning_rate": 3.368110354245246e-06, + "loss": 0.9006, + "step": 64515 + }, + { + "epoch": 0.93, + "grad_norm": 0.76171875, + "learning_rate": 3.3616703762551437e-06, + "loss": 1.0335, + "step": 64520 + }, + { + "epoch": 0.93, + "grad_norm": 0.60546875, + "learning_rate": 3.355236455665589e-06, + "loss": 0.9049, + "step": 64525 + }, + { + "epoch": 0.93, + "grad_norm": 0.5546875, + "learning_rate": 3.348808592879904e-06, + "loss": 0.9769, + "step": 64530 + }, + { + "epoch": 0.93, + "grad_norm": 0.6171875, + "learning_rate": 3.3423867883009664e-06, + "loss": 0.979, + "step": 64535 + }, + { + "epoch": 0.93, + "grad_norm": 0.466796875, + "learning_rate": 3.3359710423312984e-06, + "loss": 0.9246, + "step": 64540 + }, + { + "epoch": 0.93, + "grad_norm": 0.546875, + "learning_rate": 3.3295613553730566e-06, + "loss": 1.0752, + "step": 64545 + }, + { + "epoch": 0.93, + "grad_norm": 0.6328125, + "learning_rate": 3.323157727828019e-06, + "loss": 1.1276, + "step": 64550 + }, + { + "epoch": 0.93, + "grad_norm": 0.66015625, + "learning_rate": 3.316760160097554e-06, + "loss": 1.0467, + "step": 64555 + }, + { + "epoch": 0.93, + "grad_norm": 0.54296875, + "learning_rate": 3.310368652582674e-06, + "loss": 0.9048, + "step": 64560 + }, + { + "epoch": 0.93, + "grad_norm": 0.5625, + "learning_rate": 3.303983205684014e-06, + "loss": 0.8651, + "step": 64565 + }, + { + "epoch": 0.93, + "grad_norm": 0.54296875, + "learning_rate": 3.297603819801809e-06, + "loss": 0.9898, + "step": 64570 + }, + { + "epoch": 0.93, + "grad_norm": 0.5078125, + "learning_rate": 3.2912304953359173e-06, + "loss": 0.8427, + "step": 64575 + }, + { + "epoch": 0.93, + "grad_norm": 0.578125, + "learning_rate": 3.2848632326858643e-06, + "loss": 1.0477, + "step": 64580 + }, + { + "epoch": 0.93, + "grad_norm": 0.578125, + "learning_rate": 3.27850203225073e-06, + "loss": 1.0262, + "step": 64585 + }, + { + "epoch": 0.93, + "grad_norm": 0.5234375, + "learning_rate": 3.27214689442924e-06, + "loss": 0.9684, + "step": 64590 + }, + { + "epoch": 0.93, + "grad_norm": 0.59375, + "learning_rate": 3.2657978196197426e-06, + "loss": 1.0751, + "step": 64595 + }, + { + "epoch": 0.93, + "grad_norm": 0.56640625, + "learning_rate": 3.259454808220208e-06, + "loss": 1.0147, + "step": 64600 + }, + { + "epoch": 0.93, + "grad_norm": 0.5390625, + "learning_rate": 3.2531178606282297e-06, + "loss": 1.0055, + "step": 64605 + }, + { + "epoch": 0.93, + "grad_norm": 0.53515625, + "learning_rate": 3.246786977241012e-06, + "loss": 1.0449, + "step": 64610 + }, + { + "epoch": 0.93, + "grad_norm": 0.58984375, + "learning_rate": 3.2404621584553707e-06, + "loss": 1.0597, + "step": 64615 + }, + { + "epoch": 0.93, + "grad_norm": 0.6015625, + "learning_rate": 3.2341434046677554e-06, + "loss": 0.9698, + "step": 64620 + }, + { + "epoch": 0.93, + "grad_norm": 0.55859375, + "learning_rate": 3.2278307162742607e-06, + "loss": 1.0122, + "step": 64625 + }, + { + "epoch": 0.93, + "grad_norm": 0.5234375, + "learning_rate": 3.221524093670536e-06, + "loss": 0.989, + "step": 64630 + }, + { + "epoch": 0.93, + "grad_norm": 0.5390625, + "learning_rate": 3.21522353725191e-06, + "loss": 0.9688, + "step": 64635 + }, + { + "epoch": 0.93, + "grad_norm": 0.52734375, + "learning_rate": 3.2089290474132893e-06, + "loss": 0.8625, + "step": 64640 + }, + { + "epoch": 0.93, + "grad_norm": 0.458984375, + "learning_rate": 3.2026406245492467e-06, + "loss": 0.8252, + "step": 64645 + }, + { + "epoch": 0.93, + "grad_norm": 0.5234375, + "learning_rate": 3.1963582690539117e-06, + "loss": 0.9317, + "step": 64650 + }, + { + "epoch": 0.93, + "grad_norm": 0.49609375, + "learning_rate": 3.1900819813211025e-06, + "loss": 0.9196, + "step": 64655 + }, + { + "epoch": 0.93, + "grad_norm": 0.609375, + "learning_rate": 3.1838117617442266e-06, + "loss": 0.9512, + "step": 64660 + }, + { + "epoch": 0.93, + "grad_norm": 0.5859375, + "learning_rate": 3.1775476107162695e-06, + "loss": 0.9112, + "step": 64665 + }, + { + "epoch": 0.93, + "grad_norm": 0.57421875, + "learning_rate": 3.171289528629906e-06, + "loss": 0.9008, + "step": 64670 + }, + { + "epoch": 0.93, + "grad_norm": 0.5234375, + "learning_rate": 3.1650375158774004e-06, + "loss": 0.9157, + "step": 64675 + }, + { + "epoch": 0.93, + "grad_norm": 0.5078125, + "learning_rate": 3.158791572850628e-06, + "loss": 0.8422, + "step": 64680 + }, + { + "epoch": 0.93, + "grad_norm": 0.5859375, + "learning_rate": 3.1525516999410866e-06, + "loss": 0.9628, + "step": 64685 + }, + { + "epoch": 0.93, + "grad_norm": 0.55859375, + "learning_rate": 3.14631789753993e-06, + "loss": 1.0034, + "step": 64690 + }, + { + "epoch": 0.93, + "grad_norm": 0.52734375, + "learning_rate": 3.1400901660378458e-06, + "loss": 1.0796, + "step": 64695 + }, + { + "epoch": 0.93, + "grad_norm": 0.5859375, + "learning_rate": 3.1338685058252216e-06, + "loss": 0.8844, + "step": 64700 + }, + { + "epoch": 0.93, + "grad_norm": 0.62890625, + "learning_rate": 3.1276529172920675e-06, + "loss": 0.9305, + "step": 64705 + }, + { + "epoch": 0.93, + "grad_norm": 0.57421875, + "learning_rate": 3.121443400827928e-06, + "loss": 1.0306, + "step": 64710 + }, + { + "epoch": 0.93, + "grad_norm": 0.59765625, + "learning_rate": 3.115239956822058e-06, + "loss": 0.8335, + "step": 64715 + }, + { + "epoch": 0.93, + "grad_norm": 0.5390625, + "learning_rate": 3.109042585663313e-06, + "loss": 0.9649, + "step": 64720 + }, + { + "epoch": 0.93, + "grad_norm": 0.60546875, + "learning_rate": 3.102851287740105e-06, + "loss": 0.9282, + "step": 64725 + }, + { + "epoch": 0.93, + "grad_norm": 0.439453125, + "learning_rate": 3.0966660634405344e-06, + "loss": 0.681, + "step": 64730 + }, + { + "epoch": 0.93, + "grad_norm": 0.60546875, + "learning_rate": 3.0904869131523128e-06, + "loss": 0.8902, + "step": 64735 + }, + { + "epoch": 0.93, + "grad_norm": 0.54296875, + "learning_rate": 3.0843138372627424e-06, + "loss": 1.1055, + "step": 64740 + }, + { + "epoch": 0.93, + "grad_norm": 0.50390625, + "learning_rate": 3.078146836158746e-06, + "loss": 0.9402, + "step": 64745 + }, + { + "epoch": 0.93, + "grad_norm": 0.5546875, + "learning_rate": 3.0719859102268933e-06, + "loss": 0.8482, + "step": 64750 + }, + { + "epoch": 0.93, + "grad_norm": 0.515625, + "learning_rate": 3.0658310598533633e-06, + "loss": 1.1807, + "step": 64755 + }, + { + "epoch": 0.93, + "grad_norm": 0.52734375, + "learning_rate": 3.059682285423926e-06, + "loss": 0.9468, + "step": 64760 + }, + { + "epoch": 0.93, + "grad_norm": 0.515625, + "learning_rate": 3.053539587324017e-06, + "loss": 0.9515, + "step": 64765 + }, + { + "epoch": 0.93, + "grad_norm": 0.62890625, + "learning_rate": 3.047402965938673e-06, + "loss": 0.9918, + "step": 64770 + }, + { + "epoch": 0.93, + "grad_norm": 0.58203125, + "learning_rate": 3.041272421652508e-06, + "loss": 1.0043, + "step": 64775 + }, + { + "epoch": 0.93, + "grad_norm": 0.5390625, + "learning_rate": 3.035147954849826e-06, + "loss": 0.98, + "step": 64780 + }, + { + "epoch": 0.93, + "grad_norm": 0.59375, + "learning_rate": 3.0290295659145094e-06, + "loss": 1.0729, + "step": 64785 + }, + { + "epoch": 0.93, + "grad_norm": 0.58984375, + "learning_rate": 3.02291725523004e-06, + "loss": 0.9641, + "step": 64790 + }, + { + "epoch": 0.93, + "grad_norm": 0.546875, + "learning_rate": 3.0168110231795887e-06, + "loss": 1.0132, + "step": 64795 + }, + { + "epoch": 0.93, + "grad_norm": 0.6171875, + "learning_rate": 3.010710870145872e-06, + "loss": 0.9996, + "step": 64800 + }, + { + "epoch": 0.93, + "grad_norm": 0.67578125, + "learning_rate": 3.0046167965112504e-06, + "loss": 1.2168, + "step": 64805 + }, + { + "epoch": 0.93, + "grad_norm": 0.5, + "learning_rate": 2.9985288026577184e-06, + "loss": 0.8434, + "step": 64810 + }, + { + "epoch": 0.93, + "grad_norm": 0.53125, + "learning_rate": 2.9924468889668824e-06, + "loss": 0.8563, + "step": 64815 + }, + { + "epoch": 0.93, + "grad_norm": 0.52734375, + "learning_rate": 2.9863710558199474e-06, + "loss": 1.0859, + "step": 64820 + }, + { + "epoch": 0.93, + "grad_norm": 0.58203125, + "learning_rate": 2.980301303597777e-06, + "loss": 1.0357, + "step": 64825 + }, + { + "epoch": 0.93, + "grad_norm": 0.62890625, + "learning_rate": 2.9742376326808095e-06, + "loss": 0.8976, + "step": 64830 + }, + { + "epoch": 0.93, + "grad_norm": 0.52734375, + "learning_rate": 2.9681800434491315e-06, + "loss": 1.0682, + "step": 64835 + }, + { + "epoch": 0.93, + "grad_norm": 0.51953125, + "learning_rate": 2.9621285362824268e-06, + "loss": 0.8746, + "step": 64840 + }, + { + "epoch": 0.93, + "grad_norm": 0.58203125, + "learning_rate": 2.9560831115600486e-06, + "loss": 0.9363, + "step": 64845 + }, + { + "epoch": 0.93, + "grad_norm": 0.65625, + "learning_rate": 2.9500437696608928e-06, + "loss": 0.9036, + "step": 64850 + }, + { + "epoch": 0.93, + "grad_norm": 0.61328125, + "learning_rate": 2.944010510963513e-06, + "loss": 0.9912, + "step": 64855 + }, + { + "epoch": 0.93, + "grad_norm": 0.5, + "learning_rate": 2.937983335846095e-06, + "loss": 0.797, + "step": 64860 + }, + { + "epoch": 0.93, + "grad_norm": 0.6171875, + "learning_rate": 2.931962244686437e-06, + "loss": 1.0168, + "step": 64865 + }, + { + "epoch": 0.93, + "grad_norm": 0.60546875, + "learning_rate": 2.925947237861926e-06, + "loss": 1.0282, + "step": 64870 + }, + { + "epoch": 0.93, + "grad_norm": 0.62109375, + "learning_rate": 2.919938315749604e-06, + "loss": 0.974, + "step": 64875 + }, + { + "epoch": 0.93, + "grad_norm": 0.578125, + "learning_rate": 2.9139354787261265e-06, + "loss": 1.0569, + "step": 64880 + }, + { + "epoch": 0.93, + "grad_norm": 0.625, + "learning_rate": 2.907938727167725e-06, + "loss": 1.0904, + "step": 64885 + }, + { + "epoch": 0.93, + "grad_norm": 0.48828125, + "learning_rate": 2.901948061450299e-06, + "loss": 1.0113, + "step": 64890 + }, + { + "epoch": 0.93, + "grad_norm": 0.52734375, + "learning_rate": 2.895963481949371e-06, + "loss": 0.9074, + "step": 64895 + }, + { + "epoch": 0.93, + "grad_norm": 0.546875, + "learning_rate": 2.889984989040051e-06, + "loss": 0.9813, + "step": 64900 + }, + { + "epoch": 0.93, + "grad_norm": 0.55859375, + "learning_rate": 2.884012583097051e-06, + "loss": 0.8772, + "step": 64905 + }, + { + "epoch": 0.93, + "grad_norm": 0.5859375, + "learning_rate": 2.8780462644947603e-06, + "loss": 0.923, + "step": 64910 + }, + { + "epoch": 0.93, + "grad_norm": 0.58203125, + "learning_rate": 2.8720860336071355e-06, + "loss": 1.0726, + "step": 64915 + }, + { + "epoch": 0.93, + "grad_norm": 0.64453125, + "learning_rate": 2.866131890807777e-06, + "loss": 0.9847, + "step": 64920 + }, + { + "epoch": 0.93, + "grad_norm": 0.88671875, + "learning_rate": 2.8601838364699097e-06, + "loss": 0.9785, + "step": 64925 + }, + { + "epoch": 0.93, + "grad_norm": 0.5703125, + "learning_rate": 2.8542418709663343e-06, + "loss": 0.9507, + "step": 64930 + }, + { + "epoch": 0.93, + "grad_norm": 0.73828125, + "learning_rate": 2.84830599466952e-06, + "loss": 0.8207, + "step": 64935 + }, + { + "epoch": 0.93, + "grad_norm": 0.5703125, + "learning_rate": 2.842376207951547e-06, + "loss": 0.9632, + "step": 64940 + }, + { + "epoch": 0.93, + "grad_norm": 0.5078125, + "learning_rate": 2.8364525111840735e-06, + "loss": 1.0299, + "step": 64945 + }, + { + "epoch": 0.93, + "grad_norm": 0.640625, + "learning_rate": 2.8305349047384355e-06, + "loss": 0.9159, + "step": 64950 + }, + { + "epoch": 0.93, + "grad_norm": 0.52734375, + "learning_rate": 2.8246233889855145e-06, + "loss": 0.9085, + "step": 64955 + }, + { + "epoch": 0.93, + "grad_norm": 0.55859375, + "learning_rate": 2.8187179642958916e-06, + "loss": 0.9428, + "step": 64960 + }, + { + "epoch": 0.93, + "grad_norm": 0.59375, + "learning_rate": 2.812818631039682e-06, + "loss": 0.9958, + "step": 64965 + }, + { + "epoch": 0.93, + "grad_norm": 0.5, + "learning_rate": 2.806925389586701e-06, + "loss": 0.8781, + "step": 64970 + }, + { + "epoch": 0.93, + "grad_norm": 0.59765625, + "learning_rate": 2.801038240306342e-06, + "loss": 0.8886, + "step": 64975 + }, + { + "epoch": 0.93, + "grad_norm": 0.5078125, + "learning_rate": 2.795157183567587e-06, + "loss": 0.9409, + "step": 64980 + }, + { + "epoch": 0.93, + "grad_norm": 0.625, + "learning_rate": 2.7892822197390978e-06, + "loss": 1.0129, + "step": 64985 + }, + { + "epoch": 0.93, + "grad_norm": 0.55078125, + "learning_rate": 2.7834133491891123e-06, + "loss": 0.8587, + "step": 64990 + }, + { + "epoch": 0.93, + "grad_norm": 0.53515625, + "learning_rate": 2.777550572285492e-06, + "loss": 0.9335, + "step": 64995 + }, + { + "epoch": 0.93, + "grad_norm": 0.6171875, + "learning_rate": 2.7716938893957323e-06, + "loss": 1.0092, + "step": 65000 + }, + { + "epoch": 0.93, + "grad_norm": 0.59375, + "learning_rate": 2.765843300886939e-06, + "loss": 0.9026, + "step": 65005 + }, + { + "epoch": 0.93, + "grad_norm": 0.609375, + "learning_rate": 2.75999880712583e-06, + "loss": 0.9924, + "step": 65010 + }, + { + "epoch": 0.93, + "grad_norm": 0.52734375, + "learning_rate": 2.7541604084787343e-06, + "loss": 0.8775, + "step": 65015 + }, + { + "epoch": 0.93, + "grad_norm": 0.609375, + "learning_rate": 2.748328105311626e-06, + "loss": 0.7592, + "step": 65020 + }, + { + "epoch": 0.93, + "grad_norm": 0.6796875, + "learning_rate": 2.7425018979900574e-06, + "loss": 1.1202, + "step": 65025 + }, + { + "epoch": 0.93, + "grad_norm": 0.65234375, + "learning_rate": 2.736681786879247e-06, + "loss": 1.1027, + "step": 65030 + }, + { + "epoch": 0.93, + "grad_norm": 0.6328125, + "learning_rate": 2.7308677723440033e-06, + "loss": 1.0419, + "step": 65035 + }, + { + "epoch": 0.93, + "grad_norm": 0.61328125, + "learning_rate": 2.725059854748735e-06, + "loss": 1.0767, + "step": 65040 + }, + { + "epoch": 0.93, + "grad_norm": 0.578125, + "learning_rate": 2.7192580344574948e-06, + "loss": 0.9814, + "step": 65045 + }, + { + "epoch": 0.93, + "grad_norm": 0.61328125, + "learning_rate": 2.7134623118339807e-06, + "loss": 1.1068, + "step": 65050 + }, + { + "epoch": 0.93, + "grad_norm": 0.53515625, + "learning_rate": 2.7076726872414358e-06, + "loss": 0.924, + "step": 65055 + }, + { + "epoch": 0.93, + "grad_norm": 0.5390625, + "learning_rate": 2.70188916104277e-06, + "loss": 0.9105, + "step": 65060 + }, + { + "epoch": 0.93, + "grad_norm": 0.59375, + "learning_rate": 2.6961117336005036e-06, + "loss": 0.968, + "step": 65065 + }, + { + "epoch": 0.93, + "grad_norm": 0.6171875, + "learning_rate": 2.690340405276781e-06, + "loss": 0.9132, + "step": 65070 + }, + { + "epoch": 0.93, + "grad_norm": 0.53515625, + "learning_rate": 2.6845751764333347e-06, + "loss": 0.9445, + "step": 65075 + }, + { + "epoch": 0.93, + "grad_norm": 0.494140625, + "learning_rate": 2.678816047431554e-06, + "loss": 0.9016, + "step": 65080 + }, + { + "epoch": 0.93, + "grad_norm": 0.6015625, + "learning_rate": 2.6730630186324378e-06, + "loss": 1.0218, + "step": 65085 + }, + { + "epoch": 0.93, + "grad_norm": 0.51171875, + "learning_rate": 2.6673160903965656e-06, + "loss": 0.9595, + "step": 65090 + }, + { + "epoch": 0.93, + "grad_norm": 0.578125, + "learning_rate": 2.6615752630841707e-06, + "loss": 1.0249, + "step": 65095 + }, + { + "epoch": 0.93, + "grad_norm": 0.486328125, + "learning_rate": 2.6558405370551097e-06, + "loss": 0.867, + "step": 65100 + }, + { + "epoch": 0.93, + "grad_norm": 0.5703125, + "learning_rate": 2.650111912668818e-06, + "loss": 1.1201, + "step": 65105 + }, + { + "epoch": 0.93, + "grad_norm": 0.6328125, + "learning_rate": 2.6443893902843853e-06, + "loss": 0.765, + "step": 65110 + }, + { + "epoch": 0.93, + "grad_norm": 0.56640625, + "learning_rate": 2.6386729702605027e-06, + "loss": 0.8427, + "step": 65115 + }, + { + "epoch": 0.93, + "grad_norm": 0.5859375, + "learning_rate": 2.632962652955495e-06, + "loss": 0.9295, + "step": 65120 + }, + { + "epoch": 0.93, + "grad_norm": 0.51953125, + "learning_rate": 2.6272584387272757e-06, + "loss": 0.8598, + "step": 65125 + }, + { + "epoch": 0.93, + "grad_norm": 0.57421875, + "learning_rate": 2.621560327933381e-06, + "loss": 0.9254, + "step": 65130 + }, + { + "epoch": 0.93, + "grad_norm": 0.640625, + "learning_rate": 2.6158683209310142e-06, + "loss": 0.9867, + "step": 65135 + }, + { + "epoch": 0.93, + "grad_norm": 0.77734375, + "learning_rate": 2.610182418076912e-06, + "loss": 1.1187, + "step": 65140 + }, + { + "epoch": 0.93, + "grad_norm": 0.5859375, + "learning_rate": 2.6045026197275e-06, + "loss": 0.8453, + "step": 65145 + }, + { + "epoch": 0.93, + "grad_norm": 0.625, + "learning_rate": 2.5988289262387944e-06, + "loss": 0.8491, + "step": 65150 + }, + { + "epoch": 0.93, + "grad_norm": 0.6640625, + "learning_rate": 2.59316133796641e-06, + "loss": 1.0447, + "step": 65155 + }, + { + "epoch": 0.93, + "grad_norm": 0.56640625, + "learning_rate": 2.5874998552656293e-06, + "loss": 0.9154, + "step": 65160 + }, + { + "epoch": 0.93, + "grad_norm": 0.61328125, + "learning_rate": 2.581844478491291e-06, + "loss": 1.0599, + "step": 65165 + }, + { + "epoch": 0.93, + "grad_norm": 0.5859375, + "learning_rate": 2.576195207997889e-06, + "loss": 1.068, + "step": 65170 + }, + { + "epoch": 0.93, + "grad_norm": 0.578125, + "learning_rate": 2.5705520441395183e-06, + "loss": 0.9493, + "step": 65175 + }, + { + "epoch": 0.93, + "grad_norm": 0.494140625, + "learning_rate": 2.5649149872699287e-06, + "loss": 1.04, + "step": 65180 + }, + { + "epoch": 0.94, + "grad_norm": 0.78515625, + "learning_rate": 2.5592840377424153e-06, + "loss": 1.0012, + "step": 65185 + }, + { + "epoch": 0.94, + "grad_norm": 0.64453125, + "learning_rate": 2.5536591959099634e-06, + "loss": 0.9686, + "step": 65190 + }, + { + "epoch": 0.94, + "grad_norm": 0.5703125, + "learning_rate": 2.5480404621251453e-06, + "loss": 0.8808, + "step": 65195 + }, + { + "epoch": 0.94, + "grad_norm": 0.55078125, + "learning_rate": 2.5424278367401244e-06, + "loss": 0.8381, + "step": 65200 + }, + { + "epoch": 0.94, + "grad_norm": 0.546875, + "learning_rate": 2.5368213201067192e-06, + "loss": 0.9821, + "step": 65205 + }, + { + "epoch": 0.94, + "grad_norm": 0.5546875, + "learning_rate": 2.531220912576371e-06, + "loss": 0.9479, + "step": 65210 + }, + { + "epoch": 0.94, + "grad_norm": 0.515625, + "learning_rate": 2.5256266145000986e-06, + "loss": 0.9484, + "step": 65215 + }, + { + "epoch": 0.94, + "grad_norm": 0.578125, + "learning_rate": 2.5200384262285658e-06, + "loss": 0.9905, + "step": 65220 + }, + { + "epoch": 0.94, + "grad_norm": 0.55078125, + "learning_rate": 2.5144563481120263e-06, + "loss": 0.9472, + "step": 65225 + }, + { + "epoch": 0.94, + "grad_norm": 0.50390625, + "learning_rate": 2.508880380500411e-06, + "loss": 1.0241, + "step": 65230 + }, + { + "epoch": 0.94, + "grad_norm": 0.66015625, + "learning_rate": 2.5033105237431964e-06, + "loss": 0.9847, + "step": 65235 + }, + { + "epoch": 0.94, + "grad_norm": 0.63671875, + "learning_rate": 2.4977467781895138e-06, + "loss": 1.1087, + "step": 65240 + }, + { + "epoch": 0.94, + "grad_norm": 0.59375, + "learning_rate": 2.4921891441881286e-06, + "loss": 0.9616, + "step": 65245 + }, + { + "epoch": 0.94, + "grad_norm": 0.6796875, + "learning_rate": 2.486637622087362e-06, + "loss": 1.0369, + "step": 65250 + }, + { + "epoch": 0.94, + "grad_norm": 0.5390625, + "learning_rate": 2.4810922122352032e-06, + "loss": 0.9011, + "step": 65255 + }, + { + "epoch": 0.94, + "grad_norm": 0.50390625, + "learning_rate": 2.475552914979273e-06, + "loss": 1.0724, + "step": 65260 + }, + { + "epoch": 0.94, + "grad_norm": 0.51953125, + "learning_rate": 2.4700197306667393e-06, + "loss": 1.0771, + "step": 65265 + }, + { + "epoch": 0.94, + "grad_norm": 0.59765625, + "learning_rate": 2.4644926596444464e-06, + "loss": 0.9856, + "step": 65270 + }, + { + "epoch": 0.94, + "grad_norm": 0.625, + "learning_rate": 2.45897170225885e-06, + "loss": 1.0245, + "step": 65275 + }, + { + "epoch": 0.94, + "grad_norm": 0.578125, + "learning_rate": 2.4534568588559847e-06, + "loss": 0.9473, + "step": 65280 + }, + { + "epoch": 0.94, + "grad_norm": 0.55859375, + "learning_rate": 2.447948129781541e-06, + "loss": 1.0451, + "step": 65285 + }, + { + "epoch": 0.94, + "grad_norm": 0.5, + "learning_rate": 2.4424455153808202e-06, + "loss": 0.8536, + "step": 65290 + }, + { + "epoch": 0.94, + "grad_norm": 0.56640625, + "learning_rate": 2.4369490159987018e-06, + "loss": 0.8815, + "step": 65295 + }, + { + "epoch": 0.94, + "grad_norm": 0.63671875, + "learning_rate": 2.431458631979744e-06, + "loss": 1.0601, + "step": 65300 + }, + { + "epoch": 0.94, + "grad_norm": 0.53125, + "learning_rate": 2.425974363668093e-06, + "loss": 0.8539, + "step": 65305 + }, + { + "epoch": 0.94, + "grad_norm": 0.546875, + "learning_rate": 2.420496211407486e-06, + "loss": 0.9943, + "step": 65310 + }, + { + "epoch": 0.94, + "grad_norm": 0.51953125, + "learning_rate": 2.4150241755413026e-06, + "loss": 0.9937, + "step": 65315 + }, + { + "epoch": 0.94, + "grad_norm": 0.53125, + "learning_rate": 2.4095582564125584e-06, + "loss": 0.9439, + "step": 65320 + }, + { + "epoch": 0.94, + "grad_norm": 0.5078125, + "learning_rate": 2.4040984543638567e-06, + "loss": 0.8602, + "step": 65325 + }, + { + "epoch": 0.94, + "grad_norm": 0.56640625, + "learning_rate": 2.3986447697374013e-06, + "loss": 0.8506, + "step": 65330 + }, + { + "epoch": 0.94, + "grad_norm": 0.52734375, + "learning_rate": 2.3931972028750414e-06, + "loss": 0.8138, + "step": 65335 + }, + { + "epoch": 0.94, + "grad_norm": 0.51171875, + "learning_rate": 2.3877557541182704e-06, + "loss": 0.8287, + "step": 65340 + }, + { + "epoch": 0.94, + "grad_norm": 0.58203125, + "learning_rate": 2.3823204238081153e-06, + "loss": 0.9755, + "step": 65345 + }, + { + "epoch": 0.94, + "grad_norm": 0.53125, + "learning_rate": 2.376891212285304e-06, + "loss": 0.9008, + "step": 65350 + }, + { + "epoch": 0.94, + "grad_norm": 0.5625, + "learning_rate": 2.371468119890141e-06, + "loss": 1.1037, + "step": 65355 + }, + { + "epoch": 0.94, + "grad_norm": 0.5546875, + "learning_rate": 2.3660511469625336e-06, + "loss": 0.9623, + "step": 65360 + }, + { + "epoch": 0.94, + "grad_norm": 0.55078125, + "learning_rate": 2.3606402938420425e-06, + "loss": 0.8876, + "step": 65365 + }, + { + "epoch": 0.94, + "grad_norm": 0.60546875, + "learning_rate": 2.35523556086783e-06, + "loss": 1.1445, + "step": 65370 + }, + { + "epoch": 0.94, + "grad_norm": 0.4921875, + "learning_rate": 2.3498369483786476e-06, + "loss": 0.9246, + "step": 65375 + }, + { + "epoch": 0.94, + "grad_norm": 0.6484375, + "learning_rate": 2.3444444567128907e-06, + "loss": 1.0409, + "step": 65380 + }, + { + "epoch": 0.94, + "grad_norm": 0.5234375, + "learning_rate": 2.339058086208601e-06, + "loss": 1.1015, + "step": 65385 + }, + { + "epoch": 0.94, + "grad_norm": 0.52734375, + "learning_rate": 2.333677837203352e-06, + "loss": 0.8732, + "step": 65390 + }, + { + "epoch": 0.94, + "grad_norm": 0.61328125, + "learning_rate": 2.3283037100344187e-06, + "loss": 0.7876, + "step": 65395 + }, + { + "epoch": 0.94, + "grad_norm": 0.53125, + "learning_rate": 2.3229357050386536e-06, + "loss": 0.8576, + "step": 65400 + }, + { + "epoch": 0.94, + "grad_norm": 0.57421875, + "learning_rate": 2.31757382255251e-06, + "loss": 0.9901, + "step": 65405 + }, + { + "epoch": 0.94, + "grad_norm": 0.51953125, + "learning_rate": 2.312218062912097e-06, + "loss": 0.8474, + "step": 65410 + }, + { + "epoch": 0.94, + "grad_norm": 0.6171875, + "learning_rate": 2.3068684264531236e-06, + "loss": 0.8593, + "step": 65415 + }, + { + "epoch": 0.94, + "grad_norm": 0.51171875, + "learning_rate": 2.3015249135108883e-06, + "loss": 0.9145, + "step": 65420 + }, + { + "epoch": 0.94, + "grad_norm": 0.58984375, + "learning_rate": 2.296187524420346e-06, + "loss": 1.052, + "step": 65425 + }, + { + "epoch": 0.94, + "grad_norm": 0.59375, + "learning_rate": 2.290856259516039e-06, + "loss": 0.948, + "step": 65430 + }, + { + "epoch": 0.94, + "grad_norm": 0.625, + "learning_rate": 2.285531119132145e-06, + "loss": 1.0625, + "step": 65435 + }, + { + "epoch": 0.94, + "grad_norm": 0.51953125, + "learning_rate": 2.2802121036024528e-06, + "loss": 0.9666, + "step": 65440 + }, + { + "epoch": 0.94, + "grad_norm": 0.6328125, + "learning_rate": 2.274899213260351e-06, + "loss": 1.0447, + "step": 65445 + }, + { + "epoch": 0.94, + "grad_norm": 0.54296875, + "learning_rate": 2.269592448438873e-06, + "loss": 1.0886, + "step": 65450 + }, + { + "epoch": 0.94, + "grad_norm": 0.51953125, + "learning_rate": 2.2642918094706423e-06, + "loss": 0.8001, + "step": 65455 + }, + { + "epoch": 0.94, + "grad_norm": 0.53125, + "learning_rate": 2.2589972966879037e-06, + "loss": 0.9675, + "step": 65460 + }, + { + "epoch": 0.94, + "grad_norm": 0.5078125, + "learning_rate": 2.253708910422547e-06, + "loss": 0.8762, + "step": 65465 + }, + { + "epoch": 0.94, + "grad_norm": 0.5703125, + "learning_rate": 2.2484266510060416e-06, + "loss": 0.9948, + "step": 65470 + }, + { + "epoch": 0.94, + "grad_norm": 0.5390625, + "learning_rate": 2.2431505187694767e-06, + "loss": 0.8915, + "step": 65475 + }, + { + "epoch": 0.94, + "grad_norm": 0.55859375, + "learning_rate": 2.2378805140435778e-06, + "loss": 0.9509, + "step": 65480 + }, + { + "epoch": 0.94, + "grad_norm": 0.546875, + "learning_rate": 2.232616637158669e-06, + "loss": 0.9404, + "step": 65485 + }, + { + "epoch": 0.94, + "grad_norm": 0.5703125, + "learning_rate": 2.227358888444686e-06, + "loss": 0.9962, + "step": 65490 + }, + { + "epoch": 0.94, + "grad_norm": 0.47265625, + "learning_rate": 2.2221072682312103e-06, + "loss": 0.9881, + "step": 65495 + }, + { + "epoch": 0.94, + "grad_norm": 0.5625, + "learning_rate": 2.216861776847412e-06, + "loss": 1.0974, + "step": 65500 + }, + { + "epoch": 0.94, + "grad_norm": 0.494140625, + "learning_rate": 2.211622414622072e-06, + "loss": 1.0143, + "step": 65505 + }, + { + "epoch": 0.94, + "grad_norm": 0.5703125, + "learning_rate": 2.206389181883628e-06, + "loss": 0.9016, + "step": 65510 + }, + { + "epoch": 0.94, + "grad_norm": 0.515625, + "learning_rate": 2.201162078960073e-06, + "loss": 1.0309, + "step": 65515 + }, + { + "epoch": 0.94, + "grad_norm": 0.59375, + "learning_rate": 2.195941106179067e-06, + "loss": 0.9086, + "step": 65520 + }, + { + "epoch": 0.94, + "grad_norm": 0.46484375, + "learning_rate": 2.1907262638678705e-06, + "loss": 0.9846, + "step": 65525 + }, + { + "epoch": 0.94, + "grad_norm": 0.63671875, + "learning_rate": 2.1855175523533443e-06, + "loss": 0.8841, + "step": 65530 + }, + { + "epoch": 0.94, + "grad_norm": 0.52734375, + "learning_rate": 2.180314971961972e-06, + "loss": 0.8873, + "step": 65535 + }, + { + "epoch": 0.94, + "grad_norm": 0.53125, + "learning_rate": 2.175118523019859e-06, + "loss": 0.9228, + "step": 65540 + }, + { + "epoch": 0.94, + "grad_norm": 0.52734375, + "learning_rate": 2.169928205852745e-06, + "loss": 0.8389, + "step": 65545 + }, + { + "epoch": 0.94, + "grad_norm": 0.59375, + "learning_rate": 2.164744020785936e-06, + "loss": 0.9474, + "step": 65550 + }, + { + "epoch": 0.94, + "grad_norm": 0.50390625, + "learning_rate": 2.159565968144406e-06, + "loss": 0.7416, + "step": 65555 + }, + { + "epoch": 0.94, + "grad_norm": 0.54296875, + "learning_rate": 2.154394048252717e-06, + "loss": 0.8988, + "step": 65560 + }, + { + "epoch": 0.94, + "grad_norm": 0.56640625, + "learning_rate": 2.1492282614350323e-06, + "loss": 0.9856, + "step": 65565 + }, + { + "epoch": 0.94, + "grad_norm": 0.5390625, + "learning_rate": 2.1440686080151708e-06, + "loss": 0.9949, + "step": 65570 + }, + { + "epoch": 0.94, + "grad_norm": 0.58203125, + "learning_rate": 2.1389150883165397e-06, + "loss": 0.9206, + "step": 65575 + }, + { + "epoch": 0.94, + "grad_norm": 0.56640625, + "learning_rate": 2.1337677026621707e-06, + "loss": 0.8943, + "step": 65580 + }, + { + "epoch": 0.94, + "grad_norm": 0.56640625, + "learning_rate": 2.1286264513746822e-06, + "loss": 0.9125, + "step": 65585 + }, + { + "epoch": 0.94, + "grad_norm": 0.486328125, + "learning_rate": 2.123491334776373e-06, + "loss": 0.9924, + "step": 65590 + }, + { + "epoch": 0.94, + "grad_norm": 0.4921875, + "learning_rate": 2.1183623531890963e-06, + "loss": 0.9117, + "step": 65595 + }, + { + "epoch": 0.94, + "grad_norm": 0.625, + "learning_rate": 2.113239506934328e-06, + "loss": 0.9869, + "step": 65600 + }, + { + "epoch": 0.94, + "grad_norm": 0.58984375, + "learning_rate": 2.1081227963332116e-06, + "loss": 0.9346, + "step": 65605 + }, + { + "epoch": 0.94, + "grad_norm": 0.5859375, + "learning_rate": 2.103012221706435e-06, + "loss": 0.9464, + "step": 65610 + }, + { + "epoch": 0.94, + "grad_norm": 0.57421875, + "learning_rate": 2.0979077833743532e-06, + "loss": 0.9183, + "step": 65615 + }, + { + "epoch": 0.94, + "grad_norm": 0.53125, + "learning_rate": 2.09280948165691e-06, + "loss": 0.9272, + "step": 65620 + }, + { + "epoch": 0.94, + "grad_norm": 0.5625, + "learning_rate": 2.087717316873683e-06, + "loss": 0.9441, + "step": 65625 + }, + { + "epoch": 0.94, + "grad_norm": 0.546875, + "learning_rate": 2.082631289343828e-06, + "loss": 0.8887, + "step": 65630 + }, + { + "epoch": 0.94, + "grad_norm": 0.57421875, + "learning_rate": 2.0775513993861905e-06, + "loss": 0.941, + "step": 65635 + }, + { + "epoch": 0.94, + "grad_norm": 0.55859375, + "learning_rate": 2.0724776473191374e-06, + "loss": 0.9125, + "step": 65640 + }, + { + "epoch": 0.94, + "grad_norm": 0.55859375, + "learning_rate": 2.067410033460726e-06, + "loss": 0.9141, + "step": 65645 + }, + { + "epoch": 0.94, + "grad_norm": 0.546875, + "learning_rate": 2.0623485581285795e-06, + "loss": 1.0082, + "step": 65650 + }, + { + "epoch": 0.94, + "grad_norm": 0.54296875, + "learning_rate": 2.0572932216399665e-06, + "loss": 1.0309, + "step": 65655 + }, + { + "epoch": 0.94, + "grad_norm": 0.5546875, + "learning_rate": 2.052244024311767e-06, + "loss": 0.931, + "step": 65660 + }, + { + "epoch": 0.94, + "grad_norm": 0.61328125, + "learning_rate": 2.0472009664604606e-06, + "loss": 1.0014, + "step": 65665 + }, + { + "epoch": 0.94, + "grad_norm": 0.6015625, + "learning_rate": 2.042164048402162e-06, + "loss": 0.996, + "step": 65670 + }, + { + "epoch": 0.94, + "grad_norm": 0.55859375, + "learning_rate": 2.0371332704525847e-06, + "loss": 0.9044, + "step": 65675 + }, + { + "epoch": 0.94, + "grad_norm": 0.5390625, + "learning_rate": 2.0321086329270547e-06, + "loss": 0.854, + "step": 65680 + }, + { + "epoch": 0.94, + "grad_norm": 0.5546875, + "learning_rate": 2.0270901361405412e-06, + "loss": 1.0305, + "step": 65685 + }, + { + "epoch": 0.94, + "grad_norm": 0.58203125, + "learning_rate": 2.0220777804076053e-06, + "loss": 0.9537, + "step": 65690 + }, + { + "epoch": 0.94, + "grad_norm": 0.5390625, + "learning_rate": 2.0170715660424166e-06, + "loss": 0.7839, + "step": 65695 + }, + { + "epoch": 0.94, + "grad_norm": 0.51953125, + "learning_rate": 2.0120714933587803e-06, + "loss": 1.1057, + "step": 65700 + }, + { + "epoch": 0.94, + "grad_norm": 0.65234375, + "learning_rate": 2.00707756267009e-06, + "loss": 0.9495, + "step": 65705 + }, + { + "epoch": 0.94, + "grad_norm": 0.58984375, + "learning_rate": 2.002089774289384e-06, + "loss": 0.9803, + "step": 65710 + }, + { + "epoch": 0.94, + "grad_norm": 0.6640625, + "learning_rate": 1.9971081285293013e-06, + "loss": 0.9487, + "step": 65715 + }, + { + "epoch": 0.94, + "grad_norm": 0.546875, + "learning_rate": 1.9921326257021035e-06, + "loss": 1.0803, + "step": 65720 + }, + { + "epoch": 0.94, + "grad_norm": 0.64453125, + "learning_rate": 1.9871632661196406e-06, + "loss": 0.9914, + "step": 65725 + }, + { + "epoch": 0.94, + "grad_norm": 0.49609375, + "learning_rate": 1.98220005009343e-06, + "loss": 0.8314, + "step": 65730 + }, + { + "epoch": 0.94, + "grad_norm": 0.62890625, + "learning_rate": 1.9772429779345457e-06, + "loss": 0.8738, + "step": 65735 + }, + { + "epoch": 0.94, + "grad_norm": 0.59375, + "learning_rate": 1.9722920499537168e-06, + "loss": 0.9574, + "step": 65740 + }, + { + "epoch": 0.94, + "grad_norm": 0.578125, + "learning_rate": 1.96734726646125e-06, + "loss": 1.0062, + "step": 65745 + }, + { + "epoch": 0.94, + "grad_norm": 0.5234375, + "learning_rate": 1.9624086277671206e-06, + "loss": 0.8644, + "step": 65750 + }, + { + "epoch": 0.94, + "grad_norm": 0.5703125, + "learning_rate": 1.957476134180869e-06, + "loss": 1.0087, + "step": 65755 + }, + { + "epoch": 0.94, + "grad_norm": 0.470703125, + "learning_rate": 1.9525497860116703e-06, + "loss": 0.7952, + "step": 65760 + }, + { + "epoch": 0.94, + "grad_norm": 0.4765625, + "learning_rate": 1.9476295835683223e-06, + "loss": 0.9089, + "step": 65765 + }, + { + "epoch": 0.94, + "grad_norm": 0.50390625, + "learning_rate": 1.9427155271592224e-06, + "loss": 0.8125, + "step": 65770 + }, + { + "epoch": 0.94, + "grad_norm": 0.63671875, + "learning_rate": 1.937807617092391e-06, + "loss": 1.1625, + "step": 65775 + }, + { + "epoch": 0.94, + "grad_norm": 0.71484375, + "learning_rate": 1.9329058536754598e-06, + "loss": 0.9517, + "step": 65780 + }, + { + "epoch": 0.94, + "grad_norm": 0.625, + "learning_rate": 1.9280102372156827e-06, + "loss": 0.8917, + "step": 65785 + }, + { + "epoch": 0.94, + "grad_norm": 0.54296875, + "learning_rate": 1.923120768019926e-06, + "loss": 0.9509, + "step": 65790 + }, + { + "epoch": 0.94, + "grad_norm": 0.57421875, + "learning_rate": 1.918237446394644e-06, + "loss": 1.0978, + "step": 65795 + }, + { + "epoch": 0.94, + "grad_norm": 0.498046875, + "learning_rate": 1.913360272645959e-06, + "loss": 0.8706, + "step": 65800 + }, + { + "epoch": 0.94, + "grad_norm": 0.55078125, + "learning_rate": 1.908489247079548e-06, + "loss": 0.9537, + "step": 65805 + }, + { + "epoch": 0.94, + "grad_norm": 0.5625, + "learning_rate": 1.9036243700007562e-06, + "loss": 0.8734, + "step": 65810 + }, + { + "epoch": 0.94, + "grad_norm": 0.53125, + "learning_rate": 1.8987656417145173e-06, + "loss": 1.0161, + "step": 65815 + }, + { + "epoch": 0.94, + "grad_norm": 0.53515625, + "learning_rate": 1.8939130625253653e-06, + "loss": 1.0725, + "step": 65820 + }, + { + "epoch": 0.94, + "grad_norm": 0.53125, + "learning_rate": 1.8890666327374795e-06, + "loss": 0.8345, + "step": 65825 + }, + { + "epoch": 0.94, + "grad_norm": 0.486328125, + "learning_rate": 1.884226352654639e-06, + "loss": 0.912, + "step": 65830 + }, + { + "epoch": 0.94, + "grad_norm": 0.58203125, + "learning_rate": 1.879392222580234e-06, + "loss": 1.0161, + "step": 65835 + }, + { + "epoch": 0.94, + "grad_norm": 0.58203125, + "learning_rate": 1.8745642428172783e-06, + "loss": 0.7901, + "step": 65840 + }, + { + "epoch": 0.94, + "grad_norm": 0.58203125, + "learning_rate": 1.8697424136683961e-06, + "loss": 1.0204, + "step": 65845 + }, + { + "epoch": 0.94, + "grad_norm": 0.5703125, + "learning_rate": 1.8649267354358012e-06, + "loss": 0.9864, + "step": 65850 + }, + { + "epoch": 0.94, + "grad_norm": 0.57421875, + "learning_rate": 1.8601172084213747e-06, + "loss": 0.8536, + "step": 65855 + }, + { + "epoch": 0.94, + "grad_norm": 0.53515625, + "learning_rate": 1.855313832926575e-06, + "loss": 0.9017, + "step": 65860 + }, + { + "epoch": 0.94, + "grad_norm": 0.58203125, + "learning_rate": 1.850516609252484e-06, + "loss": 1.0248, + "step": 65865 + }, + { + "epoch": 0.94, + "grad_norm": 0.4921875, + "learning_rate": 1.8457255376997828e-06, + "loss": 0.915, + "step": 65870 + }, + { + "epoch": 0.94, + "grad_norm": 0.546875, + "learning_rate": 1.8409406185688093e-06, + "loss": 0.8835, + "step": 65875 + }, + { + "epoch": 0.95, + "grad_norm": 0.60546875, + "learning_rate": 1.8361618521594571e-06, + "loss": 1.0339, + "step": 65880 + }, + { + "epoch": 0.95, + "grad_norm": 0.63671875, + "learning_rate": 1.831389238771286e-06, + "loss": 1.0657, + "step": 65885 + }, + { + "epoch": 0.95, + "grad_norm": 0.59375, + "learning_rate": 1.8266227787034463e-06, + "loss": 0.9807, + "step": 65890 + }, + { + "epoch": 0.95, + "grad_norm": 0.55078125, + "learning_rate": 1.8218624722547095e-06, + "loss": 0.9648, + "step": 65895 + }, + { + "epoch": 0.95, + "grad_norm": 0.54296875, + "learning_rate": 1.8171083197234262e-06, + "loss": 0.9703, + "step": 65900 + }, + { + "epoch": 0.95, + "grad_norm": 0.54296875, + "learning_rate": 1.8123603214076246e-06, + "loss": 0.8993, + "step": 65905 + }, + { + "epoch": 0.95, + "grad_norm": 0.58984375, + "learning_rate": 1.807618477604911e-06, + "loss": 1.0617, + "step": 65910 + }, + { + "epoch": 0.95, + "grad_norm": 0.55078125, + "learning_rate": 1.8028827886124922e-06, + "loss": 0.9137, + "step": 65915 + }, + { + "epoch": 0.95, + "grad_norm": 0.5859375, + "learning_rate": 1.7981532547272195e-06, + "loss": 0.8972, + "step": 65920 + }, + { + "epoch": 0.95, + "grad_norm": 0.51171875, + "learning_rate": 1.793429876245545e-06, + "loss": 0.8466, + "step": 65925 + }, + { + "epoch": 0.95, + "grad_norm": 0.54296875, + "learning_rate": 1.7887126534635311e-06, + "loss": 1.1436, + "step": 65930 + }, + { + "epoch": 0.95, + "grad_norm": 0.5390625, + "learning_rate": 1.7840015866768645e-06, + "loss": 0.8009, + "step": 65935 + }, + { + "epoch": 0.95, + "grad_norm": 0.54296875, + "learning_rate": 1.7792966761808416e-06, + "loss": 0.9147, + "step": 65940 + }, + { + "epoch": 0.95, + "grad_norm": 0.6484375, + "learning_rate": 1.7745979222703602e-06, + "loss": 0.9281, + "step": 65945 + }, + { + "epoch": 0.95, + "grad_norm": 0.5703125, + "learning_rate": 1.769905325239951e-06, + "loss": 1.0265, + "step": 65950 + }, + { + "epoch": 0.95, + "grad_norm": 0.5546875, + "learning_rate": 1.765218885383757e-06, + "loss": 0.8913, + "step": 65955 + }, + { + "epoch": 0.95, + "grad_norm": 0.498046875, + "learning_rate": 1.7605386029955205e-06, + "loss": 0.9417, + "step": 65960 + }, + { + "epoch": 0.95, + "grad_norm": 0.578125, + "learning_rate": 1.7558644783685962e-06, + "loss": 0.9741, + "step": 65965 + }, + { + "epoch": 0.95, + "grad_norm": 0.5234375, + "learning_rate": 1.751196511795994e-06, + "loss": 0.8871, + "step": 65970 + }, + { + "epoch": 0.95, + "grad_norm": 0.6328125, + "learning_rate": 1.7465347035702795e-06, + "loss": 0.8722, + "step": 65975 + }, + { + "epoch": 0.95, + "grad_norm": 0.55859375, + "learning_rate": 1.741879053983675e-06, + "loss": 0.9352, + "step": 65980 + }, + { + "epoch": 0.95, + "grad_norm": 0.51953125, + "learning_rate": 1.7372295633280023e-06, + "loss": 0.8443, + "step": 65985 + }, + { + "epoch": 0.95, + "grad_norm": 0.58984375, + "learning_rate": 1.7325862318946951e-06, + "loss": 1.0828, + "step": 65990 + }, + { + "epoch": 0.95, + "grad_norm": 0.58984375, + "learning_rate": 1.727949059974787e-06, + "loss": 0.8578, + "step": 65995 + }, + { + "epoch": 0.95, + "grad_norm": 0.6015625, + "learning_rate": 1.7233180478589683e-06, + "loss": 0.886, + "step": 66000 + }, + { + "epoch": 0.95, + "grad_norm": 0.65625, + "learning_rate": 1.7186931958374953e-06, + "loss": 1.0852, + "step": 66005 + }, + { + "epoch": 0.95, + "grad_norm": 0.5625, + "learning_rate": 1.7140745042002694e-06, + "loss": 0.9812, + "step": 66010 + }, + { + "epoch": 0.95, + "grad_norm": 0.578125, + "learning_rate": 1.7094619732367812e-06, + "loss": 0.923, + "step": 66015 + }, + { + "epoch": 0.95, + "grad_norm": 0.52734375, + "learning_rate": 1.7048556032361773e-06, + "loss": 0.9416, + "step": 66020 + }, + { + "epoch": 0.95, + "grad_norm": 0.5703125, + "learning_rate": 1.7002553944871602e-06, + "loss": 0.9781, + "step": 66025 + }, + { + "epoch": 0.95, + "grad_norm": 0.53515625, + "learning_rate": 1.6956613472780881e-06, + "loss": 0.8714, + "step": 66030 + }, + { + "epoch": 0.95, + "grad_norm": 0.62890625, + "learning_rate": 1.6910734618969304e-06, + "loss": 0.8347, + "step": 66035 + }, + { + "epoch": 0.95, + "grad_norm": 0.49609375, + "learning_rate": 1.6864917386312462e-06, + "loss": 0.8215, + "step": 66040 + }, + { + "epoch": 0.95, + "grad_norm": 0.55859375, + "learning_rate": 1.681916177768228e-06, + "loss": 0.9532, + "step": 66045 + }, + { + "epoch": 0.95, + "grad_norm": 0.56640625, + "learning_rate": 1.6773467795946796e-06, + "loss": 0.9491, + "step": 66050 + }, + { + "epoch": 0.95, + "grad_norm": 0.5859375, + "learning_rate": 1.6727835443970275e-06, + "loss": 1.0347, + "step": 66055 + }, + { + "epoch": 0.95, + "grad_norm": 0.53515625, + "learning_rate": 1.6682264724612762e-06, + "loss": 0.9638, + "step": 66060 + }, + { + "epoch": 0.95, + "grad_norm": 0.53515625, + "learning_rate": 1.6636755640730862e-06, + "loss": 0.7433, + "step": 66065 + }, + { + "epoch": 0.95, + "grad_norm": 0.4765625, + "learning_rate": 1.6591308195176958e-06, + "loss": 0.9629, + "step": 66070 + }, + { + "epoch": 0.95, + "grad_norm": 0.63671875, + "learning_rate": 1.6545922390799994e-06, + "loss": 0.9322, + "step": 66075 + }, + { + "epoch": 0.95, + "grad_norm": 0.5, + "learning_rate": 1.6500598230444586e-06, + "loss": 0.7957, + "step": 66080 + }, + { + "epoch": 0.95, + "grad_norm": 0.53125, + "learning_rate": 1.6455335716951793e-06, + "loss": 0.9208, + "step": 66085 + }, + { + "epoch": 0.95, + "grad_norm": 0.54296875, + "learning_rate": 1.6410134853158786e-06, + "loss": 0.9595, + "step": 66090 + }, + { + "epoch": 0.95, + "grad_norm": 0.546875, + "learning_rate": 1.636499564189875e-06, + "loss": 0.8989, + "step": 66095 + }, + { + "epoch": 0.95, + "grad_norm": 0.4921875, + "learning_rate": 1.6319918086000974e-06, + "loss": 0.8626, + "step": 66100 + }, + { + "epoch": 0.95, + "grad_norm": 0.55859375, + "learning_rate": 1.6274902188291197e-06, + "loss": 0.8508, + "step": 66105 + }, + { + "epoch": 0.95, + "grad_norm": 0.546875, + "learning_rate": 1.6229947951590718e-06, + "loss": 0.9814, + "step": 66110 + }, + { + "epoch": 0.95, + "grad_norm": 0.58203125, + "learning_rate": 1.6185055378717728e-06, + "loss": 0.954, + "step": 66115 + }, + { + "epoch": 0.95, + "grad_norm": 0.5859375, + "learning_rate": 1.6140224472485755e-06, + "loss": 1.0494, + "step": 66120 + }, + { + "epoch": 0.95, + "grad_norm": 0.51953125, + "learning_rate": 1.6095455235705104e-06, + "loss": 0.913, + "step": 66125 + }, + { + "epoch": 0.95, + "grad_norm": 0.5234375, + "learning_rate": 1.6050747671181864e-06, + "loss": 0.9894, + "step": 66130 + }, + { + "epoch": 0.95, + "grad_norm": 0.515625, + "learning_rate": 1.6006101781718463e-06, + "loss": 0.7572, + "step": 66135 + }, + { + "epoch": 0.95, + "grad_norm": 0.5703125, + "learning_rate": 1.5961517570113215e-06, + "loss": 1.012, + "step": 66140 + }, + { + "epoch": 0.95, + "grad_norm": 0.56640625, + "learning_rate": 1.5916995039160775e-06, + "loss": 0.8833, + "step": 66145 + }, + { + "epoch": 0.95, + "grad_norm": 0.58203125, + "learning_rate": 1.58725341916518e-06, + "loss": 1.0355, + "step": 66150 + }, + { + "epoch": 0.95, + "grad_norm": 0.45703125, + "learning_rate": 1.5828135030373282e-06, + "loss": 0.8036, + "step": 66155 + }, + { + "epoch": 0.95, + "grad_norm": 0.58984375, + "learning_rate": 1.5783797558108326e-06, + "loss": 0.9307, + "step": 66160 + }, + { + "epoch": 0.95, + "grad_norm": 0.55078125, + "learning_rate": 1.5739521777635601e-06, + "loss": 0.9648, + "step": 66165 + }, + { + "epoch": 0.95, + "grad_norm": 0.5703125, + "learning_rate": 1.5695307691730665e-06, + "loss": 0.9466, + "step": 66170 + }, + { + "epoch": 0.95, + "grad_norm": 0.58203125, + "learning_rate": 1.5651155303164967e-06, + "loss": 1.1032, + "step": 66175 + }, + { + "epoch": 0.95, + "grad_norm": 0.6484375, + "learning_rate": 1.5607064614705846e-06, + "loss": 0.763, + "step": 66180 + }, + { + "epoch": 0.95, + "grad_norm": 0.68359375, + "learning_rate": 1.5563035629117095e-06, + "loss": 0.9432, + "step": 66185 + }, + { + "epoch": 0.95, + "grad_norm": 0.5546875, + "learning_rate": 1.5519068349158505e-06, + "loss": 0.8592, + "step": 66190 + }, + { + "epoch": 0.95, + "grad_norm": 0.56640625, + "learning_rate": 1.547516277758576e-06, + "loss": 1.0989, + "step": 66195 + }, + { + "epoch": 0.95, + "grad_norm": 0.5703125, + "learning_rate": 1.5431318917151216e-06, + "loss": 0.9865, + "step": 66200 + }, + { + "epoch": 0.95, + "grad_norm": 0.5546875, + "learning_rate": 1.5387536770603007e-06, + "loss": 0.7946, + "step": 66205 + }, + { + "epoch": 0.95, + "grad_norm": 0.6015625, + "learning_rate": 1.5343816340685268e-06, + "loss": 0.9329, + "step": 66210 + }, + { + "epoch": 0.95, + "grad_norm": 0.6015625, + "learning_rate": 1.530015763013859e-06, + "loss": 1.0905, + "step": 66215 + }, + { + "epoch": 0.95, + "grad_norm": 0.62890625, + "learning_rate": 1.5256560641699446e-06, + "loss": 1.2853, + "step": 66220 + }, + { + "epoch": 0.95, + "grad_norm": 0.515625, + "learning_rate": 1.5213025378100654e-06, + "loss": 0.9044, + "step": 66225 + }, + { + "epoch": 0.95, + "grad_norm": 0.65234375, + "learning_rate": 1.5169551842071028e-06, + "loss": 0.838, + "step": 66230 + }, + { + "epoch": 0.95, + "grad_norm": 0.5234375, + "learning_rate": 1.5126140036335502e-06, + "loss": 0.9923, + "step": 66235 + }, + { + "epoch": 0.95, + "grad_norm": 0.5625, + "learning_rate": 1.5082789963615228e-06, + "loss": 1.1155, + "step": 66240 + }, + { + "epoch": 0.95, + "grad_norm": 0.5703125, + "learning_rate": 1.5039501626627372e-06, + "loss": 0.8375, + "step": 66245 + }, + { + "epoch": 0.95, + "grad_norm": 0.53125, + "learning_rate": 1.4996275028085205e-06, + "loss": 0.9562, + "step": 66250 + }, + { + "epoch": 0.95, + "grad_norm": 0.62109375, + "learning_rate": 1.4953110170698558e-06, + "loss": 0.9143, + "step": 66255 + }, + { + "epoch": 0.95, + "grad_norm": 0.55078125, + "learning_rate": 1.4910007057172714e-06, + "loss": 0.8973, + "step": 66260 + }, + { + "epoch": 0.95, + "grad_norm": 0.46875, + "learning_rate": 1.4866965690209622e-06, + "loss": 0.8352, + "step": 66265 + }, + { + "epoch": 0.95, + "grad_norm": 0.57421875, + "learning_rate": 1.4823986072507123e-06, + "loss": 1.0024, + "step": 66270 + }, + { + "epoch": 0.95, + "grad_norm": 0.53125, + "learning_rate": 1.4781068206759063e-06, + "loss": 0.8752, + "step": 66275 + }, + { + "epoch": 0.95, + "grad_norm": 0.671875, + "learning_rate": 1.4738212095655846e-06, + "loss": 1.0038, + "step": 66280 + }, + { + "epoch": 0.95, + "grad_norm": 0.58203125, + "learning_rate": 1.4695417741883543e-06, + "loss": 0.8395, + "step": 66285 + }, + { + "epoch": 0.95, + "grad_norm": 0.546875, + "learning_rate": 1.4652685148124678e-06, + "loss": 0.925, + "step": 66290 + }, + { + "epoch": 0.95, + "grad_norm": 0.62890625, + "learning_rate": 1.4610014317057664e-06, + "loss": 0.999, + "step": 66295 + }, + { + "epoch": 0.95, + "grad_norm": 0.52734375, + "learning_rate": 1.4567405251357246e-06, + "loss": 0.9159, + "step": 66300 + }, + { + "epoch": 0.95, + "grad_norm": 0.52734375, + "learning_rate": 1.4524857953694183e-06, + "loss": 0.8985, + "step": 66305 + }, + { + "epoch": 0.95, + "grad_norm": 0.44921875, + "learning_rate": 1.4482372426735337e-06, + "loss": 0.8518, + "step": 66310 + }, + { + "epoch": 0.95, + "grad_norm": 0.59765625, + "learning_rate": 1.4439948673143799e-06, + "loss": 0.967, + "step": 66315 + }, + { + "epoch": 0.95, + "grad_norm": 0.6328125, + "learning_rate": 1.439758669557889e-06, + "loss": 0.9035, + "step": 66320 + }, + { + "epoch": 0.95, + "grad_norm": 0.56640625, + "learning_rate": 1.4355286496695598e-06, + "loss": 0.9379, + "step": 66325 + }, + { + "epoch": 0.95, + "grad_norm": 0.5546875, + "learning_rate": 1.4313048079145463e-06, + "loss": 1.0857, + "step": 66330 + }, + { + "epoch": 0.95, + "grad_norm": 0.60546875, + "learning_rate": 1.4270871445576262e-06, + "loss": 0.9745, + "step": 66335 + }, + { + "epoch": 0.95, + "grad_norm": 0.5703125, + "learning_rate": 1.4228756598631322e-06, + "loss": 0.8944, + "step": 66340 + }, + { + "epoch": 0.95, + "grad_norm": 0.58984375, + "learning_rate": 1.418670354095064e-06, + "loss": 0.9383, + "step": 66345 + }, + { + "epoch": 0.95, + "grad_norm": 0.5234375, + "learning_rate": 1.4144712275170224e-06, + "loss": 0.984, + "step": 66350 + }, + { + "epoch": 0.95, + "grad_norm": 0.50390625, + "learning_rate": 1.4102782803921966e-06, + "loss": 0.8358, + "step": 66355 + }, + { + "epoch": 0.95, + "grad_norm": 0.6015625, + "learning_rate": 1.4060915129834096e-06, + "loss": 0.9024, + "step": 66360 + }, + { + "epoch": 0.95, + "grad_norm": 0.5546875, + "learning_rate": 1.401910925553107e-06, + "loss": 0.9139, + "step": 66365 + }, + { + "epoch": 0.95, + "grad_norm": 0.640625, + "learning_rate": 1.397736518363324e-06, + "loss": 0.9026, + "step": 66370 + }, + { + "epoch": 0.95, + "grad_norm": 0.546875, + "learning_rate": 1.3935682916756953e-06, + "loss": 0.9661, + "step": 66375 + }, + { + "epoch": 0.95, + "grad_norm": 0.609375, + "learning_rate": 1.3894062457515233e-06, + "loss": 0.8531, + "step": 66380 + }, + { + "epoch": 0.95, + "grad_norm": 0.64453125, + "learning_rate": 1.3852503808516771e-06, + "loss": 0.9482, + "step": 66385 + }, + { + "epoch": 0.95, + "grad_norm": 0.625, + "learning_rate": 1.381100697236637e-06, + "loss": 0.934, + "step": 66390 + }, + { + "epoch": 0.95, + "grad_norm": 0.53125, + "learning_rate": 1.3769571951665393e-06, + "loss": 1.1681, + "step": 66395 + }, + { + "epoch": 0.95, + "grad_norm": 0.56640625, + "learning_rate": 1.3728198749010657e-06, + "loss": 0.8876, + "step": 66400 + }, + { + "epoch": 0.95, + "grad_norm": 0.478515625, + "learning_rate": 1.368688736699586e-06, + "loss": 0.8038, + "step": 66405 + }, + { + "epoch": 0.95, + "grad_norm": 0.51171875, + "learning_rate": 1.3645637808210266e-06, + "loss": 0.9515, + "step": 66410 + }, + { + "epoch": 0.95, + "grad_norm": 0.5625, + "learning_rate": 1.360445007523936e-06, + "loss": 0.914, + "step": 66415 + }, + { + "epoch": 0.95, + "grad_norm": 0.5546875, + "learning_rate": 1.3563324170665082e-06, + "loss": 0.891, + "step": 66420 + }, + { + "epoch": 0.95, + "grad_norm": 0.55078125, + "learning_rate": 1.3522260097064921e-06, + "loss": 1.0436, + "step": 66425 + }, + { + "epoch": 0.95, + "grad_norm": 0.50390625, + "learning_rate": 1.3481257857013152e-06, + "loss": 1.1661, + "step": 66430 + }, + { + "epoch": 0.95, + "grad_norm": 0.51953125, + "learning_rate": 1.3440317453079609e-06, + "loss": 0.8857, + "step": 66435 + }, + { + "epoch": 0.95, + "grad_norm": 0.55859375, + "learning_rate": 1.3399438887830572e-06, + "loss": 0.807, + "step": 66440 + }, + { + "epoch": 0.95, + "grad_norm": 0.515625, + "learning_rate": 1.3358622163828438e-06, + "loss": 0.9729, + "step": 66445 + }, + { + "epoch": 0.95, + "grad_norm": 0.50390625, + "learning_rate": 1.331786728363149e-06, + "loss": 0.8429, + "step": 66450 + }, + { + "epoch": 0.95, + "grad_norm": 0.5546875, + "learning_rate": 1.3277174249794355e-06, + "loss": 0.7673, + "step": 66455 + }, + { + "epoch": 0.95, + "grad_norm": 0.578125, + "learning_rate": 1.3236543064867769e-06, + "loss": 0.9777, + "step": 66460 + }, + { + "epoch": 0.95, + "grad_norm": 0.478515625, + "learning_rate": 1.319597373139836e-06, + "loss": 0.9414, + "step": 66465 + }, + { + "epoch": 0.95, + "grad_norm": 0.5546875, + "learning_rate": 1.3155466251929316e-06, + "loss": 0.8902, + "step": 66470 + }, + { + "epoch": 0.95, + "grad_norm": 0.6015625, + "learning_rate": 1.3115020628999498e-06, + "loss": 0.8715, + "step": 66475 + }, + { + "epoch": 0.95, + "grad_norm": 0.55859375, + "learning_rate": 1.3074636865144206e-06, + "loss": 0.9018, + "step": 66480 + }, + { + "epoch": 0.95, + "grad_norm": 0.51171875, + "learning_rate": 1.3034314962894756e-06, + "loss": 0.8096, + "step": 66485 + }, + { + "epoch": 0.95, + "grad_norm": 0.56640625, + "learning_rate": 1.2994054924778342e-06, + "loss": 1.0186, + "step": 66490 + }, + { + "epoch": 0.95, + "grad_norm": 0.61328125, + "learning_rate": 1.2953856753318838e-06, + "loss": 0.9389, + "step": 66495 + }, + { + "epoch": 0.95, + "grad_norm": 0.56640625, + "learning_rate": 1.291372045103556e-06, + "loss": 1.0381, + "step": 66500 + }, + { + "epoch": 0.95, + "grad_norm": 0.5234375, + "learning_rate": 1.2873646020444608e-06, + "loss": 0.8876, + "step": 66505 + }, + { + "epoch": 0.95, + "grad_norm": 0.6015625, + "learning_rate": 1.2833633464057748e-06, + "loss": 0.9844, + "step": 66510 + }, + { + "epoch": 0.95, + "grad_norm": 0.455078125, + "learning_rate": 1.2793682784382977e-06, + "loss": 0.9653, + "step": 66515 + }, + { + "epoch": 0.95, + "grad_norm": 0.55859375, + "learning_rate": 1.2753793983924622e-06, + "loss": 0.9561, + "step": 66520 + }, + { + "epoch": 0.95, + "grad_norm": 0.48828125, + "learning_rate": 1.2713967065182797e-06, + "loss": 0.9512, + "step": 66525 + }, + { + "epoch": 0.95, + "grad_norm": 0.546875, + "learning_rate": 1.2674202030653836e-06, + "loss": 1.0748, + "step": 66530 + }, + { + "epoch": 0.95, + "grad_norm": 0.490234375, + "learning_rate": 1.2634498882830415e-06, + "loss": 0.9455, + "step": 66535 + }, + { + "epoch": 0.95, + "grad_norm": 0.55078125, + "learning_rate": 1.2594857624201207e-06, + "loss": 1.1208, + "step": 66540 + }, + { + "epoch": 0.95, + "grad_norm": 0.50390625, + "learning_rate": 1.255527825725078e-06, + "loss": 0.8845, + "step": 66545 + }, + { + "epoch": 0.95, + "grad_norm": 0.55078125, + "learning_rate": 1.251576078446015e-06, + "loss": 1.1462, + "step": 66550 + }, + { + "epoch": 0.95, + "grad_norm": 0.546875, + "learning_rate": 1.2476305208306226e-06, + "loss": 0.9087, + "step": 66555 + }, + { + "epoch": 0.95, + "grad_norm": 0.69921875, + "learning_rate": 1.243691153126225e-06, + "loss": 1.0049, + "step": 66560 + }, + { + "epoch": 0.95, + "grad_norm": 0.55078125, + "learning_rate": 1.2397579755797361e-06, + "loss": 0.9512, + "step": 66565 + }, + { + "epoch": 0.95, + "grad_norm": 0.609375, + "learning_rate": 1.2358309884377029e-06, + "loss": 0.9347, + "step": 66570 + }, + { + "epoch": 0.95, + "grad_norm": 0.5234375, + "learning_rate": 1.2319101919462616e-06, + "loss": 0.9896, + "step": 66575 + }, + { + "epoch": 0.96, + "grad_norm": 0.58203125, + "learning_rate": 1.2279955863511826e-06, + "loss": 0.9109, + "step": 66580 + }, + { + "epoch": 0.96, + "grad_norm": 0.54296875, + "learning_rate": 1.2240871718978253e-06, + "loss": 0.9015, + "step": 66585 + }, + { + "epoch": 0.96, + "grad_norm": 0.5625, + "learning_rate": 1.2201849488311822e-06, + "loss": 0.9944, + "step": 66590 + }, + { + "epoch": 0.96, + "grad_norm": 0.56640625, + "learning_rate": 1.216288917395847e-06, + "loss": 1.1017, + "step": 66595 + }, + { + "epoch": 0.96, + "grad_norm": 0.625, + "learning_rate": 1.2123990778360238e-06, + "loss": 0.9436, + "step": 66600 + }, + { + "epoch": 0.96, + "grad_norm": 0.5703125, + "learning_rate": 1.2085154303955515e-06, + "loss": 0.9388, + "step": 66605 + }, + { + "epoch": 0.96, + "grad_norm": 0.52734375, + "learning_rate": 1.2046379753178238e-06, + "loss": 0.912, + "step": 66610 + }, + { + "epoch": 0.96, + "grad_norm": 0.5390625, + "learning_rate": 1.2007667128459133e-06, + "loss": 0.9379, + "step": 66615 + }, + { + "epoch": 0.96, + "grad_norm": 0.515625, + "learning_rate": 1.196901643222481e-06, + "loss": 0.9229, + "step": 66620 + }, + { + "epoch": 0.96, + "grad_norm": 0.546875, + "learning_rate": 1.193042766689767e-06, + "loss": 1.0321, + "step": 66625 + }, + { + "epoch": 0.96, + "grad_norm": 0.5546875, + "learning_rate": 1.1891900834896775e-06, + "loss": 0.9575, + "step": 66630 + }, + { + "epoch": 0.96, + "grad_norm": 0.59375, + "learning_rate": 1.1853435938636858e-06, + "loss": 1.1775, + "step": 66635 + }, + { + "epoch": 0.96, + "grad_norm": 0.65234375, + "learning_rate": 1.1815032980528885e-06, + "loss": 0.9579, + "step": 66640 + }, + { + "epoch": 0.96, + "grad_norm": 0.5078125, + "learning_rate": 1.1776691962980146e-06, + "loss": 0.9174, + "step": 66645 + }, + { + "epoch": 0.96, + "grad_norm": 0.953125, + "learning_rate": 1.1738412888393835e-06, + "loss": 0.9915, + "step": 66650 + }, + { + "epoch": 0.96, + "grad_norm": 0.55078125, + "learning_rate": 1.1700195759169364e-06, + "loss": 0.9272, + "step": 66655 + }, + { + "epoch": 0.96, + "grad_norm": 0.5234375, + "learning_rate": 1.1662040577702148e-06, + "loss": 0.9615, + "step": 66660 + }, + { + "epoch": 0.96, + "grad_norm": 0.498046875, + "learning_rate": 1.1623947346383946e-06, + "loss": 0.9528, + "step": 66665 + }, + { + "epoch": 0.96, + "grad_norm": 0.462890625, + "learning_rate": 1.1585916067602286e-06, + "loss": 0.8361, + "step": 66670 + }, + { + "epoch": 0.96, + "grad_norm": 0.578125, + "learning_rate": 1.1547946743741155e-06, + "loss": 1.0168, + "step": 66675 + }, + { + "epoch": 0.96, + "grad_norm": 0.53125, + "learning_rate": 1.1510039377180536e-06, + "loss": 0.9082, + "step": 66680 + }, + { + "epoch": 0.96, + "grad_norm": 0.498046875, + "learning_rate": 1.1472193970296419e-06, + "loss": 0.9601, + "step": 66685 + }, + { + "epoch": 0.96, + "grad_norm": 0.56640625, + "learning_rate": 1.1434410525461014e-06, + "loss": 0.9541, + "step": 66690 + }, + { + "epoch": 0.96, + "grad_norm": 0.5625, + "learning_rate": 1.139668904504254e-06, + "loss": 1.0303, + "step": 66695 + }, + { + "epoch": 0.96, + "grad_norm": 0.61328125, + "learning_rate": 1.1359029531405662e-06, + "loss": 0.8633, + "step": 66700 + }, + { + "epoch": 0.96, + "grad_norm": 0.515625, + "learning_rate": 1.1321431986910712e-06, + "loss": 0.9686, + "step": 66705 + }, + { + "epoch": 0.96, + "grad_norm": 0.5703125, + "learning_rate": 1.1283896413914362e-06, + "loss": 0.9233, + "step": 66710 + }, + { + "epoch": 0.96, + "grad_norm": 0.6640625, + "learning_rate": 1.1246422814769509e-06, + "loss": 1.0196, + "step": 66715 + }, + { + "epoch": 0.96, + "grad_norm": 0.5, + "learning_rate": 1.1209011191824935e-06, + "loss": 0.9566, + "step": 66720 + }, + { + "epoch": 0.96, + "grad_norm": 0.546875, + "learning_rate": 1.1171661547425772e-06, + "loss": 0.9021, + "step": 66725 + }, + { + "epoch": 0.96, + "grad_norm": 0.5, + "learning_rate": 1.1134373883913029e-06, + "loss": 0.8414, + "step": 66730 + }, + { + "epoch": 0.96, + "grad_norm": 0.58984375, + "learning_rate": 1.1097148203623841e-06, + "loss": 1.0286, + "step": 66735 + }, + { + "epoch": 0.96, + "grad_norm": 0.6953125, + "learning_rate": 1.1059984508891785e-06, + "loss": 0.9377, + "step": 66740 + }, + { + "epoch": 0.96, + "grad_norm": 0.57421875, + "learning_rate": 1.1022882802046218e-06, + "loss": 0.9322, + "step": 66745 + }, + { + "epoch": 0.96, + "grad_norm": 0.55078125, + "learning_rate": 1.0985843085412617e-06, + "loss": 0.9119, + "step": 66750 + }, + { + "epoch": 0.96, + "grad_norm": 0.50390625, + "learning_rate": 1.0948865361312676e-06, + "loss": 1.0042, + "step": 66755 + }, + { + "epoch": 0.96, + "grad_norm": 0.546875, + "learning_rate": 1.0911949632064434e-06, + "loss": 0.9995, + "step": 66760 + }, + { + "epoch": 0.96, + "grad_norm": 0.58203125, + "learning_rate": 1.0875095899981702e-06, + "loss": 1.1144, + "step": 66765 + }, + { + "epoch": 0.96, + "grad_norm": 0.5546875, + "learning_rate": 1.0838304167374302e-06, + "loss": 0.8735, + "step": 66770 + }, + { + "epoch": 0.96, + "grad_norm": 0.5625, + "learning_rate": 1.0801574436548722e-06, + "loss": 0.8851, + "step": 66775 + }, + { + "epoch": 0.96, + "grad_norm": 0.51953125, + "learning_rate": 1.076490670980701e-06, + "loss": 0.9889, + "step": 66780 + }, + { + "epoch": 0.96, + "grad_norm": 0.5390625, + "learning_rate": 1.0728300989447548e-06, + "loss": 1.0086, + "step": 66785 + }, + { + "epoch": 0.96, + "grad_norm": 0.53515625, + "learning_rate": 1.0691757277764946e-06, + "loss": 0.995, + "step": 66790 + }, + { + "epoch": 0.96, + "grad_norm": 0.55859375, + "learning_rate": 1.0655275577049706e-06, + "loss": 0.9718, + "step": 66795 + }, + { + "epoch": 0.96, + "grad_norm": 0.6484375, + "learning_rate": 1.0618855889588552e-06, + "loss": 0.9534, + "step": 66800 + }, + { + "epoch": 0.96, + "grad_norm": 0.6015625, + "learning_rate": 1.0582498217664215e-06, + "loss": 1.0658, + "step": 66805 + }, + { + "epoch": 0.96, + "grad_norm": 0.54296875, + "learning_rate": 1.0546202563555874e-06, + "loss": 0.9804, + "step": 66810 + }, + { + "epoch": 0.96, + "grad_norm": 0.60546875, + "learning_rate": 1.0509968929538484e-06, + "loss": 1.0017, + "step": 66815 + }, + { + "epoch": 0.96, + "grad_norm": 0.53125, + "learning_rate": 1.0473797317883005e-06, + "loss": 0.9555, + "step": 66820 + }, + { + "epoch": 0.96, + "grad_norm": 0.5859375, + "learning_rate": 1.043768773085707e-06, + "loss": 1.0213, + "step": 66825 + }, + { + "epoch": 0.96, + "grad_norm": 0.498046875, + "learning_rate": 1.0401640170723758e-06, + "loss": 0.9763, + "step": 66830 + }, + { + "epoch": 0.96, + "grad_norm": 0.60546875, + "learning_rate": 1.0365654639742706e-06, + "loss": 1.0952, + "step": 66835 + }, + { + "epoch": 0.96, + "grad_norm": 0.4765625, + "learning_rate": 1.0329731140169663e-06, + "loss": 1.2155, + "step": 66840 + }, + { + "epoch": 0.96, + "grad_norm": 0.546875, + "learning_rate": 1.0293869674256051e-06, + "loss": 0.8288, + "step": 66845 + }, + { + "epoch": 0.96, + "grad_norm": 0.56640625, + "learning_rate": 1.025807024424985e-06, + "loss": 0.9911, + "step": 66850 + }, + { + "epoch": 0.96, + "grad_norm": 0.5390625, + "learning_rate": 1.0222332852395156e-06, + "loss": 0.9902, + "step": 66855 + }, + { + "epoch": 0.96, + "grad_norm": 0.55078125, + "learning_rate": 1.018665750093184e-06, + "loss": 0.83, + "step": 66860 + }, + { + "epoch": 0.96, + "grad_norm": 0.58203125, + "learning_rate": 1.0151044192096115e-06, + "loss": 0.8954, + "step": 66865 + }, + { + "epoch": 0.96, + "grad_norm": 0.55078125, + "learning_rate": 1.0115492928120306e-06, + "loss": 1.0404, + "step": 66870 + }, + { + "epoch": 0.96, + "grad_norm": 0.5234375, + "learning_rate": 1.0080003711232743e-06, + "loss": 0.8523, + "step": 66875 + }, + { + "epoch": 0.96, + "grad_norm": 0.58984375, + "learning_rate": 1.0044576543658092e-06, + "loss": 0.97, + "step": 66880 + }, + { + "epoch": 0.96, + "grad_norm": 0.48828125, + "learning_rate": 1.0009211427616794e-06, + "loss": 0.8799, + "step": 66885 + }, + { + "epoch": 0.96, + "grad_norm": 0.58984375, + "learning_rate": 9.973908365325525e-07, + "loss": 1.0422, + "step": 66890 + }, + { + "epoch": 0.96, + "grad_norm": 0.5859375, + "learning_rate": 9.9386673589974e-07, + "loss": 1.0017, + "step": 66895 + }, + { + "epoch": 0.96, + "grad_norm": 0.546875, + "learning_rate": 9.903488410840988e-07, + "loss": 0.9493, + "step": 66900 + }, + { + "epoch": 0.96, + "grad_norm": 0.53515625, + "learning_rate": 9.868371523061748e-07, + "loss": 0.9286, + "step": 66905 + }, + { + "epoch": 0.96, + "grad_norm": 0.5546875, + "learning_rate": 9.833316697860474e-07, + "loss": 0.9287, + "step": 66910 + }, + { + "epoch": 0.96, + "grad_norm": 0.486328125, + "learning_rate": 9.798323937434739e-07, + "loss": 0.9732, + "step": 66915 + }, + { + "epoch": 0.96, + "grad_norm": 0.53125, + "learning_rate": 9.763393243977792e-07, + "loss": 0.9871, + "step": 66920 + }, + { + "epoch": 0.96, + "grad_norm": 0.5703125, + "learning_rate": 9.728524619678991e-07, + "loss": 0.8952, + "step": 66925 + }, + { + "epoch": 0.96, + "grad_norm": 0.6796875, + "learning_rate": 9.693718066724256e-07, + "loss": 0.9673, + "step": 66930 + }, + { + "epoch": 0.96, + "grad_norm": 0.68359375, + "learning_rate": 9.658973587295062e-07, + "loss": 0.8788, + "step": 66935 + }, + { + "epoch": 0.96, + "grad_norm": 0.546875, + "learning_rate": 9.624291183569334e-07, + "loss": 0.8784, + "step": 66940 + }, + { + "epoch": 0.96, + "grad_norm": 0.48828125, + "learning_rate": 9.589670857721e-07, + "loss": 1.0006, + "step": 66945 + }, + { + "epoch": 0.96, + "grad_norm": 0.515625, + "learning_rate": 9.555112611920104e-07, + "loss": 0.9158, + "step": 66950 + }, + { + "epoch": 0.96, + "grad_norm": 0.52734375, + "learning_rate": 9.520616448332686e-07, + "loss": 0.9912, + "step": 66955 + }, + { + "epoch": 0.96, + "grad_norm": 0.57421875, + "learning_rate": 9.486182369121132e-07, + "loss": 0.9214, + "step": 66960 + }, + { + "epoch": 0.96, + "grad_norm": 0.52734375, + "learning_rate": 9.451810376443826e-07, + "loss": 1.0211, + "step": 66965 + }, + { + "epoch": 0.96, + "grad_norm": 0.48828125, + "learning_rate": 9.417500472455043e-07, + "loss": 0.9691, + "step": 66970 + }, + { + "epoch": 0.96, + "grad_norm": 0.5859375, + "learning_rate": 9.383252659305619e-07, + "loss": 1.106, + "step": 66975 + }, + { + "epoch": 0.96, + "grad_norm": 0.54296875, + "learning_rate": 9.34906693914217e-07, + "loss": 0.9614, + "step": 66980 + }, + { + "epoch": 0.96, + "grad_norm": 0.59375, + "learning_rate": 9.314943314107316e-07, + "loss": 0.981, + "step": 66985 + }, + { + "epoch": 0.96, + "grad_norm": 0.54296875, + "learning_rate": 9.280881786340124e-07, + "loss": 0.894, + "step": 66990 + }, + { + "epoch": 0.96, + "grad_norm": 0.58984375, + "learning_rate": 9.246882357975772e-07, + "loss": 0.8769, + "step": 66995 + }, + { + "epoch": 0.96, + "grad_norm": 0.74609375, + "learning_rate": 9.212945031145115e-07, + "loss": 1.0055, + "step": 67000 + }, + { + "epoch": 0.96, + "grad_norm": 0.498046875, + "learning_rate": 9.179069807975449e-07, + "loss": 0.8045, + "step": 67005 + }, + { + "epoch": 0.96, + "grad_norm": 0.57421875, + "learning_rate": 9.145256690590076e-07, + "loss": 1.0224, + "step": 67010 + }, + { + "epoch": 0.96, + "grad_norm": 0.53125, + "learning_rate": 9.111505681108634e-07, + "loss": 0.907, + "step": 67015 + }, + { + "epoch": 0.96, + "grad_norm": 0.55859375, + "learning_rate": 9.077816781646431e-07, + "loss": 1.1019, + "step": 67020 + }, + { + "epoch": 0.96, + "grad_norm": 0.56640625, + "learning_rate": 9.044189994315333e-07, + "loss": 0.8189, + "step": 67025 + }, + { + "epoch": 0.96, + "grad_norm": 0.55859375, + "learning_rate": 9.010625321222987e-07, + "loss": 1.0068, + "step": 67030 + }, + { + "epoch": 0.96, + "grad_norm": 0.625, + "learning_rate": 8.977122764473267e-07, + "loss": 0.916, + "step": 67035 + }, + { + "epoch": 0.96, + "grad_norm": 0.5703125, + "learning_rate": 8.943682326166159e-07, + "loss": 0.9092, + "step": 67040 + }, + { + "epoch": 0.96, + "grad_norm": 0.625, + "learning_rate": 8.910304008397763e-07, + "loss": 0.9385, + "step": 67045 + }, + { + "epoch": 0.96, + "grad_norm": 0.58984375, + "learning_rate": 8.876987813260407e-07, + "loss": 0.968, + "step": 67050 + }, + { + "epoch": 0.96, + "grad_norm": 0.58203125, + "learning_rate": 8.843733742842086e-07, + "loss": 1.0223, + "step": 67055 + }, + { + "epoch": 0.96, + "grad_norm": 0.5625, + "learning_rate": 8.810541799227356e-07, + "loss": 0.9759, + "step": 67060 + }, + { + "epoch": 0.96, + "grad_norm": 0.52734375, + "learning_rate": 8.777411984496775e-07, + "loss": 0.9314, + "step": 67065 + }, + { + "epoch": 0.96, + "grad_norm": 0.453125, + "learning_rate": 8.744344300726904e-07, + "loss": 0.9452, + "step": 67070 + }, + { + "epoch": 0.96, + "grad_norm": 0.55078125, + "learning_rate": 8.711338749990528e-07, + "loss": 0.8704, + "step": 67075 + }, + { + "epoch": 0.96, + "grad_norm": 0.55859375, + "learning_rate": 8.678395334356437e-07, + "loss": 0.8862, + "step": 67080 + }, + { + "epoch": 0.96, + "grad_norm": 0.55078125, + "learning_rate": 8.645514055889425e-07, + "loss": 1.1477, + "step": 67085 + }, + { + "epoch": 0.96, + "grad_norm": 0.6015625, + "learning_rate": 8.612694916650843e-07, + "loss": 0.9368, + "step": 67090 + }, + { + "epoch": 0.96, + "grad_norm": 0.515625, + "learning_rate": 8.579937918697489e-07, + "loss": 0.9919, + "step": 67095 + }, + { + "epoch": 0.96, + "grad_norm": 0.6171875, + "learning_rate": 8.547243064082721e-07, + "loss": 0.9392, + "step": 67100 + }, + { + "epoch": 0.96, + "grad_norm": 0.5078125, + "learning_rate": 8.514610354856123e-07, + "loss": 0.8963, + "step": 67105 + }, + { + "epoch": 0.96, + "grad_norm": 0.515625, + "learning_rate": 8.482039793062835e-07, + "loss": 0.9462, + "step": 67110 + }, + { + "epoch": 0.96, + "grad_norm": 0.5234375, + "learning_rate": 8.449531380744558e-07, + "loss": 0.9711, + "step": 67115 + }, + { + "epoch": 0.96, + "grad_norm": 0.57421875, + "learning_rate": 8.417085119938883e-07, + "loss": 0.9606, + "step": 67120 + }, + { + "epoch": 0.96, + "grad_norm": 0.53515625, + "learning_rate": 8.38470101267963e-07, + "loss": 1.0648, + "step": 67125 + }, + { + "epoch": 0.96, + "grad_norm": 0.59375, + "learning_rate": 8.352379060996729e-07, + "loss": 0.9065, + "step": 67130 + }, + { + "epoch": 0.96, + "grad_norm": 0.57421875, + "learning_rate": 8.320119266916115e-07, + "loss": 0.9925, + "step": 67135 + }, + { + "epoch": 0.96, + "grad_norm": 0.53125, + "learning_rate": 8.287921632459838e-07, + "loss": 0.8692, + "step": 67140 + }, + { + "epoch": 0.96, + "grad_norm": 0.578125, + "learning_rate": 8.255786159646062e-07, + "loss": 1.0614, + "step": 67145 + }, + { + "epoch": 0.96, + "grad_norm": 0.60546875, + "learning_rate": 8.223712850489063e-07, + "loss": 0.8723, + "step": 67150 + }, + { + "epoch": 0.96, + "grad_norm": 0.5546875, + "learning_rate": 8.191701706999344e-07, + "loss": 0.9797, + "step": 67155 + }, + { + "epoch": 0.96, + "grad_norm": 0.58203125, + "learning_rate": 8.159752731183412e-07, + "loss": 0.8765, + "step": 67160 + }, + { + "epoch": 0.96, + "grad_norm": 0.56640625, + "learning_rate": 8.127865925043665e-07, + "loss": 0.8819, + "step": 67165 + }, + { + "epoch": 0.96, + "grad_norm": 0.59375, + "learning_rate": 8.096041290578948e-07, + "loss": 1.06, + "step": 67170 + }, + { + "epoch": 0.96, + "grad_norm": 0.48046875, + "learning_rate": 8.064278829784111e-07, + "loss": 1.0899, + "step": 67175 + }, + { + "epoch": 0.96, + "grad_norm": 0.55078125, + "learning_rate": 8.032578544650004e-07, + "loss": 1.0346, + "step": 67180 + }, + { + "epoch": 0.96, + "grad_norm": 0.51171875, + "learning_rate": 8.000940437163595e-07, + "loss": 0.9365, + "step": 67185 + }, + { + "epoch": 0.96, + "grad_norm": 0.6015625, + "learning_rate": 7.969364509308075e-07, + "loss": 0.9604, + "step": 67190 + }, + { + "epoch": 0.96, + "grad_norm": 0.56640625, + "learning_rate": 7.937850763062527e-07, + "loss": 1.0253, + "step": 67195 + }, + { + "epoch": 0.96, + "grad_norm": 0.58984375, + "learning_rate": 7.90639920040237e-07, + "loss": 1.0417, + "step": 67200 + }, + { + "epoch": 0.96, + "grad_norm": 0.55078125, + "learning_rate": 7.875009823299029e-07, + "loss": 0.7986, + "step": 67205 + }, + { + "epoch": 0.96, + "grad_norm": 0.59765625, + "learning_rate": 7.84368263372004e-07, + "loss": 0.8278, + "step": 67210 + }, + { + "epoch": 0.96, + "grad_norm": 0.55859375, + "learning_rate": 7.812417633628943e-07, + "loss": 0.9306, + "step": 67215 + }, + { + "epoch": 0.96, + "grad_norm": 0.56640625, + "learning_rate": 7.781214824985617e-07, + "loss": 0.9969, + "step": 67220 + }, + { + "epoch": 0.96, + "grad_norm": 0.478515625, + "learning_rate": 7.750074209745717e-07, + "loss": 0.8955, + "step": 67225 + }, + { + "epoch": 0.96, + "grad_norm": 0.51171875, + "learning_rate": 7.718995789861238e-07, + "loss": 0.9978, + "step": 67230 + }, + { + "epoch": 0.96, + "grad_norm": 0.6328125, + "learning_rate": 7.68797956728029e-07, + "loss": 0.9923, + "step": 67235 + }, + { + "epoch": 0.96, + "grad_norm": 0.58984375, + "learning_rate": 7.65702554394676e-07, + "loss": 1.0853, + "step": 67240 + }, + { + "epoch": 0.96, + "grad_norm": 0.5078125, + "learning_rate": 7.62613372180121e-07, + "loss": 0.9365, + "step": 67245 + }, + { + "epoch": 0.96, + "grad_norm": 0.55859375, + "learning_rate": 7.595304102779754e-07, + "loss": 0.8599, + "step": 67250 + }, + { + "epoch": 0.96, + "grad_norm": 0.515625, + "learning_rate": 7.564536688814849e-07, + "loss": 1.0178, + "step": 67255 + }, + { + "epoch": 0.96, + "grad_norm": 0.56640625, + "learning_rate": 7.533831481835174e-07, + "loss": 0.9237, + "step": 67260 + }, + { + "epoch": 0.96, + "grad_norm": 0.439453125, + "learning_rate": 7.503188483765189e-07, + "loss": 1.0412, + "step": 67265 + }, + { + "epoch": 0.96, + "grad_norm": 0.490234375, + "learning_rate": 7.472607696525801e-07, + "loss": 0.848, + "step": 67270 + }, + { + "epoch": 0.97, + "grad_norm": 0.6484375, + "learning_rate": 7.442089122033702e-07, + "loss": 0.8946, + "step": 67275 + }, + { + "epoch": 0.97, + "grad_norm": 0.5234375, + "learning_rate": 7.411632762201804e-07, + "loss": 0.8829, + "step": 67280 + }, + { + "epoch": 0.97, + "grad_norm": 0.53125, + "learning_rate": 7.381238618939357e-07, + "loss": 1.0094, + "step": 67285 + }, + { + "epoch": 0.97, + "grad_norm": 0.609375, + "learning_rate": 7.350906694151283e-07, + "loss": 1.258, + "step": 67290 + }, + { + "epoch": 0.97, + "grad_norm": 0.5078125, + "learning_rate": 7.32063698973895e-07, + "loss": 0.8682, + "step": 67295 + }, + { + "epoch": 0.97, + "grad_norm": 0.57421875, + "learning_rate": 7.290429507599616e-07, + "loss": 0.9659, + "step": 67300 + }, + { + "epoch": 0.97, + "grad_norm": 0.640625, + "learning_rate": 7.26028424962677e-07, + "loss": 0.9875, + "step": 67305 + }, + { + "epoch": 0.97, + "grad_norm": 0.53125, + "learning_rate": 7.230201217710009e-07, + "loss": 0.8878, + "step": 67310 + }, + { + "epoch": 0.97, + "grad_norm": 0.5078125, + "learning_rate": 7.200180413734936e-07, + "loss": 0.9329, + "step": 67315 + }, + { + "epoch": 0.97, + "grad_norm": 0.56640625, + "learning_rate": 7.170221839583158e-07, + "loss": 0.8614, + "step": 67320 + }, + { + "epoch": 0.97, + "grad_norm": 0.5234375, + "learning_rate": 7.140325497132616e-07, + "loss": 0.9333, + "step": 67325 + }, + { + "epoch": 0.97, + "grad_norm": 0.546875, + "learning_rate": 7.110491388257367e-07, + "loss": 0.7535, + "step": 67330 + }, + { + "epoch": 0.97, + "grad_norm": 0.515625, + "learning_rate": 7.080719514827139e-07, + "loss": 1.0076, + "step": 67335 + }, + { + "epoch": 0.97, + "grad_norm": 0.6796875, + "learning_rate": 7.051009878708326e-07, + "loss": 0.916, + "step": 67340 + }, + { + "epoch": 0.97, + "grad_norm": 0.5390625, + "learning_rate": 7.021362481763106e-07, + "loss": 0.8847, + "step": 67345 + }, + { + "epoch": 0.97, + "grad_norm": 0.51953125, + "learning_rate": 6.991777325849769e-07, + "loss": 0.981, + "step": 67350 + }, + { + "epoch": 0.97, + "grad_norm": 0.59375, + "learning_rate": 6.962254412822833e-07, + "loss": 0.9348, + "step": 67355 + }, + { + "epoch": 0.97, + "grad_norm": 0.53125, + "learning_rate": 6.932793744532707e-07, + "loss": 0.9446, + "step": 67360 + }, + { + "epoch": 0.97, + "grad_norm": 0.55078125, + "learning_rate": 6.903395322826134e-07, + "loss": 0.9172, + "step": 67365 + }, + { + "epoch": 0.97, + "grad_norm": 0.5, + "learning_rate": 6.874059149545753e-07, + "loss": 0.7879, + "step": 67370 + }, + { + "epoch": 0.97, + "grad_norm": 0.6171875, + "learning_rate": 6.844785226530426e-07, + "loss": 0.9321, + "step": 67375 + }, + { + "epoch": 0.97, + "grad_norm": 0.53125, + "learning_rate": 6.815573555615018e-07, + "loss": 0.8774, + "step": 67380 + }, + { + "epoch": 0.97, + "grad_norm": 0.53515625, + "learning_rate": 6.78642413863062e-07, + "loss": 0.8636, + "step": 67385 + }, + { + "epoch": 0.97, + "grad_norm": 0.65234375, + "learning_rate": 6.757336977404327e-07, + "loss": 0.9958, + "step": 67390 + }, + { + "epoch": 0.97, + "grad_norm": 0.447265625, + "learning_rate": 6.728312073759458e-07, + "loss": 0.9543, + "step": 67395 + }, + { + "epoch": 0.97, + "grad_norm": 0.51953125, + "learning_rate": 6.699349429515111e-07, + "loss": 0.8073, + "step": 67400 + }, + { + "epoch": 0.97, + "grad_norm": 0.45703125, + "learning_rate": 6.670449046486837e-07, + "loss": 0.7714, + "step": 67405 + }, + { + "epoch": 0.97, + "grad_norm": 0.5625, + "learning_rate": 6.641610926486186e-07, + "loss": 1.0004, + "step": 67410 + }, + { + "epoch": 0.97, + "grad_norm": 0.5390625, + "learning_rate": 6.612835071320711e-07, + "loss": 0.9713, + "step": 67415 + }, + { + "epoch": 0.97, + "grad_norm": 0.56640625, + "learning_rate": 6.584121482794082e-07, + "loss": 1.0336, + "step": 67420 + }, + { + "epoch": 0.97, + "grad_norm": 0.578125, + "learning_rate": 6.55547016270619e-07, + "loss": 0.9656, + "step": 67425 + }, + { + "epoch": 0.97, + "grad_norm": 0.55078125, + "learning_rate": 6.526881112852711e-07, + "loss": 0.8656, + "step": 67430 + }, + { + "epoch": 0.97, + "grad_norm": 0.5390625, + "learning_rate": 6.498354335025881e-07, + "loss": 0.9085, + "step": 67435 + }, + { + "epoch": 0.97, + "grad_norm": 0.59765625, + "learning_rate": 6.469889831013709e-07, + "loss": 1.0413, + "step": 67440 + }, + { + "epoch": 0.97, + "grad_norm": 0.4921875, + "learning_rate": 6.441487602600327e-07, + "loss": 0.8726, + "step": 67445 + }, + { + "epoch": 0.97, + "grad_norm": 0.5234375, + "learning_rate": 6.413147651566088e-07, + "loss": 1.0768, + "step": 67450 + }, + { + "epoch": 0.97, + "grad_norm": 0.58984375, + "learning_rate": 6.384869979687347e-07, + "loss": 0.9141, + "step": 67455 + }, + { + "epoch": 0.97, + "grad_norm": 0.54296875, + "learning_rate": 6.356654588736688e-07, + "loss": 0.9616, + "step": 67460 + }, + { + "epoch": 0.97, + "grad_norm": 0.578125, + "learning_rate": 6.328501480482474e-07, + "loss": 1.0049, + "step": 67465 + }, + { + "epoch": 0.97, + "grad_norm": 0.59765625, + "learning_rate": 6.300410656689515e-07, + "loss": 0.9068, + "step": 67470 + }, + { + "epoch": 0.97, + "grad_norm": 0.66796875, + "learning_rate": 6.272382119118625e-07, + "loss": 1.0419, + "step": 67475 + }, + { + "epoch": 0.97, + "grad_norm": 0.5390625, + "learning_rate": 6.244415869526398e-07, + "loss": 0.9802, + "step": 67480 + }, + { + "epoch": 0.97, + "grad_norm": 0.640625, + "learning_rate": 6.216511909666101e-07, + "loss": 1.0195, + "step": 67485 + }, + { + "epoch": 0.97, + "grad_norm": 0.6015625, + "learning_rate": 6.188670241286665e-07, + "loss": 1.1275, + "step": 67490 + }, + { + "epoch": 0.97, + "grad_norm": 0.60546875, + "learning_rate": 6.16089086613325e-07, + "loss": 0.9283, + "step": 67495 + }, + { + "epoch": 0.97, + "grad_norm": 0.5546875, + "learning_rate": 6.133173785947022e-07, + "loss": 1.1244, + "step": 67500 + }, + { + "epoch": 0.97, + "grad_norm": 0.6328125, + "learning_rate": 6.105519002465365e-07, + "loss": 0.9914, + "step": 67505 + }, + { + "epoch": 0.97, + "grad_norm": 0.462890625, + "learning_rate": 6.077926517421784e-07, + "loss": 0.7442, + "step": 67510 + }, + { + "epoch": 0.97, + "grad_norm": 0.57421875, + "learning_rate": 6.050396332545782e-07, + "loss": 0.9948, + "step": 67515 + }, + { + "epoch": 0.97, + "grad_norm": 0.63671875, + "learning_rate": 6.02292844956287e-07, + "loss": 0.9302, + "step": 67520 + }, + { + "epoch": 0.97, + "grad_norm": 0.4921875, + "learning_rate": 5.995522870194891e-07, + "loss": 1.0059, + "step": 67525 + }, + { + "epoch": 0.97, + "grad_norm": 0.5390625, + "learning_rate": 5.968179596159584e-07, + "loss": 0.9251, + "step": 67530 + }, + { + "epoch": 0.97, + "grad_norm": 0.56640625, + "learning_rate": 5.940898629171021e-07, + "loss": 0.9118, + "step": 67535 + }, + { + "epoch": 0.97, + "grad_norm": 0.546875, + "learning_rate": 5.913679970938946e-07, + "loss": 1.0089, + "step": 67540 + }, + { + "epoch": 0.97, + "grad_norm": 0.5625, + "learning_rate": 5.886523623169548e-07, + "loss": 1.1221, + "step": 67545 + }, + { + "epoch": 0.97, + "grad_norm": 0.546875, + "learning_rate": 5.859429587565136e-07, + "loss": 0.9205, + "step": 67550 + }, + { + "epoch": 0.97, + "grad_norm": 0.515625, + "learning_rate": 5.832397865823791e-07, + "loss": 0.9345, + "step": 67555 + }, + { + "epoch": 0.97, + "grad_norm": 0.53125, + "learning_rate": 5.805428459640161e-07, + "loss": 0.9954, + "step": 67560 + }, + { + "epoch": 0.97, + "grad_norm": 0.55859375, + "learning_rate": 5.778521370704448e-07, + "loss": 0.9087, + "step": 67565 + }, + { + "epoch": 0.97, + "grad_norm": 0.5625, + "learning_rate": 5.751676600703415e-07, + "loss": 0.8738, + "step": 67570 + }, + { + "epoch": 0.97, + "grad_norm": 0.609375, + "learning_rate": 5.724894151319604e-07, + "loss": 0.8994, + "step": 67575 + }, + { + "epoch": 0.97, + "grad_norm": 0.6484375, + "learning_rate": 5.698174024231895e-07, + "loss": 0.9824, + "step": 67580 + }, + { + "epoch": 0.97, + "grad_norm": 0.625, + "learning_rate": 5.671516221114947e-07, + "loss": 0.8964, + "step": 67585 + }, + { + "epoch": 0.97, + "grad_norm": 0.57421875, + "learning_rate": 5.64492074363987e-07, + "loss": 0.9816, + "step": 67590 + }, + { + "epoch": 0.97, + "grad_norm": 0.66015625, + "learning_rate": 5.61838759347355e-07, + "loss": 0.9253, + "step": 67595 + }, + { + "epoch": 0.97, + "grad_norm": 0.5078125, + "learning_rate": 5.591916772279326e-07, + "loss": 0.9326, + "step": 67600 + }, + { + "epoch": 0.97, + "grad_norm": 0.6328125, + "learning_rate": 5.565508281716203e-07, + "loss": 0.9426, + "step": 67605 + }, + { + "epoch": 0.97, + "grad_norm": 0.6171875, + "learning_rate": 5.539162123439634e-07, + "loss": 1.0343, + "step": 67610 + }, + { + "epoch": 0.97, + "grad_norm": 0.59375, + "learning_rate": 5.512878299100966e-07, + "loss": 0.9433, + "step": 67615 + }, + { + "epoch": 0.97, + "grad_norm": 0.5703125, + "learning_rate": 5.486656810347657e-07, + "loss": 0.9195, + "step": 67620 + }, + { + "epoch": 0.97, + "grad_norm": 0.54296875, + "learning_rate": 5.460497658823393e-07, + "loss": 1.1784, + "step": 67625 + }, + { + "epoch": 0.97, + "grad_norm": 0.515625, + "learning_rate": 5.434400846167864e-07, + "loss": 0.9351, + "step": 67630 + }, + { + "epoch": 0.97, + "grad_norm": 0.60546875, + "learning_rate": 5.40836637401676e-07, + "loss": 0.9793, + "step": 67635 + }, + { + "epoch": 0.97, + "grad_norm": 0.578125, + "learning_rate": 5.382394244001998e-07, + "loss": 0.9489, + "step": 67640 + }, + { + "epoch": 0.97, + "grad_norm": 0.66796875, + "learning_rate": 5.356484457751609e-07, + "loss": 0.9235, + "step": 67645 + }, + { + "epoch": 0.97, + "grad_norm": 0.6796875, + "learning_rate": 5.330637016889517e-07, + "loss": 0.9436, + "step": 67650 + }, + { + "epoch": 0.97, + "grad_norm": 0.55078125, + "learning_rate": 5.30485192303587e-07, + "loss": 1.1746, + "step": 67655 + }, + { + "epoch": 0.97, + "grad_norm": 0.51171875, + "learning_rate": 5.27912917780704e-07, + "loss": 0.9042, + "step": 67660 + }, + { + "epoch": 0.97, + "grad_norm": 0.62890625, + "learning_rate": 5.253468782815296e-07, + "loss": 0.9448, + "step": 67665 + }, + { + "epoch": 0.97, + "grad_norm": 0.515625, + "learning_rate": 5.227870739669017e-07, + "loss": 0.8279, + "step": 67670 + }, + { + "epoch": 0.97, + "grad_norm": 0.51171875, + "learning_rate": 5.202335049972806e-07, + "loss": 0.969, + "step": 67675 + }, + { + "epoch": 0.97, + "grad_norm": 0.57421875, + "learning_rate": 5.176861715327163e-07, + "loss": 0.8491, + "step": 67680 + }, + { + "epoch": 0.97, + "grad_norm": 0.58984375, + "learning_rate": 5.151450737328811e-07, + "loss": 0.9091, + "step": 67685 + }, + { + "epoch": 0.97, + "grad_norm": 0.578125, + "learning_rate": 5.126102117570586e-07, + "loss": 0.8733, + "step": 67690 + }, + { + "epoch": 0.97, + "grad_norm": 0.54296875, + "learning_rate": 5.100815857641439e-07, + "loss": 1.0445, + "step": 67695 + }, + { + "epoch": 0.97, + "grad_norm": 0.57421875, + "learning_rate": 5.075591959126103e-07, + "loss": 0.9023, + "step": 67700 + }, + { + "epoch": 0.97, + "grad_norm": 0.54296875, + "learning_rate": 5.050430423605868e-07, + "loss": 0.7164, + "step": 67705 + }, + { + "epoch": 0.97, + "grad_norm": 0.57421875, + "learning_rate": 5.025331252657806e-07, + "loss": 0.7707, + "step": 67710 + }, + { + "epoch": 0.97, + "grad_norm": 0.5390625, + "learning_rate": 5.000294447855103e-07, + "loss": 1.0261, + "step": 67715 + }, + { + "epoch": 0.97, + "grad_norm": 0.53125, + "learning_rate": 4.975320010767171e-07, + "loss": 0.8371, + "step": 67720 + }, + { + "epoch": 0.97, + "grad_norm": 0.54296875, + "learning_rate": 4.950407942959534e-07, + "loss": 0.9907, + "step": 67725 + }, + { + "epoch": 0.97, + "grad_norm": 0.5078125, + "learning_rate": 4.9255582459935e-07, + "loss": 0.9361, + "step": 67730 + }, + { + "epoch": 0.97, + "grad_norm": 0.5390625, + "learning_rate": 4.900770921426712e-07, + "loss": 0.874, + "step": 67735 + }, + { + "epoch": 0.97, + "grad_norm": 0.49609375, + "learning_rate": 4.876045970813037e-07, + "loss": 0.9589, + "step": 67740 + }, + { + "epoch": 0.97, + "grad_norm": 0.61328125, + "learning_rate": 4.851383395702125e-07, + "loss": 1.0567, + "step": 67745 + }, + { + "epoch": 0.97, + "grad_norm": 0.46484375, + "learning_rate": 4.82678319763985e-07, + "loss": 0.8231, + "step": 67750 + }, + { + "epoch": 0.97, + "grad_norm": 0.5390625, + "learning_rate": 4.802245378168202e-07, + "loss": 1.0487, + "step": 67755 + }, + { + "epoch": 0.97, + "grad_norm": 0.55078125, + "learning_rate": 4.777769938825283e-07, + "loss": 0.9396, + "step": 67760 + }, + { + "epoch": 0.97, + "grad_norm": 0.58984375, + "learning_rate": 4.753356881145199e-07, + "loss": 0.9664, + "step": 67765 + }, + { + "epoch": 0.97, + "grad_norm": 0.52734375, + "learning_rate": 4.729006206658171e-07, + "loss": 1.0031, + "step": 67770 + }, + { + "epoch": 0.97, + "grad_norm": 0.53125, + "learning_rate": 4.704717916890533e-07, + "loss": 0.8784, + "step": 67775 + }, + { + "epoch": 0.97, + "grad_norm": 0.56640625, + "learning_rate": 4.6804920133647343e-07, + "loss": 0.8679, + "step": 67780 + }, + { + "epoch": 0.97, + "grad_norm": 0.67578125, + "learning_rate": 4.656328497599338e-07, + "loss": 1.0123, + "step": 67785 + }, + { + "epoch": 0.97, + "grad_norm": 0.466796875, + "learning_rate": 4.6322273711089103e-07, + "loss": 0.8113, + "step": 67790 + }, + { + "epoch": 0.97, + "grad_norm": 0.5078125, + "learning_rate": 4.6081886354040207e-07, + "loss": 0.9404, + "step": 67795 + }, + { + "epoch": 0.97, + "grad_norm": 0.54296875, + "learning_rate": 4.584212291991463e-07, + "loss": 0.9276, + "step": 67800 + }, + { + "epoch": 0.97, + "grad_norm": 0.5390625, + "learning_rate": 4.5602983423742584e-07, + "loss": 0.999, + "step": 67805 + }, + { + "epoch": 0.97, + "grad_norm": 0.55078125, + "learning_rate": 4.5364467880512076e-07, + "loss": 0.9367, + "step": 67810 + }, + { + "epoch": 0.97, + "grad_norm": 0.62890625, + "learning_rate": 4.5126576305174474e-07, + "loss": 1.0302, + "step": 67815 + }, + { + "epoch": 0.97, + "grad_norm": 0.57421875, + "learning_rate": 4.488930871264008e-07, + "loss": 0.9275, + "step": 67820 + }, + { + "epoch": 0.97, + "grad_norm": 0.53125, + "learning_rate": 4.4652665117782545e-07, + "loss": 1.0678, + "step": 67825 + }, + { + "epoch": 0.97, + "grad_norm": 0.55078125, + "learning_rate": 4.441664553543334e-07, + "loss": 0.9375, + "step": 67830 + }, + { + "epoch": 0.97, + "grad_norm": 0.5859375, + "learning_rate": 4.4181249980388416e-07, + "loss": 0.9544, + "step": 67835 + }, + { + "epoch": 0.97, + "grad_norm": 0.62890625, + "learning_rate": 4.394647846740041e-07, + "loss": 0.8992, + "step": 67840 + }, + { + "epoch": 0.97, + "grad_norm": 0.58203125, + "learning_rate": 4.3712331011186433e-07, + "loss": 1.0277, + "step": 67845 + }, + { + "epoch": 0.97, + "grad_norm": 0.58203125, + "learning_rate": 4.3478807626422536e-07, + "loss": 1.0133, + "step": 67850 + }, + { + "epoch": 0.97, + "grad_norm": 0.55859375, + "learning_rate": 4.3245908327747e-07, + "loss": 1.0299, + "step": 67855 + }, + { + "epoch": 0.97, + "grad_norm": 0.6640625, + "learning_rate": 4.3013633129758146e-07, + "loss": 0.9226, + "step": 67860 + }, + { + "epoch": 0.97, + "grad_norm": 0.6953125, + "learning_rate": 4.2781982047014337e-07, + "loss": 0.8341, + "step": 67865 + }, + { + "epoch": 0.97, + "grad_norm": 0.5703125, + "learning_rate": 4.255095509403617e-07, + "loss": 0.9564, + "step": 67870 + }, + { + "epoch": 0.97, + "grad_norm": 0.546875, + "learning_rate": 4.2320552285304296e-07, + "loss": 0.8604, + "step": 67875 + }, + { + "epoch": 0.97, + "grad_norm": 0.5546875, + "learning_rate": 4.209077363526159e-07, + "loss": 0.9471, + "step": 67880 + }, + { + "epoch": 0.97, + "grad_norm": 0.53515625, + "learning_rate": 4.1861619158309883e-07, + "loss": 0.9266, + "step": 67885 + }, + { + "epoch": 0.97, + "grad_norm": 0.5546875, + "learning_rate": 4.163308886881323e-07, + "loss": 0.8708, + "step": 67890 + }, + { + "epoch": 0.97, + "grad_norm": 0.51953125, + "learning_rate": 4.140518278109684e-07, + "loss": 0.9368, + "step": 67895 + }, + { + "epoch": 0.97, + "grad_norm": 0.5, + "learning_rate": 4.1177900909445953e-07, + "loss": 0.9397, + "step": 67900 + }, + { + "epoch": 0.97, + "grad_norm": 0.470703125, + "learning_rate": 4.095124326810473e-07, + "loss": 0.9737, + "step": 67905 + }, + { + "epoch": 0.97, + "grad_norm": 0.5234375, + "learning_rate": 4.072520987128292e-07, + "loss": 0.8897, + "step": 67910 + }, + { + "epoch": 0.97, + "grad_norm": 0.5703125, + "learning_rate": 4.0499800733148074e-07, + "loss": 0.895, + "step": 67915 + }, + { + "epoch": 0.97, + "grad_norm": 0.5546875, + "learning_rate": 4.027501586782778e-07, + "loss": 1.0609, + "step": 67920 + }, + { + "epoch": 0.97, + "grad_norm": 0.52734375, + "learning_rate": 4.005085528941299e-07, + "loss": 0.8732, + "step": 67925 + }, + { + "epoch": 0.97, + "grad_norm": 0.60546875, + "learning_rate": 3.982731901195358e-07, + "loss": 0.9002, + "step": 67930 + }, + { + "epoch": 0.97, + "grad_norm": 0.6328125, + "learning_rate": 3.9604407049461667e-07, + "loss": 0.9728, + "step": 67935 + }, + { + "epoch": 0.97, + "grad_norm": 0.5078125, + "learning_rate": 3.9382119415909413e-07, + "loss": 0.8315, + "step": 67940 + }, + { + "epoch": 0.97, + "grad_norm": 0.578125, + "learning_rate": 3.916045612523123e-07, + "loss": 0.9386, + "step": 67945 + }, + { + "epoch": 0.97, + "grad_norm": 0.625, + "learning_rate": 3.8939417191319327e-07, + "loss": 1.23, + "step": 67950 + }, + { + "epoch": 0.97, + "grad_norm": 0.578125, + "learning_rate": 3.87190026280293e-07, + "loss": 0.8856, + "step": 67955 + }, + { + "epoch": 0.97, + "grad_norm": 0.5625, + "learning_rate": 3.8499212449176757e-07, + "loss": 0.987, + "step": 67960 + }, + { + "epoch": 0.97, + "grad_norm": 0.5859375, + "learning_rate": 3.828004666853957e-07, + "loss": 0.8913, + "step": 67965 + }, + { + "epoch": 0.97, + "grad_norm": 0.55078125, + "learning_rate": 3.8061505299854525e-07, + "loss": 0.9654, + "step": 67970 + }, + { + "epoch": 0.98, + "grad_norm": 0.6171875, + "learning_rate": 3.784358835681956e-07, + "loss": 1.0292, + "step": 67975 + }, + { + "epoch": 0.98, + "grad_norm": 0.6328125, + "learning_rate": 3.762629585309374e-07, + "loss": 0.9346, + "step": 67980 + }, + { + "epoch": 0.98, + "grad_norm": 0.56640625, + "learning_rate": 3.740962780229951e-07, + "loss": 0.9542, + "step": 67985 + }, + { + "epoch": 0.98, + "grad_norm": 0.50390625, + "learning_rate": 3.7193584218014886e-07, + "loss": 0.9173, + "step": 67990 + }, + { + "epoch": 0.98, + "grad_norm": 0.58203125, + "learning_rate": 3.697816511378349e-07, + "loss": 0.949, + "step": 67995 + }, + { + "epoch": 0.98, + "grad_norm": 0.5390625, + "learning_rate": 3.6763370503107855e-07, + "loss": 0.8603, + "step": 68000 + }, + { + "epoch": 0.98, + "grad_norm": 0.5625, + "learning_rate": 3.6549200399451646e-07, + "loss": 0.9199, + "step": 68005 + }, + { + "epoch": 0.98, + "grad_norm": 0.6484375, + "learning_rate": 3.633565481623857e-07, + "loss": 1.018, + "step": 68010 + }, + { + "epoch": 0.98, + "grad_norm": 0.5078125, + "learning_rate": 3.61227337668546e-07, + "loss": 0.953, + "step": 68015 + }, + { + "epoch": 0.98, + "grad_norm": 0.53125, + "learning_rate": 3.59104372646446e-07, + "loss": 0.9271, + "step": 68020 + }, + { + "epoch": 0.98, + "grad_norm": 0.55078125, + "learning_rate": 3.5698765322917935e-07, + "loss": 1.0202, + "step": 68025 + }, + { + "epoch": 0.98, + "grad_norm": 0.609375, + "learning_rate": 3.5487717954939547e-07, + "loss": 0.9461, + "step": 68030 + }, + { + "epoch": 0.98, + "grad_norm": 0.5234375, + "learning_rate": 3.527729517394107e-07, + "loss": 0.9437, + "step": 68035 + }, + { + "epoch": 0.98, + "grad_norm": 0.64453125, + "learning_rate": 3.506749699310974e-07, + "loss": 0.9535, + "step": 68040 + }, + { + "epoch": 0.98, + "grad_norm": 0.57421875, + "learning_rate": 3.485832342559725e-07, + "loss": 0.8304, + "step": 68045 + }, + { + "epoch": 0.98, + "grad_norm": 0.65625, + "learning_rate": 3.464977448451423e-07, + "loss": 1.1197, + "step": 68050 + }, + { + "epoch": 0.98, + "grad_norm": 0.609375, + "learning_rate": 3.444185018293244e-07, + "loss": 1.1238, + "step": 68055 + }, + { + "epoch": 0.98, + "grad_norm": 0.54296875, + "learning_rate": 3.423455053388591e-07, + "loss": 1.0381, + "step": 68060 + }, + { + "epoch": 0.98, + "grad_norm": 0.578125, + "learning_rate": 3.402787555036757e-07, + "loss": 1.038, + "step": 68065 + }, + { + "epoch": 0.98, + "grad_norm": 0.58203125, + "learning_rate": 3.382182524533262e-07, + "loss": 0.9861, + "step": 68070 + }, + { + "epoch": 0.98, + "grad_norm": 0.609375, + "learning_rate": 3.361639963169627e-07, + "loss": 1.0142, + "step": 68075 + }, + { + "epoch": 0.98, + "grad_norm": 0.5546875, + "learning_rate": 3.341159872233379e-07, + "loss": 0.8862, + "step": 68080 + }, + { + "epoch": 0.98, + "grad_norm": 0.5625, + "learning_rate": 3.32074225300838e-07, + "loss": 0.9435, + "step": 68085 + }, + { + "epoch": 0.98, + "grad_norm": 0.55859375, + "learning_rate": 3.300387106774383e-07, + "loss": 0.9784, + "step": 68090 + }, + { + "epoch": 0.98, + "grad_norm": 0.53515625, + "learning_rate": 3.280094434807257e-07, + "loss": 0.9197, + "step": 68095 + }, + { + "epoch": 0.98, + "grad_norm": 0.546875, + "learning_rate": 3.2598642383789846e-07, + "loss": 0.9625, + "step": 68100 + }, + { + "epoch": 0.98, + "grad_norm": 0.53125, + "learning_rate": 3.239696518757662e-07, + "loss": 0.9105, + "step": 68105 + }, + { + "epoch": 0.98, + "grad_norm": 0.59375, + "learning_rate": 3.21959127720739e-07, + "loss": 0.8981, + "step": 68110 + }, + { + "epoch": 0.98, + "grad_norm": 0.7421875, + "learning_rate": 3.199548514988271e-07, + "loss": 0.9995, + "step": 68115 + }, + { + "epoch": 0.98, + "grad_norm": 0.69140625, + "learning_rate": 3.1795682333567443e-07, + "loss": 1.0054, + "step": 68120 + }, + { + "epoch": 0.98, + "grad_norm": 0.51953125, + "learning_rate": 3.1596504335652533e-07, + "loss": 0.9117, + "step": 68125 + }, + { + "epoch": 0.98, + "grad_norm": 0.5, + "learning_rate": 3.1397951168620207e-07, + "loss": 0.8921, + "step": 68130 + }, + { + "epoch": 0.98, + "grad_norm": 0.53125, + "learning_rate": 3.120002284491941e-07, + "loss": 0.9583, + "step": 68135 + }, + { + "epoch": 0.98, + "grad_norm": 0.490234375, + "learning_rate": 3.100271937695354e-07, + "loss": 1.0138, + "step": 68140 + }, + { + "epoch": 0.98, + "grad_norm": 0.490234375, + "learning_rate": 3.080604077709048e-07, + "loss": 0.8712, + "step": 68145 + }, + { + "epoch": 0.98, + "grad_norm": 0.57421875, + "learning_rate": 3.0609987057660383e-07, + "loss": 1.0037, + "step": 68150 + }, + { + "epoch": 0.98, + "grad_norm": 0.640625, + "learning_rate": 3.0414558230948963e-07, + "loss": 0.9395, + "step": 68155 + }, + { + "epoch": 0.98, + "grad_norm": 0.55078125, + "learning_rate": 3.021975430920865e-07, + "loss": 0.9111, + "step": 68160 + }, + { + "epoch": 0.98, + "grad_norm": 0.53515625, + "learning_rate": 3.002557530464745e-07, + "loss": 0.8204, + "step": 68165 + }, + { + "epoch": 0.98, + "grad_norm": 0.5625, + "learning_rate": 2.9832021229438955e-07, + "loss": 0.9531, + "step": 68170 + }, + { + "epoch": 0.98, + "grad_norm": 0.62109375, + "learning_rate": 2.963909209571458e-07, + "loss": 0.8763, + "step": 68175 + }, + { + "epoch": 0.98, + "grad_norm": 0.59375, + "learning_rate": 2.9446787915565765e-07, + "loss": 0.9364, + "step": 68180 + }, + { + "epoch": 0.98, + "grad_norm": 0.55859375, + "learning_rate": 2.9255108701049525e-07, + "loss": 1.0371, + "step": 68185 + }, + { + "epoch": 0.98, + "grad_norm": 0.55859375, + "learning_rate": 2.906405446417848e-07, + "loss": 0.919, + "step": 68190 + }, + { + "epoch": 0.98, + "grad_norm": 0.61328125, + "learning_rate": 2.887362521692749e-07, + "loss": 0.8907, + "step": 68195 + }, + { + "epoch": 0.98, + "grad_norm": 0.5390625, + "learning_rate": 2.868382097123479e-07, + "loss": 0.9184, + "step": 68200 + }, + { + "epoch": 0.98, + "grad_norm": 0.6171875, + "learning_rate": 2.8494641738996406e-07, + "loss": 0.9982, + "step": 68205 + }, + { + "epoch": 0.98, + "grad_norm": 0.546875, + "learning_rate": 2.8306087532069535e-07, + "loss": 1.0517, + "step": 68210 + }, + { + "epoch": 0.98, + "grad_norm": 0.65234375, + "learning_rate": 2.8118158362275827e-07, + "loss": 0.9018, + "step": 68215 + }, + { + "epoch": 0.98, + "grad_norm": 0.65625, + "learning_rate": 2.793085424139141e-07, + "loss": 1.1069, + "step": 68220 + }, + { + "epoch": 0.98, + "grad_norm": 0.64453125, + "learning_rate": 2.7744175181158015e-07, + "loss": 0.936, + "step": 68225 + }, + { + "epoch": 0.98, + "grad_norm": 0.60546875, + "learning_rate": 2.7558121193278496e-07, + "loss": 0.9315, + "step": 68230 + }, + { + "epoch": 0.98, + "grad_norm": 0.546875, + "learning_rate": 2.737269228941242e-07, + "loss": 1.0781, + "step": 68235 + }, + { + "epoch": 0.98, + "grad_norm": 0.53515625, + "learning_rate": 2.718788848118381e-07, + "loss": 0.9661, + "step": 68240 + }, + { + "epoch": 0.98, + "grad_norm": 0.56640625, + "learning_rate": 2.700370978017785e-07, + "loss": 0.9609, + "step": 68245 + }, + { + "epoch": 0.98, + "grad_norm": 0.67578125, + "learning_rate": 2.682015619793643e-07, + "loss": 1.0629, + "step": 68250 + }, + { + "epoch": 0.98, + "grad_norm": 0.546875, + "learning_rate": 2.6637227745965886e-07, + "loss": 0.9062, + "step": 68255 + }, + { + "epoch": 0.98, + "grad_norm": 0.6015625, + "learning_rate": 2.645492443573372e-07, + "loss": 1.0408, + "step": 68260 + }, + { + "epoch": 0.98, + "grad_norm": 0.515625, + "learning_rate": 2.627324627866523e-07, + "loss": 0.9313, + "step": 68265 + }, + { + "epoch": 0.98, + "grad_norm": 0.55859375, + "learning_rate": 2.609219328614909e-07, + "loss": 0.9128, + "step": 68270 + }, + { + "epoch": 0.98, + "grad_norm": 0.6484375, + "learning_rate": 2.591176546953289e-07, + "loss": 1.0647, + "step": 68275 + }, + { + "epoch": 0.98, + "grad_norm": 0.52734375, + "learning_rate": 2.573196284012758e-07, + "loss": 0.9276, + "step": 68280 + }, + { + "epoch": 0.98, + "grad_norm": 0.6015625, + "learning_rate": 2.555278540920192e-07, + "loss": 1.0192, + "step": 68285 + }, + { + "epoch": 0.98, + "grad_norm": 0.578125, + "learning_rate": 2.537423318798804e-07, + "loss": 0.8791, + "step": 68290 + }, + { + "epoch": 0.98, + "grad_norm": 0.5, + "learning_rate": 2.51963061876781e-07, + "loss": 1.1994, + "step": 68295 + }, + { + "epoch": 0.98, + "grad_norm": 0.52734375, + "learning_rate": 2.501900441942207e-07, + "loss": 0.9692, + "step": 68300 + }, + { + "epoch": 0.98, + "grad_norm": 0.6015625, + "learning_rate": 2.4842327894336603e-07, + "loss": 0.9326, + "step": 68305 + }, + { + "epoch": 0.98, + "grad_norm": 0.51953125, + "learning_rate": 2.466627662349508e-07, + "loss": 1.069, + "step": 68310 + }, + { + "epoch": 0.98, + "grad_norm": 0.59765625, + "learning_rate": 2.449085061793199e-07, + "loss": 0.9998, + "step": 68315 + }, + { + "epoch": 0.98, + "grad_norm": 0.61328125, + "learning_rate": 2.4316049888643e-07, + "loss": 0.9248, + "step": 68320 + }, + { + "epoch": 0.98, + "grad_norm": 0.73828125, + "learning_rate": 2.4141874446585997e-07, + "loss": 0.8767, + "step": 68325 + }, + { + "epoch": 0.98, + "grad_norm": 0.6171875, + "learning_rate": 2.3968324302677815e-07, + "loss": 0.8412, + "step": 68330 + }, + { + "epoch": 0.98, + "grad_norm": 0.5625, + "learning_rate": 2.3795399467796408e-07, + "loss": 1.0728, + "step": 68335 + }, + { + "epoch": 0.98, + "grad_norm": 0.55859375, + "learning_rate": 2.3623099952782002e-07, + "loss": 0.9397, + "step": 68340 + }, + { + "epoch": 0.98, + "grad_norm": 0.53515625, + "learning_rate": 2.3451425768432621e-07, + "loss": 0.8171, + "step": 68345 + }, + { + "epoch": 0.98, + "grad_norm": 0.58203125, + "learning_rate": 2.3280376925511881e-07, + "loss": 1.077, + "step": 68350 + }, + { + "epoch": 0.98, + "grad_norm": 0.474609375, + "learning_rate": 2.3109953434737875e-07, + "loss": 0.8722, + "step": 68355 + }, + { + "epoch": 0.98, + "grad_norm": 0.53515625, + "learning_rate": 2.2940155306796497e-07, + "loss": 0.8994, + "step": 68360 + }, + { + "epoch": 0.98, + "grad_norm": 0.578125, + "learning_rate": 2.2770982552328125e-07, + "loss": 1.0112, + "step": 68365 + }, + { + "epoch": 0.98, + "grad_norm": 0.60546875, + "learning_rate": 2.260243518193761e-07, + "loss": 0.9184, + "step": 68370 + }, + { + "epoch": 0.98, + "grad_norm": 0.60546875, + "learning_rate": 2.2434513206189833e-07, + "loss": 0.9368, + "step": 68375 + }, + { + "epoch": 0.98, + "grad_norm": 0.68359375, + "learning_rate": 2.2267216635609711e-07, + "loss": 1.0861, + "step": 68380 + }, + { + "epoch": 0.98, + "grad_norm": 0.625, + "learning_rate": 2.210054548068552e-07, + "loss": 0.9625, + "step": 68385 + }, + { + "epoch": 0.98, + "grad_norm": 0.609375, + "learning_rate": 2.1934499751862236e-07, + "loss": 0.8768, + "step": 68390 + }, + { + "epoch": 0.98, + "grad_norm": 0.5859375, + "learning_rate": 2.1769079459548204e-07, + "loss": 0.9296, + "step": 68395 + }, + { + "epoch": 0.98, + "grad_norm": 0.482421875, + "learning_rate": 2.1604284614112902e-07, + "loss": 0.856, + "step": 68400 + }, + { + "epoch": 0.98, + "grad_norm": 0.5078125, + "learning_rate": 2.144011522588585e-07, + "loss": 0.8969, + "step": 68405 + }, + { + "epoch": 0.98, + "grad_norm": 0.546875, + "learning_rate": 2.127657130515548e-07, + "loss": 0.8386, + "step": 68410 + }, + { + "epoch": 0.98, + "grad_norm": 0.6796875, + "learning_rate": 2.1113652862175816e-07, + "loss": 0.8712, + "step": 68415 + }, + { + "epoch": 0.98, + "grad_norm": 0.58984375, + "learning_rate": 2.0951359907157575e-07, + "loss": 0.8142, + "step": 68420 + }, + { + "epoch": 0.98, + "grad_norm": 0.58203125, + "learning_rate": 2.0789692450272624e-07, + "loss": 0.9764, + "step": 68425 + }, + { + "epoch": 0.98, + "grad_norm": 0.546875, + "learning_rate": 2.062865050165508e-07, + "loss": 0.8942, + "step": 68430 + }, + { + "epoch": 0.98, + "grad_norm": 0.58984375, + "learning_rate": 2.0468234071400194e-07, + "loss": 0.95, + "step": 68435 + }, + { + "epoch": 0.98, + "grad_norm": 0.59765625, + "learning_rate": 2.0308443169561042e-07, + "loss": 1.1128, + "step": 68440 + }, + { + "epoch": 0.98, + "grad_norm": 0.54296875, + "learning_rate": 2.0149277806155164e-07, + "loss": 0.9583, + "step": 68445 + }, + { + "epoch": 0.98, + "grad_norm": 0.48828125, + "learning_rate": 1.9990737991159024e-07, + "loss": 1.0166, + "step": 68450 + }, + { + "epoch": 0.98, + "grad_norm": 0.51953125, + "learning_rate": 1.983282373450801e-07, + "loss": 0.8655, + "step": 68455 + }, + { + "epoch": 0.98, + "grad_norm": 0.4765625, + "learning_rate": 1.9675535046104198e-07, + "loss": 0.9587, + "step": 68460 + }, + { + "epoch": 0.98, + "grad_norm": 0.4609375, + "learning_rate": 1.951887193580304e-07, + "loss": 0.9855, + "step": 68465 + }, + { + "epoch": 0.98, + "grad_norm": 0.515625, + "learning_rate": 1.936283441342668e-07, + "loss": 0.8688, + "step": 68470 + }, + { + "epoch": 0.98, + "grad_norm": 0.5859375, + "learning_rate": 1.9207422488753957e-07, + "loss": 1.0159, + "step": 68475 + }, + { + "epoch": 0.98, + "grad_norm": 0.57421875, + "learning_rate": 1.9052636171528193e-07, + "loss": 1.0702, + "step": 68480 + }, + { + "epoch": 0.98, + "grad_norm": 0.546875, + "learning_rate": 1.8898475471449405e-07, + "loss": 1.0368, + "step": 68485 + }, + { + "epoch": 0.98, + "grad_norm": 0.57421875, + "learning_rate": 1.8744940398182088e-07, + "loss": 1.0001, + "step": 68490 + }, + { + "epoch": 0.98, + "grad_norm": 0.58203125, + "learning_rate": 1.8592030961349648e-07, + "loss": 1.0321, + "step": 68495 + }, + { + "epoch": 0.98, + "grad_norm": 0.53515625, + "learning_rate": 1.843974717053776e-07, + "loss": 1.0103, + "step": 68500 + }, + { + "epoch": 0.98, + "grad_norm": 0.4375, + "learning_rate": 1.828808903528878e-07, + "loss": 0.9373, + "step": 68505 + }, + { + "epoch": 0.98, + "grad_norm": 0.6171875, + "learning_rate": 1.8137056565111777e-07, + "loss": 0.9273, + "step": 68510 + }, + { + "epoch": 0.98, + "grad_norm": 0.6328125, + "learning_rate": 1.7986649769471398e-07, + "loss": 1.1433, + "step": 68515 + }, + { + "epoch": 0.98, + "grad_norm": 0.470703125, + "learning_rate": 1.7836868657797878e-07, + "loss": 1.0121, + "step": 68520 + }, + { + "epoch": 0.98, + "grad_norm": 0.66015625, + "learning_rate": 1.7687713239477043e-07, + "loss": 0.9412, + "step": 68525 + }, + { + "epoch": 0.98, + "grad_norm": 0.58984375, + "learning_rate": 1.7539183523859192e-07, + "loss": 0.9708, + "step": 68530 + }, + { + "epoch": 0.98, + "grad_norm": 0.53125, + "learning_rate": 1.7391279520254654e-07, + "loss": 0.8521, + "step": 68535 + }, + { + "epoch": 0.98, + "grad_norm": 0.546875, + "learning_rate": 1.7244001237933793e-07, + "loss": 0.8871, + "step": 68540 + }, + { + "epoch": 0.98, + "grad_norm": 0.61328125, + "learning_rate": 1.709734868612922e-07, + "loss": 1.0056, + "step": 68545 + }, + { + "epoch": 0.98, + "grad_norm": 0.57421875, + "learning_rate": 1.6951321874031367e-07, + "loss": 0.9889, + "step": 68550 + }, + { + "epoch": 0.98, + "grad_norm": 0.5859375, + "learning_rate": 1.6805920810795128e-07, + "loss": 0.8824, + "step": 68555 + }, + { + "epoch": 0.98, + "grad_norm": 0.640625, + "learning_rate": 1.6661145505533215e-07, + "loss": 0.9732, + "step": 68560 + }, + { + "epoch": 0.98, + "grad_norm": 0.578125, + "learning_rate": 1.6516995967320593e-07, + "loss": 1.0555, + "step": 68565 + }, + { + "epoch": 0.98, + "grad_norm": 0.55859375, + "learning_rate": 1.6373472205193363e-07, + "loss": 0.9849, + "step": 68570 + }, + { + "epoch": 0.98, + "grad_norm": 0.66015625, + "learning_rate": 1.6230574228146557e-07, + "loss": 0.9778, + "step": 68575 + }, + { + "epoch": 0.98, + "grad_norm": 0.6640625, + "learning_rate": 1.608830204513856e-07, + "loss": 1.0558, + "step": 68580 + }, + { + "epoch": 0.98, + "grad_norm": 0.609375, + "learning_rate": 1.5946655665086685e-07, + "loss": 1.0651, + "step": 68585 + }, + { + "epoch": 0.98, + "grad_norm": 0.5390625, + "learning_rate": 1.5805635096869387e-07, + "loss": 0.8778, + "step": 68590 + }, + { + "epoch": 0.98, + "grad_norm": 0.4921875, + "learning_rate": 1.566524034932515e-07, + "loss": 0.9079, + "step": 68595 + }, + { + "epoch": 0.98, + "grad_norm": 0.50390625, + "learning_rate": 1.5525471431254713e-07, + "loss": 1.0091, + "step": 68600 + }, + { + "epoch": 0.98, + "grad_norm": 0.5703125, + "learning_rate": 1.5386328351419953e-07, + "loss": 0.9609, + "step": 68605 + }, + { + "epoch": 0.98, + "grad_norm": 0.55859375, + "learning_rate": 1.5247811118541676e-07, + "loss": 0.9568, + "step": 68610 + }, + { + "epoch": 0.98, + "grad_norm": 0.54296875, + "learning_rate": 1.5109919741301826e-07, + "loss": 0.9626, + "step": 68615 + }, + { + "epoch": 0.98, + "grad_norm": 0.490234375, + "learning_rate": 1.4972654228343486e-07, + "loss": 0.9385, + "step": 68620 + }, + { + "epoch": 0.98, + "grad_norm": 0.484375, + "learning_rate": 1.4836014588271996e-07, + "loss": 0.8519, + "step": 68625 + }, + { + "epoch": 0.98, + "grad_norm": 0.52734375, + "learning_rate": 1.4700000829651617e-07, + "loss": 0.9085, + "step": 68630 + }, + { + "epoch": 0.98, + "grad_norm": 0.53125, + "learning_rate": 1.456461296100664e-07, + "loss": 0.9853, + "step": 68635 + }, + { + "epoch": 0.98, + "grad_norm": 0.55078125, + "learning_rate": 1.4429850990824723e-07, + "loss": 0.9127, + "step": 68640 + }, + { + "epoch": 0.98, + "grad_norm": 0.6484375, + "learning_rate": 1.429571492755244e-07, + "loss": 0.9672, + "step": 68645 + }, + { + "epoch": 0.98, + "grad_norm": 0.62109375, + "learning_rate": 1.416220477959751e-07, + "loss": 0.9399, + "step": 68650 + }, + { + "epoch": 0.98, + "grad_norm": 0.57421875, + "learning_rate": 1.4029320555327684e-07, + "loss": 0.9394, + "step": 68655 + }, + { + "epoch": 0.98, + "grad_norm": 0.5859375, + "learning_rate": 1.389706226307408e-07, + "loss": 1.0412, + "step": 68660 + }, + { + "epoch": 0.98, + "grad_norm": 0.5234375, + "learning_rate": 1.3765429911124507e-07, + "loss": 0.9976, + "step": 68665 + }, + { + "epoch": 0.99, + "grad_norm": 0.56640625, + "learning_rate": 1.3634423507732362e-07, + "loss": 0.8278, + "step": 68670 + }, + { + "epoch": 0.99, + "grad_norm": 0.578125, + "learning_rate": 1.3504043061107752e-07, + "loss": 1.0012, + "step": 68675 + }, + { + "epoch": 0.99, + "grad_norm": 0.51953125, + "learning_rate": 1.3374288579424132e-07, + "loss": 0.848, + "step": 68680 + }, + { + "epoch": 0.99, + "grad_norm": 0.5859375, + "learning_rate": 1.3245160070812778e-07, + "loss": 1.0536, + "step": 68685 + }, + { + "epoch": 0.99, + "grad_norm": 0.58984375, + "learning_rate": 1.3116657543369436e-07, + "loss": 1.0768, + "step": 68690 + }, + { + "epoch": 0.99, + "grad_norm": 0.6171875, + "learning_rate": 1.2988781005147666e-07, + "loss": 1.0061, + "step": 68695 + }, + { + "epoch": 0.99, + "grad_norm": 0.57421875, + "learning_rate": 1.2861530464163273e-07, + "loss": 0.88, + "step": 68700 + }, + { + "epoch": 0.99, + "grad_norm": 0.6796875, + "learning_rate": 1.2734905928393215e-07, + "loss": 1.0937, + "step": 68705 + }, + { + "epoch": 0.99, + "grad_norm": 0.57421875, + "learning_rate": 1.2608907405773362e-07, + "loss": 1.0532, + "step": 68710 + }, + { + "epoch": 0.99, + "grad_norm": 0.54296875, + "learning_rate": 1.2483534904200734e-07, + "loss": 0.9389, + "step": 68715 + }, + { + "epoch": 0.99, + "grad_norm": 0.55859375, + "learning_rate": 1.2358788431536816e-07, + "loss": 0.9631, + "step": 68720 + }, + { + "epoch": 0.99, + "grad_norm": 0.53125, + "learning_rate": 1.2234667995598693e-07, + "loss": 1.0137, + "step": 68725 + }, + { + "epoch": 0.99, + "grad_norm": 0.490234375, + "learning_rate": 1.2111173604165692e-07, + "loss": 0.9438, + "step": 68730 + }, + { + "epoch": 0.99, + "grad_norm": 0.53125, + "learning_rate": 1.1988305264980516e-07, + "loss": 1.0743, + "step": 68735 + }, + { + "epoch": 0.99, + "grad_norm": 0.5859375, + "learning_rate": 1.1866062985743664e-07, + "loss": 0.9187, + "step": 68740 + }, + { + "epoch": 0.99, + "grad_norm": 0.6171875, + "learning_rate": 1.1744446774116791e-07, + "loss": 0.9324, + "step": 68745 + }, + { + "epoch": 0.99, + "grad_norm": 0.65625, + "learning_rate": 1.1623456637723795e-07, + "loss": 0.9166, + "step": 68750 + }, + { + "epoch": 0.99, + "grad_norm": 0.6953125, + "learning_rate": 1.1503092584148611e-07, + "loss": 1.0688, + "step": 68755 + }, + { + "epoch": 0.99, + "grad_norm": 0.51953125, + "learning_rate": 1.1383354620936315e-07, + "loss": 0.8598, + "step": 68760 + }, + { + "epoch": 0.99, + "grad_norm": 0.56640625, + "learning_rate": 1.1264242755590903e-07, + "loss": 1.0886, + "step": 68765 + }, + { + "epoch": 0.99, + "grad_norm": 0.60546875, + "learning_rate": 1.1145756995578626e-07, + "loss": 0.9614, + "step": 68770 + }, + { + "epoch": 0.99, + "grad_norm": 0.5546875, + "learning_rate": 1.1027897348326877e-07, + "loss": 0.8253, + "step": 68775 + }, + { + "epoch": 0.99, + "grad_norm": 0.45703125, + "learning_rate": 1.0910663821221967e-07, + "loss": 1.011, + "step": 68780 + }, + { + "epoch": 0.99, + "grad_norm": 0.5, + "learning_rate": 1.0794056421614684e-07, + "loss": 0.9053, + "step": 68785 + }, + { + "epoch": 0.99, + "grad_norm": 0.54296875, + "learning_rate": 1.0678075156812517e-07, + "loss": 1.0008, + "step": 68790 + }, + { + "epoch": 0.99, + "grad_norm": 0.51171875, + "learning_rate": 1.0562720034085205e-07, + "loss": 0.9327, + "step": 68795 + }, + { + "epoch": 0.99, + "grad_norm": 0.53515625, + "learning_rate": 1.0447991060663631e-07, + "loss": 0.9186, + "step": 68800 + }, + { + "epoch": 0.99, + "grad_norm": 0.7734375, + "learning_rate": 1.0333888243738709e-07, + "loss": 0.856, + "step": 68805 + }, + { + "epoch": 0.99, + "grad_norm": 0.57421875, + "learning_rate": 1.0220411590463607e-07, + "loss": 0.9629, + "step": 68810 + }, + { + "epoch": 0.99, + "grad_norm": 0.515625, + "learning_rate": 1.0107561107950414e-07, + "loss": 0.9257, + "step": 68815 + }, + { + "epoch": 0.99, + "grad_norm": 0.59375, + "learning_rate": 9.995336803272359e-08, + "loss": 0.933, + "step": 68820 + }, + { + "epoch": 0.99, + "grad_norm": 0.53125, + "learning_rate": 9.883738683464927e-08, + "loss": 0.8716, + "step": 68825 + }, + { + "epoch": 0.99, + "grad_norm": 0.59375, + "learning_rate": 9.772766755522522e-08, + "loss": 0.7858, + "step": 68830 + }, + { + "epoch": 0.99, + "grad_norm": 0.6328125, + "learning_rate": 9.662421026400692e-08, + "loss": 0.8859, + "step": 68835 + }, + { + "epoch": 0.99, + "grad_norm": 0.58203125, + "learning_rate": 9.552701503016126e-08, + "loss": 0.9141, + "step": 68840 + }, + { + "epoch": 0.99, + "grad_norm": 0.68359375, + "learning_rate": 9.443608192246655e-08, + "loss": 0.7986, + "step": 68845 + }, + { + "epoch": 0.99, + "grad_norm": 0.61328125, + "learning_rate": 9.335141100930145e-08, + "loss": 0.8627, + "step": 68850 + }, + { + "epoch": 0.99, + "grad_norm": 0.51953125, + "learning_rate": 9.227300235865599e-08, + "loss": 1.0326, + "step": 68855 + }, + { + "epoch": 0.99, + "grad_norm": 0.5703125, + "learning_rate": 9.120085603812056e-08, + "loss": 0.8892, + "step": 68860 + }, + { + "epoch": 0.99, + "grad_norm": 0.7421875, + "learning_rate": 9.013497211489696e-08, + "loss": 0.8939, + "step": 68865 + }, + { + "epoch": 0.99, + "grad_norm": 0.609375, + "learning_rate": 8.907535065580952e-08, + "loss": 0.8497, + "step": 68870 + }, + { + "epoch": 0.99, + "grad_norm": 0.5859375, + "learning_rate": 8.802199172726066e-08, + "loss": 1.0038, + "step": 68875 + }, + { + "epoch": 0.99, + "grad_norm": 0.5234375, + "learning_rate": 8.697489539529757e-08, + "loss": 0.9418, + "step": 68880 + }, + { + "epoch": 0.99, + "grad_norm": 0.53515625, + "learning_rate": 8.593406172552332e-08, + "loss": 0.8043, + "step": 68885 + }, + { + "epoch": 0.99, + "grad_norm": 0.609375, + "learning_rate": 8.489949078320792e-08, + "loss": 0.9, + "step": 68890 + }, + { + "epoch": 0.99, + "grad_norm": 0.51171875, + "learning_rate": 8.387118263317728e-08, + "loss": 0.934, + "step": 68895 + }, + { + "epoch": 0.99, + "grad_norm": 0.546875, + "learning_rate": 8.284913733990208e-08, + "loss": 1.1524, + "step": 68900 + }, + { + "epoch": 0.99, + "grad_norm": 0.546875, + "learning_rate": 8.183335496743105e-08, + "loss": 0.9928, + "step": 68905 + }, + { + "epoch": 0.99, + "grad_norm": 0.58984375, + "learning_rate": 8.082383557944662e-08, + "loss": 0.9562, + "step": 68910 + }, + { + "epoch": 0.99, + "grad_norm": 0.546875, + "learning_rate": 7.982057923922037e-08, + "loss": 1.0204, + "step": 68915 + }, + { + "epoch": 0.99, + "grad_norm": 0.5546875, + "learning_rate": 7.882358600964646e-08, + "loss": 0.9144, + "step": 68920 + }, + { + "epoch": 0.99, + "grad_norm": 0.5546875, + "learning_rate": 7.783285595320822e-08, + "loss": 0.8462, + "step": 68925 + }, + { + "epoch": 0.99, + "grad_norm": 0.59765625, + "learning_rate": 7.684838913200043e-08, + "loss": 0.9862, + "step": 68930 + }, + { + "epoch": 0.99, + "grad_norm": 0.5703125, + "learning_rate": 7.58701856077515e-08, + "loss": 0.8919, + "step": 68935 + }, + { + "epoch": 0.99, + "grad_norm": 0.578125, + "learning_rate": 7.489824544175683e-08, + "loss": 0.9889, + "step": 68940 + }, + { + "epoch": 0.99, + "grad_norm": 0.6484375, + "learning_rate": 7.393256869494547e-08, + "loss": 1.0822, + "step": 68945 + }, + { + "epoch": 0.99, + "grad_norm": 0.5390625, + "learning_rate": 7.297315542784678e-08, + "loss": 0.8859, + "step": 68950 + }, + { + "epoch": 0.99, + "grad_norm": 0.478515625, + "learning_rate": 7.202000570060153e-08, + "loss": 0.9169, + "step": 68955 + }, + { + "epoch": 0.99, + "grad_norm": 0.625, + "learning_rate": 7.107311957293971e-08, + "loss": 0.9741, + "step": 68960 + }, + { + "epoch": 0.99, + "grad_norm": 0.54296875, + "learning_rate": 7.013249710423608e-08, + "loss": 0.9086, + "step": 68965 + }, + { + "epoch": 0.99, + "grad_norm": 0.53515625, + "learning_rate": 6.919813835343237e-08, + "loss": 0.8493, + "step": 68970 + }, + { + "epoch": 0.99, + "grad_norm": 0.5859375, + "learning_rate": 6.827004337910391e-08, + "loss": 0.9377, + "step": 68975 + }, + { + "epoch": 0.99, + "grad_norm": 0.62890625, + "learning_rate": 6.734821223941535e-08, + "loss": 0.9256, + "step": 68980 + }, + { + "epoch": 0.99, + "grad_norm": 0.62109375, + "learning_rate": 6.643264499216483e-08, + "loss": 0.9293, + "step": 68985 + }, + { + "epoch": 0.99, + "grad_norm": 0.55859375, + "learning_rate": 6.552334169472874e-08, + "loss": 0.992, + "step": 68990 + }, + { + "epoch": 0.99, + "grad_norm": 0.63671875, + "learning_rate": 6.462030240409478e-08, + "loss": 0.8916, + "step": 68995 + }, + { + "epoch": 0.99, + "grad_norm": 0.5234375, + "learning_rate": 6.372352717688434e-08, + "loss": 1.0083, + "step": 69000 + }, + { + "epoch": 0.99, + "grad_norm": 0.55078125, + "learning_rate": 6.283301606930803e-08, + "loss": 0.8847, + "step": 69005 + }, + { + "epoch": 0.99, + "grad_norm": 0.57421875, + "learning_rate": 6.194876913716563e-08, + "loss": 0.9906, + "step": 69010 + }, + { + "epoch": 0.99, + "grad_norm": 0.52734375, + "learning_rate": 6.107078643590169e-08, + "loss": 1.1935, + "step": 69015 + }, + { + "epoch": 0.99, + "grad_norm": 0.53515625, + "learning_rate": 6.019906802053887e-08, + "loss": 0.8415, + "step": 69020 + }, + { + "epoch": 0.99, + "grad_norm": 0.55078125, + "learning_rate": 5.9333613945722344e-08, + "loss": 0.9834, + "step": 69025 + }, + { + "epoch": 0.99, + "grad_norm": 0.58984375, + "learning_rate": 5.84744242656976e-08, + "loss": 0.9222, + "step": 69030 + }, + { + "epoch": 0.99, + "grad_norm": 0.53515625, + "learning_rate": 5.762149903432157e-08, + "loss": 1.1191, + "step": 69035 + }, + { + "epoch": 0.99, + "grad_norm": 0.546875, + "learning_rate": 5.677483830505148e-08, + "loss": 0.9117, + "step": 69040 + }, + { + "epoch": 0.99, + "grad_norm": 0.53125, + "learning_rate": 5.5934442130956e-08, + "loss": 0.9156, + "step": 69045 + }, + { + "epoch": 0.99, + "grad_norm": 0.58984375, + "learning_rate": 5.510031056472631e-08, + "loss": 0.877, + "step": 69050 + }, + { + "epoch": 0.99, + "grad_norm": 0.5234375, + "learning_rate": 5.4272443658631714e-08, + "loss": 1.0652, + "step": 69055 + }, + { + "epoch": 0.99, + "grad_norm": 0.54296875, + "learning_rate": 5.3450841464564027e-08, + "loss": 0.9143, + "step": 69060 + }, + { + "epoch": 0.99, + "grad_norm": 0.55859375, + "learning_rate": 5.263550403402651e-08, + "loss": 0.9403, + "step": 69065 + }, + { + "epoch": 0.99, + "grad_norm": 0.58203125, + "learning_rate": 5.1826431418133816e-08, + "loss": 0.9235, + "step": 69070 + }, + { + "epoch": 0.99, + "grad_norm": 0.55859375, + "learning_rate": 5.1023623667589834e-08, + "loss": 0.8941, + "step": 69075 + }, + { + "epoch": 0.99, + "grad_norm": 0.66015625, + "learning_rate": 5.0227080832720985e-08, + "loss": 0.9721, + "step": 69080 + }, + { + "epoch": 0.99, + "grad_norm": 0.61328125, + "learning_rate": 4.9436802963442886e-08, + "loss": 1.0108, + "step": 69085 + }, + { + "epoch": 0.99, + "grad_norm": 0.546875, + "learning_rate": 4.865279010930479e-08, + "loss": 0.9499, + "step": 69090 + }, + { + "epoch": 0.99, + "grad_norm": 0.66796875, + "learning_rate": 4.787504231944517e-08, + "loss": 1.0272, + "step": 69095 + }, + { + "epoch": 0.99, + "grad_norm": 0.5625, + "learning_rate": 4.710355964262503e-08, + "loss": 0.9583, + "step": 69100 + }, + { + "epoch": 0.99, + "grad_norm": 0.56640625, + "learning_rate": 4.633834212717236e-08, + "loss": 0.9453, + "step": 69105 + }, + { + "epoch": 0.99, + "grad_norm": 0.5703125, + "learning_rate": 4.5579389821082117e-08, + "loss": 1.0074, + "step": 69110 + }, + { + "epoch": 0.99, + "grad_norm": 0.50390625, + "learning_rate": 4.482670277190515e-08, + "loss": 0.8634, + "step": 69115 + }, + { + "epoch": 0.99, + "grad_norm": 0.5390625, + "learning_rate": 4.4080281026837036e-08, + "loss": 0.8821, + "step": 69120 + }, + { + "epoch": 0.99, + "grad_norm": 0.4921875, + "learning_rate": 4.3340124632651465e-08, + "loss": 0.8397, + "step": 69125 + }, + { + "epoch": 0.99, + "grad_norm": 0.59375, + "learning_rate": 4.2606233635755775e-08, + "loss": 1.0284, + "step": 69130 + }, + { + "epoch": 0.99, + "grad_norm": 0.58203125, + "learning_rate": 4.1878608082135397e-08, + "loss": 0.9987, + "step": 69135 + }, + { + "epoch": 0.99, + "grad_norm": 0.5078125, + "learning_rate": 4.115724801740939e-08, + "loss": 0.7926, + "step": 69140 + }, + { + "epoch": 0.99, + "grad_norm": 0.56640625, + "learning_rate": 4.044215348678604e-08, + "loss": 0.8805, + "step": 69145 + }, + { + "epoch": 0.99, + "grad_norm": 0.5625, + "learning_rate": 3.973332453509615e-08, + "loss": 0.9702, + "step": 69150 + }, + { + "epoch": 0.99, + "grad_norm": 0.59375, + "learning_rate": 3.903076120677085e-08, + "loss": 0.995, + "step": 69155 + }, + { + "epoch": 0.99, + "grad_norm": 0.515625, + "learning_rate": 3.833446354584158e-08, + "loss": 0.8779, + "step": 69160 + }, + { + "epoch": 0.99, + "grad_norm": 0.53125, + "learning_rate": 3.7644431595962314e-08, + "loss": 0.8399, + "step": 69165 + }, + { + "epoch": 0.99, + "grad_norm": 0.53125, + "learning_rate": 3.6960665400365136e-08, + "loss": 0.7854, + "step": 69170 + }, + { + "epoch": 0.99, + "grad_norm": 0.51171875, + "learning_rate": 3.628316500192686e-08, + "loss": 0.9082, + "step": 69175 + }, + { + "epoch": 0.99, + "grad_norm": 0.58203125, + "learning_rate": 3.56119304431024e-08, + "loss": 1.0525, + "step": 69180 + }, + { + "epoch": 0.99, + "grad_norm": 0.55078125, + "learning_rate": 3.494696176598034e-08, + "loss": 0.8938, + "step": 69185 + }, + { + "epoch": 0.99, + "grad_norm": 0.5625, + "learning_rate": 3.428825901222732e-08, + "loss": 0.9169, + "step": 69190 + }, + { + "epoch": 0.99, + "grad_norm": 0.609375, + "learning_rate": 3.363582222314365e-08, + "loss": 0.9887, + "step": 69195 + }, + { + "epoch": 0.99, + "grad_norm": 0.5390625, + "learning_rate": 3.298965143961885e-08, + "loss": 0.952, + "step": 69200 + }, + { + "epoch": 0.99, + "grad_norm": 0.53125, + "learning_rate": 3.234974670215385e-08, + "loss": 1.1259, + "step": 69205 + }, + { + "epoch": 0.99, + "grad_norm": 0.671875, + "learning_rate": 3.1716108050861005e-08, + "loss": 0.9038, + "step": 69210 + }, + { + "epoch": 0.99, + "grad_norm": 0.57421875, + "learning_rate": 3.1088735525453e-08, + "loss": 1.0463, + "step": 69215 + }, + { + "epoch": 0.99, + "grad_norm": 0.52734375, + "learning_rate": 3.0467629165265025e-08, + "loss": 0.9903, + "step": 69220 + }, + { + "epoch": 0.99, + "grad_norm": 0.64453125, + "learning_rate": 2.98527890092104e-08, + "loss": 1.0275, + "step": 69225 + }, + { + "epoch": 0.99, + "grad_norm": 0.6015625, + "learning_rate": 2.9244215095847184e-08, + "loss": 1.0958, + "step": 69230 + }, + { + "epoch": 0.99, + "grad_norm": 0.50390625, + "learning_rate": 2.8641907463311524e-08, + "loss": 0.9467, + "step": 69235 + }, + { + "epoch": 0.99, + "grad_norm": 0.58203125, + "learning_rate": 2.804586614936211e-08, + "loss": 0.9539, + "step": 69240 + }, + { + "epoch": 0.99, + "grad_norm": 0.56640625, + "learning_rate": 2.7456091191357945e-08, + "loss": 0.9948, + "step": 69245 + }, + { + "epoch": 0.99, + "grad_norm": 0.5546875, + "learning_rate": 2.6872582626258357e-08, + "loss": 1.0394, + "step": 69250 + }, + { + "epoch": 0.99, + "grad_norm": 0.52734375, + "learning_rate": 2.6295340490645192e-08, + "loss": 1.0524, + "step": 69255 + }, + { + "epoch": 0.99, + "grad_norm": 0.59375, + "learning_rate": 2.5724364820711722e-08, + "loss": 0.9632, + "step": 69260 + }, + { + "epoch": 0.99, + "grad_norm": 0.56640625, + "learning_rate": 2.515965565222933e-08, + "loss": 0.9515, + "step": 69265 + }, + { + "epoch": 0.99, + "grad_norm": 0.4921875, + "learning_rate": 2.4601213020591928e-08, + "loss": 0.8043, + "step": 69270 + }, + { + "epoch": 0.99, + "grad_norm": 0.58984375, + "learning_rate": 2.4049036960827053e-08, + "loss": 0.8569, + "step": 69275 + }, + { + "epoch": 0.99, + "grad_norm": 0.5234375, + "learning_rate": 2.3503127507518153e-08, + "loss": 0.8868, + "step": 69280 + }, + { + "epoch": 0.99, + "grad_norm": 0.51953125, + "learning_rate": 2.2963484694904504e-08, + "loss": 0.8159, + "step": 69285 + }, + { + "epoch": 0.99, + "grad_norm": 0.6484375, + "learning_rate": 2.24301085568146e-08, + "loss": 1.0896, + "step": 69290 + }, + { + "epoch": 0.99, + "grad_norm": 0.5, + "learning_rate": 2.1902999126655054e-08, + "loss": 0.9988, + "step": 69295 + }, + { + "epoch": 0.99, + "grad_norm": 0.5625, + "learning_rate": 2.138215643748831e-08, + "loss": 0.7695, + "step": 69300 + }, + { + "epoch": 0.99, + "grad_norm": 0.53515625, + "learning_rate": 2.086758052194382e-08, + "loss": 1.0433, + "step": 69305 + }, + { + "epoch": 0.99, + "grad_norm": 0.609375, + "learning_rate": 2.0359271412295766e-08, + "loss": 0.9876, + "step": 69310 + }, + { + "epoch": 0.99, + "grad_norm": 0.5859375, + "learning_rate": 1.985722914039645e-08, + "loss": 0.9633, + "step": 69315 + }, + { + "epoch": 0.99, + "grad_norm": 0.5546875, + "learning_rate": 1.936145373770959e-08, + "loss": 1.1043, + "step": 69320 + }, + { + "epoch": 0.99, + "grad_norm": 0.640625, + "learning_rate": 1.887194523532143e-08, + "loss": 1.0057, + "step": 69325 + }, + { + "epoch": 0.99, + "grad_norm": 0.578125, + "learning_rate": 1.8388703663907436e-08, + "loss": 1.0526, + "step": 69330 + }, + { + "epoch": 0.99, + "grad_norm": 0.5703125, + "learning_rate": 1.791172905375449e-08, + "loss": 0.9354, + "step": 69335 + }, + { + "epoch": 0.99, + "grad_norm": 0.55859375, + "learning_rate": 1.7441021434760896e-08, + "loss": 0.8695, + "step": 69340 + }, + { + "epoch": 0.99, + "grad_norm": 0.5546875, + "learning_rate": 1.6976580836436385e-08, + "loss": 0.9798, + "step": 69345 + }, + { + "epoch": 0.99, + "grad_norm": 0.51953125, + "learning_rate": 1.6518407287902105e-08, + "loss": 0.8781, + "step": 69350 + }, + { + "epoch": 0.99, + "grad_norm": 0.50390625, + "learning_rate": 1.606650081785732e-08, + "loss": 0.9757, + "step": 69355 + }, + { + "epoch": 0.99, + "grad_norm": 0.57421875, + "learning_rate": 1.562086145463493e-08, + "loss": 0.9517, + "step": 69360 + }, + { + "epoch": 1.0, + "grad_norm": 0.490234375, + "learning_rate": 1.5181489226168132e-08, + "loss": 0.8726, + "step": 69365 + }, + { + "epoch": 1.0, + "grad_norm": 0.55078125, + "learning_rate": 1.4748384160001571e-08, + "loss": 1.044, + "step": 69370 + }, + { + "epoch": 1.0, + "grad_norm": 0.5625, + "learning_rate": 1.4321546283280197e-08, + "loss": 0.9414, + "step": 69375 + }, + { + "epoch": 1.0, + "grad_norm": 0.578125, + "learning_rate": 1.3900975622760381e-08, + "loss": 0.9418, + "step": 69380 + }, + { + "epoch": 1.0, + "grad_norm": 0.462890625, + "learning_rate": 1.3486672204798822e-08, + "loss": 1.0239, + "step": 69385 + }, + { + "epoch": 1.0, + "grad_norm": 0.61328125, + "learning_rate": 1.3078636055374738e-08, + "loss": 1.046, + "step": 69390 + }, + { + "epoch": 1.0, + "grad_norm": 0.58203125, + "learning_rate": 1.2676867200045461e-08, + "loss": 0.9348, + "step": 69395 + }, + { + "epoch": 1.0, + "grad_norm": 0.52734375, + "learning_rate": 1.2281365664013056e-08, + "loss": 0.8424, + "step": 69400 + }, + { + "epoch": 1.0, + "grad_norm": 0.58203125, + "learning_rate": 1.1892131472068801e-08, + "loss": 1.0332, + "step": 69405 + }, + { + "epoch": 1.0, + "grad_norm": 0.5234375, + "learning_rate": 1.1509164648593195e-08, + "loss": 1.0344, + "step": 69410 + }, + { + "epoch": 1.0, + "grad_norm": 0.53515625, + "learning_rate": 1.1132465217600363e-08, + "loss": 1.0642, + "step": 69415 + }, + { + "epoch": 1.0, + "grad_norm": 0.50390625, + "learning_rate": 1.0762033202704746e-08, + "loss": 0.9868, + "step": 69420 + }, + { + "epoch": 1.0, + "grad_norm": 0.54296875, + "learning_rate": 1.0397868627121111e-08, + "loss": 0.951, + "step": 69425 + }, + { + "epoch": 1.0, + "grad_norm": 0.6328125, + "learning_rate": 1.0039971513675638e-08, + "loss": 0.8531, + "step": 69430 + }, + { + "epoch": 1.0, + "grad_norm": 0.50390625, + "learning_rate": 9.68834188480594e-09, + "loss": 1.0586, + "step": 69435 + }, + { + "epoch": 1.0, + "grad_norm": 0.486328125, + "learning_rate": 9.342979762549942e-09, + "loss": 0.8869, + "step": 69440 + }, + { + "epoch": 1.0, + "grad_norm": 0.54296875, + "learning_rate": 9.003885168556991e-09, + "loss": 0.9267, + "step": 69445 + }, + { + "epoch": 1.0, + "grad_norm": 0.515625, + "learning_rate": 8.67105812408786e-09, + "loss": 1.0981, + "step": 69450 + }, + { + "epoch": 1.0, + "grad_norm": 0.6328125, + "learning_rate": 8.344498649981436e-09, + "loss": 0.8177, + "step": 69455 + }, + { + "epoch": 1.0, + "grad_norm": 0.61328125, + "learning_rate": 8.024206766732435e-09, + "loss": 1.07, + "step": 69460 + }, + { + "epoch": 1.0, + "grad_norm": 0.515625, + "learning_rate": 7.710182494413687e-09, + "loss": 0.9687, + "step": 69465 + }, + { + "epoch": 1.0, + "grad_norm": 0.578125, + "learning_rate": 7.402425852687245e-09, + "loss": 0.9563, + "step": 69470 + }, + { + "epoch": 1.0, + "grad_norm": 0.59375, + "learning_rate": 7.100936860870988e-09, + "loss": 1.0244, + "step": 69475 + }, + { + "epoch": 1.0, + "grad_norm": 0.5234375, + "learning_rate": 6.8057155378498105e-09, + "loss": 0.9991, + "step": 69480 + }, + { + "epoch": 1.0, + "grad_norm": 0.5703125, + "learning_rate": 6.5167619021200274e-09, + "loss": 1.0204, + "step": 69485 + }, + { + "epoch": 1.0, + "grad_norm": 0.55859375, + "learning_rate": 6.2340759718115815e-09, + "loss": 1.0564, + "step": 69490 + }, + { + "epoch": 1.0, + "grad_norm": 0.498046875, + "learning_rate": 5.957657764632529e-09, + "loss": 0.9994, + "step": 69495 + }, + { + "epoch": 1.0, + "grad_norm": 0.56640625, + "learning_rate": 5.687507297913452e-09, + "loss": 0.9983, + "step": 69500 + }, + { + "epoch": 1.0, + "grad_norm": 0.5546875, + "learning_rate": 5.42362458857415e-09, + "loss": 0.8027, + "step": 69505 + }, + { + "epoch": 1.0, + "grad_norm": 0.54296875, + "learning_rate": 5.166009653179149e-09, + "loss": 0.9545, + "step": 69510 + }, + { + "epoch": 1.0, + "grad_norm": 0.498046875, + "learning_rate": 4.9146625078599906e-09, + "loss": 0.9419, + "step": 69515 + }, + { + "epoch": 1.0, + "grad_norm": 0.515625, + "learning_rate": 4.669583168370739e-09, + "loss": 1.0382, + "step": 69520 + }, + { + "epoch": 1.0, + "grad_norm": 0.5546875, + "learning_rate": 4.4307716500768816e-09, + "loss": 0.9006, + "step": 69525 + }, + { + "epoch": 1.0, + "grad_norm": 0.5703125, + "learning_rate": 4.198227967944224e-09, + "loss": 0.8615, + "step": 69530 + }, + { + "epoch": 1.0, + "grad_norm": 0.4921875, + "learning_rate": 3.9719521365610964e-09, + "loss": 1.0192, + "step": 69535 + }, + { + "epoch": 1.0, + "grad_norm": 0.53125, + "learning_rate": 3.751944170105048e-09, + "loss": 1.0237, + "step": 69540 + }, + { + "epoch": 1.0, + "grad_norm": 0.54296875, + "learning_rate": 3.5382040823539464e-09, + "loss": 0.9387, + "step": 69545 + }, + { + "epoch": 1.0, + "grad_norm": 0.578125, + "learning_rate": 3.330731886719285e-09, + "loss": 0.8864, + "step": 69550 + }, + { + "epoch": 1.0, + "grad_norm": 0.51171875, + "learning_rate": 3.1295275961906733e-09, + "loss": 0.9047, + "step": 69555 + }, + { + "epoch": 1.0, + "grad_norm": 0.53515625, + "learning_rate": 2.93459122340245e-09, + "loss": 0.8942, + "step": 69560 + }, + { + "epoch": 1.0, + "grad_norm": 0.53515625, + "learning_rate": 2.745922780555965e-09, + "loss": 0.8649, + "step": 69565 + }, + { + "epoch": 1.0, + "grad_norm": 0.55078125, + "learning_rate": 2.563522279486197e-09, + "loss": 1.0669, + "step": 69570 + }, + { + "epoch": 1.0, + "grad_norm": 0.5234375, + "learning_rate": 2.3873897316173397e-09, + "loss": 0.9068, + "step": 69575 + }, + { + "epoch": 1.0, + "grad_norm": 0.57421875, + "learning_rate": 2.2175251480072156e-09, + "loss": 0.8953, + "step": 69580 + }, + { + "epoch": 1.0, + "grad_norm": 0.56640625, + "learning_rate": 2.0539285392806584e-09, + "loss": 0.9438, + "step": 69585 + }, + { + "epoch": 1.0, + "grad_norm": 0.59375, + "learning_rate": 1.896599915696129e-09, + "loss": 1.0569, + "step": 69590 + }, + { + "epoch": 1.0, + "grad_norm": 0.58984375, + "learning_rate": 1.74553928712351e-09, + "loss": 0.9274, + "step": 69595 + }, + { + "epoch": 1.0, + "grad_norm": 0.5625, + "learning_rate": 1.6007466630330036e-09, + "loss": 0.8161, + "step": 69600 + }, + { + "epoch": 1.0, + "grad_norm": 0.55859375, + "learning_rate": 1.4622220524951324e-09, + "loss": 1.098, + "step": 69605 + }, + { + "epoch": 1.0, + "grad_norm": 0.5859375, + "learning_rate": 1.3299654642029425e-09, + "loss": 0.9883, + "step": 69610 + }, + { + "epoch": 1.0, + "grad_norm": 0.52734375, + "learning_rate": 1.2039769064275952e-09, + "loss": 0.9939, + "step": 69615 + }, + { + "epoch": 1.0, + "grad_norm": 0.57421875, + "learning_rate": 1.0842563870738788e-09, + "loss": 0.9728, + "step": 69620 + }, + { + "epoch": 1.0, + "grad_norm": 0.5234375, + "learning_rate": 9.708039136580028e-10, + "loss": 0.8537, + "step": 69625 + }, + { + "epoch": 1.0, + "grad_norm": 0.51171875, + "learning_rate": 8.636194932742925e-10, + "loss": 0.9281, + "step": 69630 + }, + { + "epoch": 1.0, + "grad_norm": 0.55859375, + "learning_rate": 7.627031326395973e-10, + "loss": 0.9818, + "step": 69635 + }, + { + "epoch": 1.0, + "grad_norm": 0.59765625, + "learning_rate": 6.680548381043927e-10, + "loss": 0.979, + "step": 69640 + }, + { + "epoch": 1.0, + "grad_norm": 0.56640625, + "learning_rate": 5.796746155750654e-10, + "loss": 0.9615, + "step": 69645 + }, + { + "epoch": 1.0, + "grad_norm": 0.52734375, + "learning_rate": 4.975624706027305e-10, + "loss": 0.932, + "step": 69650 + }, + { + "epoch": 1.0, + "grad_norm": 0.5390625, + "learning_rate": 4.2171840833882257e-10, + "loss": 0.9145, + "step": 69655 + }, + { + "epoch": 1.0, + "grad_norm": 0.63671875, + "learning_rate": 3.5214243353509646e-10, + "loss": 1.0162, + "step": 69660 + }, + { + "epoch": 1.0, + "grad_norm": 0.45703125, + "learning_rate": 2.888345505436263e-10, + "loss": 1.0425, + "step": 69665 + }, + { + "epoch": 1.0, + "grad_norm": 0.5390625, + "learning_rate": 2.3179476332790827e-10, + "loss": 0.9054, + "step": 69670 + }, + { + "epoch": 1.0, + "grad_norm": 0.466796875, + "learning_rate": 1.8102307548506503e-10, + "loss": 0.9386, + "step": 69675 + }, + { + "epoch": 1.0, + "grad_norm": 0.55078125, + "learning_rate": 1.3651949017923216e-10, + "loss": 0.9464, + "step": 69680 + }, + { + "epoch": 1.0, + "grad_norm": 0.5390625, + "learning_rate": 9.828401021927392e-11, + "loss": 0.9135, + "step": 69685 + }, + { + "epoch": 1.0, + "grad_norm": 0.5390625, + "learning_rate": 6.631663798106758e-11, + "loss": 0.8707, + "step": 69690 + }, + { + "epoch": 1.0, + "grad_norm": 0.5234375, + "learning_rate": 4.061737547411681e-11, + "loss": 0.9513, + "step": 69695 + }, + { + "epoch": 1.0, + "grad_norm": 0.56640625, + "learning_rate": 2.1186224308245018e-11, + "loss": 0.9132, + "step": 69700 + }, + { + "epoch": 1.0, + "grad_norm": 0.5625, + "learning_rate": 8.023185715799742e-12, + "loss": 1.0332, + "step": 69705 + }, + { + "epoch": 1.0, + "grad_norm": 0.54296875, + "learning_rate": 1.1282605072437947e-12, + "loss": 0.8443, + "step": 69710 + }, + { + "epoch": 1.0, + "eval_loss": 0.9567210078239441, + "eval_runtime": 6401.306, + "eval_samples_per_second": 2.411, + "eval_steps_per_second": 1.205, + "step": 69713 + }, + { + "epoch": 1.0, + "step": 69713, + "total_flos": 1.225431715317547e+19, + "train_loss": 0.9623549959262275, + "train_runtime": 219590.8488, + "train_samples_per_second": 0.635, + "train_steps_per_second": 0.317 + } + ], + "logging_steps": 5, + "max_steps": 69713, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 100, + "total_flos": 1.225431715317547e+19, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}